i386.md (sse4_1_round<mode>2): Add avx512f alternative.
* config/i386/i386.md (sse4_1_round<mode>2): Add avx512f alternative. * config/i386/sse.md (sse4_1_round<ssescalarmodesuffix>): Likewise. * gcc.target/i386/avx-vround-1.c: New test. * gcc.target/i386/avx-vround-2.c: New test. * gcc.target/i386/avx512vl-vround-1.c: New test. * gcc.target/i386/avx512vl-vround-2.c: New test. From-SVN: r235576
This commit is contained in:
parent
97fceb9fa9
commit
3ddffba914
8 changed files with 272 additions and 14 deletions
|
@ -1,3 +1,8 @@
|
|||
2016-04-28 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* config/i386/i386.md (sse4_1_round<mode>2): Add avx512f alternative.
|
||||
* config/i386/sse.md (sse4_1_round<ssescalarmodesuffix>): Likewise.
|
||||
|
||||
2016-04-28 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
|
||||
|
||||
PR testsuite/70595
|
||||
|
|
|
@ -15497,15 +15497,19 @@
|
|||
|
||||
|
||||
(define_insn "sse4_1_round<mode>2"
|
||||
[(set (match_operand:MODEF 0 "register_operand" "=x")
|
||||
(unspec:MODEF [(match_operand:MODEF 1 "register_operand" "x")
|
||||
(match_operand:SI 2 "const_0_to_15_operand" "n")]
|
||||
[(set (match_operand:MODEF 0 "register_operand" "=x,v")
|
||||
(unspec:MODEF [(match_operand:MODEF 1 "register_operand" "x,v")
|
||||
(match_operand:SI 2 "const_0_to_15_operand" "n,n")]
|
||||
UNSPEC_ROUND))]
|
||||
"TARGET_ROUND"
|
||||
"%vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}"
|
||||
"@
|
||||
%vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}
|
||||
vrndscale<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}"
|
||||
[(set_attr "type" "ssecvt")
|
||||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "prefix" "maybe_vex")
|
||||
(set_attr "prefix_extra" "1,*")
|
||||
(set_attr "length_immediate" "*,1")
|
||||
(set_attr "prefix" "maybe_vex,evex")
|
||||
(set_attr "isa" "noavx512f,avx512f")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_insn "rintxf2"
|
||||
|
|
|
@ -14867,25 +14867,26 @@
|
|||
})
|
||||
|
||||
(define_insn "sse4_1_round<ssescalarmodesuffix>"
|
||||
[(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x")
|
||||
[(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v")
|
||||
(vec_merge:VF_128
|
||||
(unspec:VF_128
|
||||
[(match_operand:VF_128 2 "register_operand" "Yr,*x,x")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n,n,n")]
|
||||
[(match_operand:VF_128 2 "register_operand" "Yr,*x,x,v")
|
||||
(match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")]
|
||||
UNSPEC_ROUND)
|
||||
(match_operand:VF_128 1 "register_operand" "0,0,x")
|
||||
(match_operand:VF_128 1 "register_operand" "0,0,x,v")
|
||||
(const_int 1)))]
|
||||
"TARGET_ROUND"
|
||||
"@
|
||||
round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
|
||||
round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3}
|
||||
vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
|
||||
[(set_attr "isa" "noavx,noavx,avx")
|
||||
vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}
|
||||
vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
|
||||
[(set_attr "isa" "noavx,noavx,avx,avx512f")
|
||||
(set_attr "type" "ssecvt")
|
||||
(set_attr "length_immediate" "1")
|
||||
(set_attr "prefix_data16" "1,1,*")
|
||||
(set_attr "prefix_data16" "1,1,*,*")
|
||||
(set_attr "prefix_extra" "1")
|
||||
(set_attr "prefix" "orig,orig,vex")
|
||||
(set_attr "prefix" "orig,orig,vex,evex")
|
||||
(set_attr "mode" "<MODE>")])
|
||||
|
||||
(define_expand "round<mode>2"
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
2016-04-28 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
* gcc.target/i386/avx-vround-1.c: New test.
|
||||
* gcc.target/i386/avx-vround-2.c: New test.
|
||||
* gcc.target/i386/avx512vl-vround-1.c: New test.
|
||||
* gcc.target/i386/avx512vl-vround-2.c: New test.
|
||||
|
||||
2016-04-28 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
|
||||
|
||||
PR testsuite/70595
|
||||
|
|
59
gcc/testsuite/gcc.target/i386/avx-vround-1.c
Normal file
59
gcc/testsuite/gcc.target/i386/avx-vround-1.c
Normal file
|
@ -0,0 +1,59 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-Ofast -mavx -mno-avx2" } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
__attribute__((noinline, noclone)) double
|
||||
f1 (double x)
|
||||
{
|
||||
return __builtin_round (x);
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) float
|
||||
f2 (float x)
|
||||
{
|
||||
return __builtin_roundf (x);
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) __m128d
|
||||
f3 (__m128d x, __m128d y)
|
||||
{
|
||||
return _mm_round_sd (x, y, _MM_FROUND_NINT);
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) __m128
|
||||
f4 (__m128 x, __m128 y)
|
||||
{
|
||||
return _mm_round_ss (x, y, _MM_FROUND_NINT);
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) __m128d
|
||||
f5 (__m128d x)
|
||||
{
|
||||
return _mm_round_pd (x, _MM_FROUND_NINT);
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) __m128
|
||||
f6 (__m128 x)
|
||||
{
|
||||
return _mm_round_ps (x, _MM_FROUND_NINT);
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) __m256d
|
||||
f7 (__m256d x)
|
||||
{
|
||||
return _mm256_round_pd (x, _MM_FROUND_NINT);
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) __m256
|
||||
f8 (__m256 x)
|
||||
{
|
||||
return _mm256_round_ps (x, _MM_FROUND_NINT);
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "vroundsd\[^\n\r\]*xmm" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vroundss\[^\n\r\]*xmm" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vroundpd\[^\n\r\]*xmm" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vroundps\[^\n\r\]*xmm" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vroundpd\[^\n\r\]*ymm" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vroundps\[^\n\r\]*ymm" 1 } } */
|
77
gcc/testsuite/gcc.target/i386/avx-vround-2.c
Normal file
77
gcc/testsuite/gcc.target/i386/avx-vround-2.c
Normal file
|
@ -0,0 +1,77 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-require-effective-target avx } */
|
||||
/* { dg-options "-Ofast -mavx -mno-avx2" } */
|
||||
|
||||
#ifndef CHECK_H
|
||||
#define CHECK_H "avx-check.h"
|
||||
#define TEST avx_test
|
||||
#define SRC "avx-vround-1.c"
|
||||
#endif
|
||||
|
||||
#include CHECK_H
|
||||
#include SRC
|
||||
|
||||
static void
|
||||
TEST (void)
|
||||
{
|
||||
union128d a, ae;
|
||||
union128 b, be;
|
||||
union256d c, ce;
|
||||
union256 d, de;
|
||||
if (f1 (0.5) != 1.0 || f1 (1.5) != 2.0 || f1 (-0.5) != -1.0 || f1 (-1.5) != -2.0)
|
||||
abort ();
|
||||
if (f2 (0.5f) != 1.0f || f2 (1.5f) != 2.0f || f2 (-0.5f) != -1.0f || f2 (-1.5f) != -2.0f)
|
||||
abort ();
|
||||
a.x = f3 (_mm_set1_pd (7.0), _mm_set1_pd (0.5));
|
||||
ae.x = _mm_set_pd (7.0, 0.0);
|
||||
if (check_union128d (a, ae.a))
|
||||
abort ();
|
||||
a.x = f3 (_mm_set1_pd (7.0), _mm_set1_pd (1.5));
|
||||
ae.x = _mm_set_pd (7.0, 2.0);
|
||||
if (check_union128d (a, ae.a))
|
||||
abort ();
|
||||
a.x = f3 (_mm_set1_pd (7.0), _mm_set1_pd (-0.5));
|
||||
ae.x = _mm_set_pd (7.0, 0.0);
|
||||
if (check_union128d (a, ae.a))
|
||||
abort ();
|
||||
a.x = f3 (_mm_set1_pd (7.0), _mm_set1_pd (-1.5));
|
||||
ae.x = _mm_set_pd (7.0, -2.0);
|
||||
if (check_union128d (a, ae.a))
|
||||
abort ();
|
||||
b.x = f4 (_mm_set1_ps (7.0f), _mm_set1_ps (0.5f));
|
||||
be.x = _mm_set_ps (7.0f, 7.0f, 7.0f, 0.0f);
|
||||
if (check_union128 (b, be.a))
|
||||
abort ();
|
||||
b.x = f4 (_mm_set1_ps (7.0f), _mm_set1_ps (1.5f));
|
||||
be.x = _mm_set_ps (7.0f, 7.0f, 7.0f, 2.0f);
|
||||
if (check_union128 (b, be.a))
|
||||
abort ();
|
||||
b.x = f4 (_mm_set1_ps (7.0f), _mm_set1_ps (-0.5f));
|
||||
be.x = _mm_set_ps (7.0f, 7.0f, 7.0f, 0.0f);
|
||||
if (check_union128 (b, be.a))
|
||||
abort ();
|
||||
b.x = f4 (_mm_set1_ps (7.0f), _mm_set1_ps (-1.5f));
|
||||
be.x = _mm_set_ps (7.0f, 7.0f, 7.0f, -2.0f);
|
||||
if (check_union128 (b, be.a))
|
||||
abort ();
|
||||
a.x = f5 (_mm_set_pd (0.5, 1.5));
|
||||
ae.x = _mm_set_pd (0.0, 2.0);
|
||||
if (check_union128d (a, ae.a))
|
||||
abort ();
|
||||
a.x = f5 (_mm_set_pd (-0.5, -1.5));
|
||||
ae.x = _mm_set_pd (0.0, -2.0);
|
||||
if (check_union128d (a, ae.a))
|
||||
abort ();
|
||||
b.x = f6 (_mm_set_ps (0.5f, 1.5f, -0.5f, -1.5f));
|
||||
be.x = _mm_set_ps (0.0f, 2.0f, 0.0f, -2.0f);
|
||||
if (check_union128 (b, be.a))
|
||||
abort ();
|
||||
c.x = f7 (_mm256_set_pd (0.5, 1.5, -0.5, -1.5));
|
||||
ce.x = _mm256_set_pd (0.0, 2.0, 0.0, -2.0);
|
||||
if (check_union256d (c, ce.a))
|
||||
abort ();
|
||||
d.x = f8 (_mm256_set_ps (0.5f, 1.5f, -0.5f, -1.5f, 0.25f, 1.0f, -16.5f, 0.75f));
|
||||
de.x = _mm256_set_ps (0.0f, 2.0f, 0.0f, -2.0f, 0.0f, 1.0f, -16.0f, 1.0f);
|
||||
if (check_union256 (d, de.a))
|
||||
abort ();
|
||||
}
|
96
gcc/testsuite/gcc.target/i386/avx512vl-vround-1.c
Normal file
96
gcc/testsuite/gcc.target/i386/avx512vl-vround-1.c
Normal file
|
@ -0,0 +1,96 @@
|
|||
/* { dg-do compile { target { ! ia32 } } } */
|
||||
/* { dg-options "-Ofast -mavx512vl" } */
|
||||
|
||||
#include <x86intrin.h>
|
||||
|
||||
__attribute__((noinline, noclone)) double
|
||||
f1 (double x)
|
||||
{
|
||||
register double a __asm__ ("xmm16") = __builtin_round (x);
|
||||
__asm__ ("" : "+v" (a));
|
||||
return a;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) float
|
||||
f2 (float x)
|
||||
{
|
||||
register float a __asm__ ("xmm16") = __builtin_roundf (x);
|
||||
__asm__ ("" : "+v" (a));
|
||||
return a;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) __m128d
|
||||
f3 (__m128d x, __m128d y)
|
||||
{
|
||||
register __m128d a __asm__ ("xmm16") = x, b __asm__ ("xmm17") = y;
|
||||
__asm__ ("" : "+v" (a), "+v" (b));
|
||||
a = _mm_round_sd (a, b, _MM_FROUND_NINT);
|
||||
__asm__ ("" : "+v" (a));
|
||||
return a;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) __m128
|
||||
f4 (__m128 x, __m128 y)
|
||||
{
|
||||
register __m128 a __asm__ ("xmm16") = x, b __asm__ ("xmm17") = y;
|
||||
__asm__ ("" : "+v" (a), "+v" (b));
|
||||
a = _mm_round_ss (a, b, _MM_FROUND_NINT);
|
||||
__asm__ ("" : "+v" (a));
|
||||
return a;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) __m128d
|
||||
f5 (__m128d x)
|
||||
{
|
||||
register __m128d a __asm__ ("xmm16") = x;
|
||||
__asm__ ("" : "+v" (a));
|
||||
a = _mm_round_pd (a, _MM_FROUND_NINT);
|
||||
__asm__ ("" : "+v" (a));
|
||||
return a;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) __m128
|
||||
f6 (__m128 x)
|
||||
{
|
||||
register __m128 a __asm__ ("xmm16") = x;
|
||||
__asm__ ("" : "+v" (a));
|
||||
a = _mm_round_ps (a, _MM_FROUND_NINT);
|
||||
__asm__ ("" : "+v" (a));
|
||||
return a;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) __m256d
|
||||
f7 (__m256d x)
|
||||
{
|
||||
register __m256d a __asm__ ("xmm16") = x;
|
||||
__asm__ ("" : "+v" (a));
|
||||
a = _mm256_round_pd (a, _MM_FROUND_NINT);
|
||||
__asm__ ("" : "+v" (a));
|
||||
return a;
|
||||
}
|
||||
|
||||
__attribute__((noinline, noclone)) __m256
|
||||
f8 (__m256 x)
|
||||
{
|
||||
register __m256 a __asm__ ("xmm16") = x;
|
||||
__asm__ ("" : "+v" (a));
|
||||
a = _mm256_round_ps (a, _MM_FROUND_NINT);
|
||||
__asm__ ("" : "+v" (a));
|
||||
return a;
|
||||
}
|
||||
|
||||
/* Instead of vround{sd,ss,pd,ps} this should use vrndscale{sd,ss,pd,ps}
|
||||
counterparts, so that [xy]mm1[67] can be referenced directly in the
|
||||
instructions. */
|
||||
/* { dg-final { scan-assembler-times "vrndscalesd\[^\n\r\]*xmm" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vrndscaless\[^\n\r\]*xmm" 2 } } */
|
||||
/* { dg-final { scan-assembler-times "vrndscalepd\[^\n\r\]*xmm" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrndscaleps\[^\n\r\]*xmm" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrndscalepd\[^\n\r\]*ymm" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "vrndscaleps\[^\n\r\]*ymm" 1 } } */
|
||||
/* { dg-final { scan-assembler-not "vroundsd\[^\n\r\]*xmm" } } */
|
||||
/* { dg-final { scan-assembler-not "vroundss\[^\n\r\]*xmm" } } */
|
||||
/* { dg-final { scan-assembler-not "vroundpd\[^\n\r\]*xmm" } } */
|
||||
/* { dg-final { scan-assembler-not "vroundps\[^\n\r\]*xmm" } } */
|
||||
/* { dg-final { scan-assembler-not "vroundpd\[^\n\r\]*ymm" } } */
|
||||
/* { dg-final { scan-assembler-not "vroundps\[^\n\r\]*ymm" } } */
|
9
gcc/testsuite/gcc.target/i386/avx512vl-vround-2.c
Normal file
9
gcc/testsuite/gcc.target/i386/avx512vl-vround-2.c
Normal file
|
@ -0,0 +1,9 @@
|
|||
/* { dg-do run { target { ! ia32 } } } */
|
||||
/* { dg-require-effective-target avx512vl } */
|
||||
/* { dg-options "-Ofast -mavx512vl" } */
|
||||
|
||||
#define CHECK_H "avx512vl-check.h"
|
||||
#define TEST avx512vl_test
|
||||
#define SRC "avx512vl-vround-1.c"
|
||||
|
||||
#include "avx-vround-2.c"
|
Loading…
Add table
Reference in a new issue