From 3ddffba914b25228f477c726c564ee18078e0cb9 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek <jakub@redhat.com> Date: Thu, 28 Apr 2016 17:57:16 +0200 Subject: [PATCH] i386.md (sse4_1_round<mode>2): Add avx512f alternative. * config/i386/i386.md (sse4_1_round<mode>2): Add avx512f alternative. * config/i386/sse.md (sse4_1_round<ssescalarmodesuffix>): Likewise. * gcc.target/i386/avx-vround-1.c: New test. * gcc.target/i386/avx-vround-2.c: New test. * gcc.target/i386/avx512vl-vround-1.c: New test. * gcc.target/i386/avx512vl-vround-2.c: New test. From-SVN: r235576 --- gcc/ChangeLog | 5 + gcc/config/i386/i386.md | 16 ++-- gcc/config/i386/sse.md | 17 ++-- gcc/testsuite/ChangeLog | 7 ++ gcc/testsuite/gcc.target/i386/avx-vround-1.c | 59 ++++++++++++ gcc/testsuite/gcc.target/i386/avx-vround-2.c | 77 +++++++++++++++ .../gcc.target/i386/avx512vl-vround-1.c | 96 +++++++++++++++++++ .../gcc.target/i386/avx512vl-vround-2.c | 9 ++ 8 files changed, 272 insertions(+), 14 deletions(-) create mode 100644 gcc/testsuite/gcc.target/i386/avx-vround-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx-vround-2.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vround-1.c create mode 100644 gcc/testsuite/gcc.target/i386/avx512vl-vround-2.c diff --git a/gcc/ChangeLog b/gcc/ChangeLog index c129db94825..3f99dbdad61 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,8 @@ +2016-04-28 Jakub Jelinek <jakub@redhat.com> + + * config/i386/i386.md (sse4_1_round<mode>2): Add avx512f alternative. + * config/i386/sse.md (sse4_1_round<ssescalarmodesuffix>): Likewise. 
+ 2016-04-28 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> PR testsuite/70595 diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index b19bc93505e..6befec76c0d 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -15497,15 +15497,19 @@ (define_insn "sse4_1_round<mode>2" - [(set (match_operand:MODEF 0 "register_operand" "=x") - (unspec:MODEF [(match_operand:MODEF 1 "register_operand" "x") - (match_operand:SI 2 "const_0_to_15_operand" "n")] + [(set (match_operand:MODEF 0 "register_operand" "=x,v") + (unspec:MODEF [(match_operand:MODEF 1 "register_operand" "x,v") + (match_operand:SI 2 "const_0_to_15_operand" "n,n")] UNSPEC_ROUND))] "TARGET_ROUND" - "%vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}" + "@ + %vround<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2} + vrndscale<ssemodesuffix>\t{%2, %1, %d0|%d0, %1, %2}" [(set_attr "type" "ssecvt") - (set_attr "prefix_extra" "1") - (set_attr "prefix" "maybe_vex") + (set_attr "prefix_extra" "1,*") + (set_attr "length_immediate" "*,1") + (set_attr "prefix" "maybe_vex,evex") + (set_attr "isa" "noavx512f,avx512f") (set_attr "mode" "<MODE>")]) (define_insn "rintxf2" diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index 45442bd7fec..d4cdc42fd7c 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -14867,25 +14867,26 @@ }) (define_insn "sse4_1_round<ssescalarmodesuffix>" - [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x") + [(set (match_operand:VF_128 0 "register_operand" "=Yr,*x,x,v") (vec_merge:VF_128 (unspec:VF_128 - [(match_operand:VF_128 2 "register_operand" "Yr,*x,x") - (match_operand:SI 3 "const_0_to_15_operand" "n,n,n")] + [(match_operand:VF_128 2 "register_operand" "Yr,*x,x,v") + (match_operand:SI 3 "const_0_to_15_operand" "n,n,n,n")] UNSPEC_ROUND) - (match_operand:VF_128 1 "register_operand" "0,0,x") + (match_operand:VF_128 1 "register_operand" "0,0,x,v") (const_int 1)))] "TARGET_ROUND" "@ round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3} round<ssescalarmodesuffix>\t{%3, %2, %0|%0, %2, %3} - vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" - [(set_attr "isa" "noavx,noavx,avx") + vround<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, 
%3} + vrndscale<ssescalarmodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}" + [(set_attr "isa" "noavx,noavx,avx,avx512f") (set_attr "type" "ssecvt") (set_attr "length_immediate" "1") - (set_attr "prefix_data16" "1,1,*") + (set_attr "prefix_data16" "1,1,*,*") (set_attr "prefix_extra" "1") - (set_attr "prefix" "orig,orig,vex") + (set_attr "prefix" "orig,orig,vex,evex") (set_attr "mode" "<MODE>")]) (define_expand "round<mode>2" diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index ff7051bd97f..6aef47cc753 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,10 @@ +2016-04-28 Jakub Jelinek <jakub@redhat.com> + + * gcc.target/i386/avx-vround-1.c: New test. + * gcc.target/i386/avx-vround-2.c: New test. + * gcc.target/i386/avx512vl-vround-1.c: New test. + * gcc.target/i386/avx512vl-vround-2.c: New test. + 2016-04-28 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE> PR testsuite/70595 diff --git a/gcc/testsuite/gcc.target/i386/avx-vround-1.c b/gcc/testsuite/gcc.target/i386/avx-vround-1.c new file mode 100644 index 00000000000..b760607eacd --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vround-1.c @@ -0,0 +1,59 @@ +/* { dg-do compile } */ +/* { dg-options "-Ofast -mavx -mno-avx2" } */ + +#include <x86intrin.h> + +__attribute__((noinline, noclone)) double +f1 (double x) +{ + return __builtin_round (x); +} + +__attribute__((noinline, noclone)) float +f2 (float x) +{ + return __builtin_roundf (x); +} + +__attribute__((noinline, noclone)) __m128d +f3 (__m128d x, __m128d y) +{ + return _mm_round_sd (x, y, _MM_FROUND_NINT); +} + +__attribute__((noinline, noclone)) __m128 +f4 (__m128 x, __m128 y) +{ + return _mm_round_ss (x, y, _MM_FROUND_NINT); +} + +__attribute__((noinline, noclone)) __m128d +f5 (__m128d x) +{ + return _mm_round_pd (x, _MM_FROUND_NINT); +} + +__attribute__((noinline, noclone)) __m128 +f6 (__m128 x) +{ + return _mm_round_ps (x, _MM_FROUND_NINT); +} + +__attribute__((noinline, noclone)) __m256d +f7 (__m256d x) +{ + return _mm256_round_pd (x, _MM_FROUND_NINT); +} + +__attribute__((noinline, noclone)) __m256 +f8 (__m256 x) +{ + 
return _mm256_round_ps (x, _MM_FROUND_NINT); +} + +/* { dg-final { scan-assembler-times "vroundsd\[^\n\r\]*xmm" 2 } } */ +/* { dg-final { scan-assembler-times "vroundss\[^\n\r\]*xmm" 2 } } */ +/* { dg-final { scan-assembler-times "vroundpd\[^\n\r\]*xmm" 1 } } */ +/* { dg-final { scan-assembler-times "vroundps\[^\n\r\]*xmm" 1 } } */ +/* { dg-final { scan-assembler-times "vroundpd\[^\n\r\]*ymm" 1 } } */ +/* { dg-final { scan-assembler-times "vroundps\[^\n\r\]*ymm" 1 } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx-vround-2.c b/gcc/testsuite/gcc.target/i386/avx-vround-2.c new file mode 100644 index 00000000000..46b5f06f308 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx-vround-2.c @@ -0,0 +1,77 @@ +/* { dg-do run } */ +/* { dg-require-effective-target avx } */ +/* { dg-options "-Ofast -mavx -mno-avx2" } */ + +#ifndef CHECK_H +#define CHECK_H "avx-check.h" +#define TEST avx_test +#define SRC "avx-vround-1.c" +#endif + +#include CHECK_H +#include SRC + +static void +TEST (void) +{ + union128d a, ae; + union128 b, be; + union256d c, ce; + union256 d, de; + if (f1 (0.5) != 1.0 || f1 (1.5) != 2.0 || f1 (-0.5) != -1.0 || f1 (-1.5) != -2.0) + abort (); + if (f2 (0.5f) != 1.0f || f2 (1.5f) != 2.0f || f2 (-0.5f) != -1.0f || f2 (-1.5f) != -2.0f) + abort (); + a.x = f3 (_mm_set1_pd (7.0), _mm_set1_pd (0.5)); + ae.x = _mm_set_pd (7.0, 0.0); + if (check_union128d (a, ae.a)) + abort (); + a.x = f3 (_mm_set1_pd (7.0), _mm_set1_pd (1.5)); + ae.x = _mm_set_pd (7.0, 2.0); + if (check_union128d (a, ae.a)) + abort (); + a.x = f3 (_mm_set1_pd (7.0), _mm_set1_pd (-0.5)); + ae.x = _mm_set_pd (7.0, 0.0); + if (check_union128d (a, ae.a)) + abort (); + a.x = f3 (_mm_set1_pd (7.0), _mm_set1_pd (-1.5)); + ae.x = _mm_set_pd (7.0, -2.0); + if (check_union128d (a, ae.a)) + abort (); + b.x = f4 (_mm_set1_ps (7.0f), _mm_set1_ps (0.5f)); + be.x = _mm_set_ps (7.0f, 7.0f, 7.0f, 0.0f); + if (check_union128 (b, be.a)) + abort (); + b.x = f4 (_mm_set1_ps (7.0f), _mm_set1_ps (1.5f)); + be.x = 
_mm_set_ps (7.0f, 7.0f, 7.0f, 2.0f); + if (check_union128 (b, be.a)) + abort (); + b.x = f4 (_mm_set1_ps (7.0f), _mm_set1_ps (-0.5f)); + be.x = _mm_set_ps (7.0f, 7.0f, 7.0f, 0.0f); + if (check_union128 (b, be.a)) + abort (); + b.x = f4 (_mm_set1_ps (7.0f), _mm_set1_ps (-1.5f)); + be.x = _mm_set_ps (7.0f, 7.0f, 7.0f, -2.0f); + if (check_union128 (b, be.a)) + abort (); + a.x = f5 (_mm_set_pd (0.5, 1.5)); + ae.x = _mm_set_pd (0.0, 2.0); + if (check_union128d (a, ae.a)) + abort (); + a.x = f5 (_mm_set_pd (-0.5, -1.5)); + ae.x = _mm_set_pd (0.0, -2.0); + if (check_union128d (a, ae.a)) + abort (); + b.x = f6 (_mm_set_ps (0.5f, 1.5f, -0.5f, -1.5f)); + be.x = _mm_set_ps (0.0f, 2.0f, 0.0f, -2.0f); + if (check_union128 (b, be.a)) + abort (); + c.x = f7 (_mm256_set_pd (0.5, 1.5, -0.5, -1.5)); + ce.x = _mm256_set_pd (0.0, 2.0, 0.0, -2.0); + if (check_union256d (c, ce.a)) + abort (); + d.x = f8 (_mm256_set_ps (0.5f, 1.5f, -0.5f, -1.5f, 0.25f, 1.0f, -16.5f, 0.75f)); + de.x = _mm256_set_ps (0.0f, 2.0f, 0.0f, -2.0f, 0.0f, 1.0f, -16.0f, 1.0f); + if (check_union256 (d, de.a)) + abort (); +} diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vround-1.c b/gcc/testsuite/gcc.target/i386/avx512vl-vround-1.c new file mode 100644 index 00000000000..55ea792fef7 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vround-1.c @@ -0,0 +1,96 @@ +/* { dg-do compile { target { ! 
ia32 } } } */ +/* { dg-options "-Ofast -mavx512vl" } */ + +#include <x86intrin.h> + +__attribute__((noinline, noclone)) double +f1 (double x) +{ + register double a __asm__ ("xmm16") = __builtin_round (x); + __asm__ ("" : "+v" (a)); + return a; +} + +__attribute__((noinline, noclone)) float +f2 (float x) +{ + register float a __asm__ ("xmm16") = __builtin_roundf (x); + __asm__ ("" : "+v" (a)); + return a; +} + +__attribute__((noinline, noclone)) __m128d +f3 (__m128d x, __m128d y) +{ + register __m128d a __asm__ ("xmm16") = x, b __asm__ ("xmm17") = y; + __asm__ ("" : "+v" (a), "+v" (b)); + a = _mm_round_sd (a, b, _MM_FROUND_NINT); + __asm__ ("" : "+v" (a)); + return a; +} + +__attribute__((noinline, noclone)) __m128 +f4 (__m128 x, __m128 y) +{ + register __m128 a __asm__ ("xmm16") = x, b __asm__ ("xmm17") = y; + __asm__ ("" : "+v" (a), "+v" (b)); + a = _mm_round_ss (a, b, _MM_FROUND_NINT); + __asm__ ("" : "+v" (a)); + return a; +} + +__attribute__((noinline, noclone)) __m128d +f5 (__m128d x) +{ + register __m128d a __asm__ ("xmm16") = x; + __asm__ ("" : "+v" (a)); + a = _mm_round_pd (a, _MM_FROUND_NINT); + __asm__ ("" : "+v" (a)); + return a; +} + +__attribute__((noinline, noclone)) __m128 +f6 (__m128 x) +{ + register __m128 a __asm__ ("xmm16") = x; + __asm__ ("" : "+v" (a)); + a = _mm_round_ps (a, _MM_FROUND_NINT); + __asm__ ("" : "+v" (a)); + return a; +} + +__attribute__((noinline, noclone)) __m256d +f7 (__m256d x) +{ + register __m256d a __asm__ ("xmm16") = x; + __asm__ ("" : "+v" (a)); + a = _mm256_round_pd (a, _MM_FROUND_NINT); + __asm__ ("" : "+v" (a)); + return a; +} + +__attribute__((noinline, noclone)) __m256 +f8 (__m256 x) +{ + register __m256 a __asm__ ("xmm16") = x; + __asm__ ("" : "+v" (a)); + a = _mm256_round_ps (a, _MM_FROUND_NINT); + __asm__ ("" : "+v" (a)); + return a; +} + +/* Instead of vround{sd,ss,pd,ps} this should use vrndscale{sd,ss,pd,ps} + counterparts, so that [xy]mm1[67] can be referenced directly in the + instructions. 
*/ +/* { dg-final { scan-assembler-times "vrndscalesd\[^\n\r\]*xmm" 2 } } */ +/* { dg-final { scan-assembler-times "vrndscaless\[^\n\r\]*xmm" 2 } } */ +/* { dg-final { scan-assembler-times "vrndscalepd\[^\n\r\]*xmm" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscaleps\[^\n\r\]*xmm" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscalepd\[^\n\r\]*ymm" 1 } } */ +/* { dg-final { scan-assembler-times "vrndscaleps\[^\n\r\]*ymm" 1 } } */ +/* { dg-final { scan-assembler-not "vroundsd\[^\n\r\]*xmm" } } */ +/* { dg-final { scan-assembler-not "vroundss\[^\n\r\]*xmm" } } */ +/* { dg-final { scan-assembler-not "vroundpd\[^\n\r\]*xmm" } } */ +/* { dg-final { scan-assembler-not "vroundps\[^\n\r\]*xmm" } } */ +/* { dg-final { scan-assembler-not "vroundpd\[^\n\r\]*ymm" } } */ +/* { dg-final { scan-assembler-not "vroundps\[^\n\r\]*ymm" } } */ diff --git a/gcc/testsuite/gcc.target/i386/avx512vl-vround-2.c b/gcc/testsuite/gcc.target/i386/avx512vl-vround-2.c new file mode 100644 index 00000000000..c4607dad053 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/avx512vl-vround-2.c @@ -0,0 +1,9 @@ +/* { dg-do run { target { ! ia32 } } } */ +/* { dg-require-effective-target avx512vl } */ +/* { dg-options "-Ofast -mavx512vl" } */ + +#define CHECK_H "avx512vl-check.h" +#define TEST avx512vl_test +#define SRC "avx512vl-vround-1.c" + +#include "avx-vround-2.c"