re PR target/85572 (faster code for absolute value of __v2di)
PR target/85572 * config/i386/i386.c (ix86_expand_sse2_abs): Handle E_V2DImode and E_V4DImode. * config/i386/sse.md (abs<mode>2): Use VI_AVX2 iterator instead of VI1248_AVX512VL_AVX512BW. Handle V2DImode and V4DImode if not TARGET_AVX512VL using ix86_expand_sse2_abs. Formatting fixes. * g++.dg/other/sse2-pr85572-1.C: New test. * g++.dg/other/sse2-pr85572-2.C: New test. * g++.dg/other/sse4-pr85572-1.C: New test. * g++.dg/other/avx2-pr85572-1.C: New test. From-SVN: r260041
This commit is contained in:
parent
ac68185918
commit
4d4015db89
8 changed files with 165 additions and 25 deletions
|
@ -1,5 +1,12 @@
|
|||
2018-05-08 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/85572
|
||||
* config/i386/i386.c (ix86_expand_sse2_abs): Handle E_V2DImode and
|
||||
E_V4DImode.
|
||||
* config/i386/sse.md (abs<mode>2): Use VI_AVX2 iterator instead of
|
||||
VI1248_AVX512VL_AVX512BW. Handle V2DImode and V4DImode if not
|
||||
TARGET_AVX512VL using ix86_expand_sse2_abs. Formatting fixes.
|
||||
|
||||
PR target/85317
|
||||
* config/i386/i386.c (ix86_fold_builtin): Handle
|
||||
IX86_BUILTIN_{,P}MOVMSK{PS,PD,B}{,128,256}.
|
||||
|
|
|
@ -49837,39 +49837,70 @@ ix86_expand_sse2_abs (rtx target, rtx input)
|
|||
|
||||
switch (mode)
|
||||
{
|
||||
case E_V2DImode:
|
||||
case E_V4DImode:
|
||||
/* For 64-bit signed integer X, with SSE4.2 use
|
||||
pxor t0, t0; pcmpgtq X, t0; pxor t0, X; psubq t0, X.
|
||||
Otherwise handle it similarly to V4SImode, except use 64 as W instead of
|
||||
32 and use logical instead of arithmetic right shift (which is
|
||||
unimplemented), then negate the shifted value to rebuild the sign mask.  */
|
||||
if (TARGET_SSE4_2)
|
||||
{
|
||||
tmp0 = gen_reg_rtx (mode);
|
||||
tmp1 = gen_reg_rtx (mode);
|
||||
emit_move_insn (tmp1, CONST0_RTX (mode));
|
||||
if (mode == E_V2DImode)
|
||||
emit_insn (gen_sse4_2_gtv2di3 (tmp0, tmp1, input));
|
||||
else
|
||||
emit_insn (gen_avx2_gtv4di3 (tmp0, tmp1, input));
|
||||
}
|
||||
else
|
||||
{
|
||||
tmp0 = expand_simple_binop (mode, LSHIFTRT, input,
|
||||
GEN_INT (GET_MODE_UNIT_BITSIZE (mode)
|
||||
- 1), NULL, 0, OPTAB_DIRECT);
|
||||
tmp0 = expand_simple_unop (mode, NEG, tmp0, NULL, false);
|
||||
}
|
||||
|
||||
tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
|
||||
NULL, 0, OPTAB_DIRECT);
|
||||
x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
|
||||
target, 0, OPTAB_DIRECT);
|
||||
break;
|
||||
|
||||
case E_V4SImode:
|
||||
/* For 32-bit signed integer X, the best way to calculate the absolute
|
||||
value of X is (((signed) X >> (W-1)) ^ X) - ((signed) X >> (W-1)). */
|
||||
case E_V4SImode:
|
||||
tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
|
||||
GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
|
||||
NULL, 0, OPTAB_DIRECT);
|
||||
tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
|
||||
NULL, 0, OPTAB_DIRECT);
|
||||
x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
|
||||
target, 0, OPTAB_DIRECT);
|
||||
break;
|
||||
tmp0 = expand_simple_binop (mode, ASHIFTRT, input,
|
||||
GEN_INT (GET_MODE_UNIT_BITSIZE (mode) - 1),
|
||||
NULL, 0, OPTAB_DIRECT);
|
||||
tmp1 = expand_simple_binop (mode, XOR, tmp0, input,
|
||||
NULL, 0, OPTAB_DIRECT);
|
||||
x = expand_simple_binop (mode, MINUS, tmp1, tmp0,
|
||||
target, 0, OPTAB_DIRECT);
|
||||
break;
|
||||
|
||||
case E_V8HImode:
|
||||
/* For 16-bit signed integer X, the best way to calculate the absolute
|
||||
value of X is max (X, -X), as SSE2 provides the PMAXSW insn. */
|
||||
case E_V8HImode:
|
||||
tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
|
||||
tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
|
||||
|
||||
x = expand_simple_binop (mode, SMAX, tmp0, input,
|
||||
target, 0, OPTAB_DIRECT);
|
||||
break;
|
||||
x = expand_simple_binop (mode, SMAX, tmp0, input,
|
||||
target, 0, OPTAB_DIRECT);
|
||||
break;
|
||||
|
||||
case E_V16QImode:
|
||||
/* For 8-bit signed integer X, the best way to calculate the absolute
|
||||
value of X is min ((unsigned char) X, (unsigned char) (-X)),
|
||||
as SSE2 provides the PMINUB insn. */
|
||||
case E_V16QImode:
|
||||
tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
|
||||
tmp0 = expand_unop (mode, neg_optab, input, NULL_RTX, 0);
|
||||
|
||||
x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
|
||||
target, 0, OPTAB_DIRECT);
|
||||
break;
|
||||
x = expand_simple_binop (V16QImode, UMIN, tmp0, input,
|
||||
target, 0, OPTAB_DIRECT);
|
||||
break;
|
||||
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
if (x != target)
|
||||
|
|
|
@ -15211,12 +15211,14 @@
|
|||
(set_attr "mode" "<sseinsnmode>")])
|
||||
|
||||
(define_expand "abs<mode>2"
|
||||
[(set (match_operand:VI1248_AVX512VL_AVX512BW 0 "register_operand")
|
||||
(abs:VI1248_AVX512VL_AVX512BW
|
||||
(match_operand:VI1248_AVX512VL_AVX512BW 1 "vector_operand")))]
|
||||
[(set (match_operand:VI_AVX2 0 "register_operand")
|
||||
(abs:VI_AVX2
|
||||
(match_operand:VI_AVX2 1 "vector_operand")))]
|
||||
"TARGET_SSE2"
|
||||
{
|
||||
if (!TARGET_SSSE3)
|
||||
if (!TARGET_SSSE3
|
||||
|| ((<MODE>mode == V2DImode || <MODE>mode == V4DImode)
|
||||
&& !TARGET_AVX512VL))
|
||||
{
|
||||
ix86_expand_sse2_abs (operands[0], operands[1]);
|
||||
DONE;
|
||||
|
|
|
@ -1,5 +1,11 @@
|
|||
2018-05-08 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR target/85572
|
||||
* g++.dg/other/sse2-pr85572-1.C: New test.
|
||||
* g++.dg/other/sse2-pr85572-2.C: New test.
|
||||
* g++.dg/other/sse4-pr85572-1.C: New test.
|
||||
* g++.dg/other/avx2-pr85572-1.C: New test.
|
||||
|
||||
PR target/85317
|
||||
* gcc.target/i386/pr85317.c: New test.
|
||||
* gcc.target/i386/avx2-vpmovmskb-2.c (avx2_test): Add asm volatile
|
||||
|
|
21
gcc/testsuite/g++.dg/other/avx2-pr85572-1.C
Normal file
21
gcc/testsuite/g++.dg/other/avx2-pr85572-1.C
Normal file
|
@ -0,0 +1,21 @@
|
|||
// PR target/85572
|
||||
// { dg-do compile { target i?86-*-* x86_64-*-* } }
|
||||
// { dg-options "-O2 -mavx2 -mno-avx512f" }
|
||||
// { dg-final { scan-assembler-times {\mvpxor\M} 4 } }
|
||||
// { dg-final { scan-assembler-times {\mvpcmpgtq\M} 2 } }
|
||||
// { dg-final { scan-assembler-times {\mvpsubq\M} 2 } }
|
||||
|
||||
typedef long long V __attribute__((vector_size (16)));
|
||||
typedef long long W __attribute__((vector_size (32)));
|
||||
|
||||
V
|
||||
foo (V x)
|
||||
{
|
||||
return x < 0 ? -x : x;
|
||||
}
|
||||
|
||||
W
|
||||
bar (W x)
|
||||
{
|
||||
return x < 0 ? -x : x;
|
||||
}
|
14
gcc/testsuite/g++.dg/other/sse2-pr85572-1.C
Normal file
14
gcc/testsuite/g++.dg/other/sse2-pr85572-1.C
Normal file
|
@ -0,0 +1,14 @@
|
|||
// PR target/85572
|
||||
// { dg-do compile { target i?86-*-* x86_64-*-* } }
|
||||
// { dg-options "-O2 -msse2 -mno-sse3" }
|
||||
// { dg-final { scan-assembler-times {\mpxor\M} 2 } }
|
||||
// { dg-final { scan-assembler-times {\mpsubq\M} 2 } }
|
||||
// { dg-final { scan-assembler-times {\mpsrlq\M} 1 } }
|
||||
|
||||
typedef long long V __attribute__((vector_size (16)));
|
||||
|
||||
V
|
||||
foo (V x)
|
||||
{
|
||||
return x < 0 ? -x : x;
|
||||
}
|
45
gcc/testsuite/g++.dg/other/sse2-pr85572-2.C
Normal file
45
gcc/testsuite/g++.dg/other/sse2-pr85572-2.C
Normal file
|
@ -0,0 +1,45 @@
|
|||
// PR target/85572
|
||||
// { dg-do run { target i?86-*-* x86_64-*-* } }
|
||||
// { dg-options "-O2 -msse2" }
|
||||
// { dg-require-effective-target sse2_runtime }
|
||||
|
||||
typedef long long V __attribute__((vector_size (16)));
|
||||
typedef long long W __attribute__((vector_size (32)));
|
||||
|
||||
__attribute__((noipa)) V
|
||||
foo (V x)
|
||||
{
|
||||
return x < 0 ? -x : x;
|
||||
}
|
||||
|
||||
__attribute__((noipa)) void
|
||||
bar (W *x, W *y)
|
||||
{
|
||||
*y = *x < 0 ? -*x : *x;
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
V a = { 11LL, -15LL };
|
||||
V b = foo (a);
|
||||
if (b[0] != 11LL || b[1] != 15LL)
|
||||
__builtin_abort ();
|
||||
V c = { -123456789123456LL, 654321654321654LL };
|
||||
V d = foo (c);
|
||||
if (d[0] != 123456789123456LL || d[1] != 654321654321654LL)
|
||||
__builtin_abort ();
|
||||
V e = { 0, 1 };
|
||||
V f = foo (e);
|
||||
if (f[0] != 0 || f[1] != 1)
|
||||
__builtin_abort ();
|
||||
W g = { 17LL, -32LL, -123456789123456LL, 654321654321654LL }, h;
|
||||
bar (&g, &h);
|
||||
if (h[0] != 17LL || h[1] != 32LL
|
||||
|| h[2] != 123456789123456LL || h[3] != 654321654321654LL)
|
||||
__builtin_abort ();
|
||||
W i = { 0, 1, -1, 0 }, j;
|
||||
bar (&i, &j);
|
||||
if (j[0] != 0 || j[1] != 1 || j[2] != 1 || j[3] != 0)
|
||||
__builtin_abort ();
|
||||
}
|
14
gcc/testsuite/g++.dg/other/sse4-pr85572-1.C
Normal file
14
gcc/testsuite/g++.dg/other/sse4-pr85572-1.C
Normal file
|
@ -0,0 +1,14 @@
|
|||
// PR target/85572
|
||||
// { dg-do compile { target i?86-*-* x86_64-*-* } }
|
||||
// { dg-options "-O2 -msse4 -mno-avx" }
|
||||
// { dg-final { scan-assembler-times {\mpxor\M} 2 } }
|
||||
// { dg-final { scan-assembler-times {\mpcmpgtq\M} 1 } }
|
||||
// { dg-final { scan-assembler-times {\mpsubq\M} 1 } }
|
||||
|
||||
typedef long long V __attribute__((vector_size (16)));
|
||||
|
||||
V
|
||||
foo (V x)
|
||||
{
|
||||
return x < 0 ? -x : x;
|
||||
}
|
Loading…
Add table
Reference in a new issue