aarch64: Fix bogus cnot optimisation [PR114603]
aarch64-sve.md had a pattern that combined: cmpeq pb.T, pa/z, zc.T, #0 mov zd.T, pb/z, #1 into: cnot zd.T, pa/m, zc.T But this is only valid if pa.T is a ptrue. In other cases, the original would set inactive elements of zd.T to 0, whereas the combined form would copy elements from zc.T. gcc/ PR target/114603 * config/aarch64/aarch64-sve.md (@aarch64_pred_cnot<mode>): Replace with... (@aarch64_ptrue_cnot<mode>): ...this, requiring operand 1 to be a ptrue. (*cnot<mode>): Require operand 1 to be a ptrue. * config/aarch64/aarch64-sve-builtins-base.cc (svcnot_impl::expand): Use aarch64_ptrue_cnot<mode> for _x operations that are predicated with a ptrue. Represent other _x operations as fully-defined _m operations. gcc/testsuite/ PR target/114603 * gcc.target/aarch64/sve/acle/general/cnot_1.c: New test.
This commit is contained in:
parent
e4d074321b
commit
67cbb1c638
3 changed files with 49 additions and 19 deletions
|
@ -517,15 +517,22 @@ public:
|
|||
expand (function_expander &e) const override
|
||||
{
|
||||
machine_mode mode = e.vector_mode (0);
|
||||
if (e.pred == PRED_x)
|
||||
{
|
||||
/* The pattern for CNOT includes an UNSPEC_PRED_Z, so needs
|
||||
a ptrue hint. */
|
||||
e.add_ptrue_hint (0, e.gp_mode (0));
|
||||
return e.use_pred_x_insn (code_for_aarch64_pred_cnot (mode));
|
||||
}
|
||||
machine_mode pred_mode = e.gp_mode (0);
|
||||
/* The underlying _x pattern is effectively:
|
||||
|
||||
return e.use_cond_insn (code_for_cond_cnot (mode), 0);
|
||||
dst = src == 0 ? 1 : 0
|
||||
|
||||
rather than an UNSPEC_PRED_X. Using this form allows autovec
|
||||
constructs to be matched by combine, but it means that the
|
||||
predicate on the src == 0 comparison must be all-true.
|
||||
|
||||
For simplicity, represent other _x operations as fully-defined _m
|
||||
operations rather than using a separate bespoke pattern. */
|
||||
if (e.pred == PRED_x
|
||||
&& gen_lowpart (pred_mode, e.args[0]) == CONSTM1_RTX (pred_mode))
|
||||
return e.use_pred_x_insn (code_for_aarch64_ptrue_cnot (mode));
|
||||
return e.use_cond_insn (code_for_cond_cnot (mode),
|
||||
e.pred == PRED_x ? 1 : 0);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
@ -3363,24 +3363,24 @@
|
|||
;; - CNOT
|
||||
;; -------------------------------------------------------------------------
|
||||
|
||||
;; Predicated logical inverse.
|
||||
(define_expand "@aarch64_pred_cnot<mode>"
|
||||
;; Logical inverse, predicated with a ptrue.
|
||||
(define_expand "@aarch64_ptrue_cnot<mode>"
|
||||
[(set (match_operand:SVE_FULL_I 0 "register_operand")
|
||||
(unspec:SVE_FULL_I
|
||||
[(unspec:<VPRED>
|
||||
[(match_operand:<VPRED> 1 "register_operand")
|
||||
(match_operand:SI 2 "aarch64_sve_ptrue_flag")
|
||||
(const_int SVE_KNOWN_PTRUE)
|
||||
(eq:<VPRED>
|
||||
(match_operand:SVE_FULL_I 3 "register_operand")
|
||||
(match_dup 4))]
|
||||
(match_operand:SVE_FULL_I 2 "register_operand")
|
||||
(match_dup 3))]
|
||||
UNSPEC_PRED_Z)
|
||||
(match_dup 5)
|
||||
(match_dup 4)]
|
||||
(match_dup 4)
|
||||
(match_dup 3)]
|
||||
UNSPEC_SEL))]
|
||||
"TARGET_SVE"
|
||||
{
|
||||
operands[4] = CONST0_RTX (<MODE>mode);
|
||||
operands[5] = CONST1_RTX (<MODE>mode);
|
||||
operands[3] = CONST0_RTX (<MODE>mode);
|
||||
operands[4] = CONST1_RTX (<MODE>mode);
|
||||
}
|
||||
)
|
||||
|
||||
|
@ -3389,7 +3389,7 @@
|
|||
(unspec:SVE_I
|
||||
[(unspec:<VPRED>
|
||||
[(match_operand:<VPRED> 1 "register_operand")
|
||||
(match_operand:SI 5 "aarch64_sve_ptrue_flag")
|
||||
(const_int SVE_KNOWN_PTRUE)
|
||||
(eq:<VPRED>
|
||||
(match_operand:SVE_I 2 "register_operand")
|
||||
(match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
|
||||
|
@ -11001,4 +11001,4 @@
|
|||
GET_MODE (operands[2]));
|
||||
return "sel\t%0.<Vetype>, %3, %2.<Vetype>, %1.<Vetype>";
|
||||
}
|
||||
)
|
||||
)
|
||||
|
|
23
gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c
Normal file
23
gcc/testsuite/gcc.target/aarch64/sve/acle/general/cnot_1.c
Normal file
|
@ -0,0 +1,23 @@
|
|||
/* { dg-options "-O2" } */
|
||||
/* { dg-final { check-function-bodies "**" "" } } */
|
||||
|
||||
#include <arm_sve.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/*
|
||||
** foo:
|
||||
** cmpeq (p[0-7])\.s, p0/z, z0\.s, #0
|
||||
** mov z0\.s, \1/z, #1
|
||||
** ret
|
||||
*/
|
||||
svint32_t foo(svbool_t pg, svint32_t y)
|
||||
{
|
||||
return svsel(svcmpeq(pg, y, 0), svdup_s32(1), svdup_s32(0));
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
Loading…
Add table
Reference in a new issue