PR middle-end/98865: Expand X*Y as X&-Y when Y is [0,1].
This patch is a revised solution for PR middle-end/98865, incorporating the feedback/suggestions from Richard Biener's review here:
https://gcc.gnu.org/pipermail/gcc-patches/2022-May/593928.html

Most significantly, this patch now performs the transformation/optimization during RTL expansion, where the target's rtx_costs can be used to determine whether the original multiplication (that may potentially be implemented by a shift or lea) is cheaper than a negation and a bit-wise and.

Previously the expression (x>>63)*y would be compiled with -O2 as:

    shrq $63, %rdi
    movq %rdi, %rax
    imulq %rsi, %rax

but with this patch now produces:

    sarq $63, %rdi
    movq %rdi, %rax
    andq %rsi, %rax

Likewise the expression (x>>63)*135 [that appears in a hot-spot of the Botan AES-128 benchmark] was previously:

    shrq $63, %rdi
    leaq (%rdi,%rdi,8), %rdx
    movq %rdx, %rax
    salq $4, %rax
    subq %rdx, %rax

and now becomes:

    movq %rdi, %rax
    sarq $63, %rax
    andl $135, %eax

2022-05-19  Roger Sayle  <roger@nextmovesoftware.com>

gcc/ChangeLog
    PR middle-end/98865
    * expr.cc (expand_expr_real_2) [MULT_EXPR]: Expand X*Y as X&Y when both X and Y are [0, 1], X*Y as X&-Y when Y is [0,1] and likewise X*Y as -X&Y when X is [0,1] using tree_nonzero_bits.

gcc/testsuite/ChangeLog
    PR middle-end/98865
    * gcc.target/i386/pr98865.c: New test case.
This commit is contained in:
parent
cd427e17c0
commit
d863ba23fb
2 changed files with 86 additions and 0 deletions
32
gcc/expr.cc
32
gcc/expr.cc
|
@ -9541,6 +9541,38 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
|
||||||
}
|
}
|
||||||
|
|
||||||
expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
|
expand_operands (treeop0, treeop1, subtarget, &op0, &op1, EXPAND_NORMAL);
|
||||||
|
|
||||||
|
/* Expand X*Y as X&-Y when Y must be zero or one. */
|
||||||
|
if (SCALAR_INT_MODE_P (mode))
|
||||||
|
{
|
||||||
|
bool bit0_p = tree_nonzero_bits (treeop0) == 1;
|
||||||
|
bool bit1_p = tree_nonzero_bits (treeop1) == 1;
|
||||||
|
|
||||||
|
/* Expand X*Y as X&Y when both X and Y must be zero or one. */
|
||||||
|
if (bit0_p && bit1_p)
|
||||||
|
return REDUCE_BIT_FIELD (expand_and (mode, op0, op1, target));
|
||||||
|
|
||||||
|
if (bit0_p || bit1_p)
|
||||||
|
{
|
||||||
|
bool speed = optimize_insn_for_speed_p ();
|
||||||
|
int cost = add_cost (speed, mode) + neg_cost (speed, mode);
|
||||||
|
struct algorithm algorithm;
|
||||||
|
enum mult_variant variant;
|
||||||
|
if (CONST_INT_P (op1)
|
||||||
|
? !choose_mult_variant (mode, INTVAL (op1),
|
||||||
|
&algorithm, &variant, cost)
|
||||||
|
: cost < mul_cost (speed, mode))
|
||||||
|
{
|
||||||
|
target = bit0_p ? expand_and (mode, negate_rtx (mode, op0),
|
||||||
|
op1, target)
|
||||||
|
: expand_and (mode, op0,
|
||||||
|
negate_rtx (mode, op1),
|
||||||
|
target);
|
||||||
|
return REDUCE_BIT_FIELD (target);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
|
return REDUCE_BIT_FIELD (expand_mult (mode, op0, op1, target, unsignedp));
|
||||||
|
|
||||||
case TRUNC_MOD_EXPR:
|
case TRUNC_MOD_EXPR:
|
||||||
|
|
54
gcc/testsuite/gcc.target/i386/pr98865.c
Normal file
54
gcc/testsuite/gcc.target/i386/pr98865.c
Normal file
|
@ -0,0 +1,54 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-O2 -fdump-tree-optimized" } */
|
||||||
|
|
||||||
|
#if __SIZEOF_INT__ == 4
|
||||||
|
/* 32-bit case: A is unsigned and this section is only compiled when
   int is 4 bytes, so (a >> 31) is the top bit of A, i.e. 0 or 1.
   The product with B is therefore either 0 or B; the dg-final
   directive in this file requires that no "imul" is emitted.  */
unsigned int foo(unsigned int a, unsigned int b)
|
||||||
|
{
|
||||||
|
return (a >> 31) * b;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Signed 32-bit case written as a single expression.  Unary minus
   binds tighter than '*', so this is (-(a >> 31)) * b.  With GCC's
   arithmetic right shift of signed values, (a >> 31) is 0 or -1, so
   the negated multiplicand is 0 or 1.  */
int bar(int a, int b)
|
||||||
|
{
|
||||||
|
return -(a >> 31) * b;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Same computation as the single-expression signed 32-bit case, but
   spelled with named temporaries: C holds the shifted value, D its
   negation, and the result is D * B.  */
int baz(int a, int b)
|
||||||
|
{
|
||||||
|
/* Sign-shift of A (0 or -1 with GCC's arithmetic right shift).  */
int c = a >> 31;
|
||||||
|
/* Negation of C, hence 0 or 1.  */
int d = -c;
|
||||||
|
return d * b;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 32-bit case where the 0-or-1 multiplicand comes from masking the
   low bit of A rather than from a shift.  */
unsigned int pin(int a, unsigned int b)
|
||||||
|
{
|
||||||
|
/* T is the least significant bit of A, so it is 0 or 1.  */
unsigned int t = a & 1;
|
||||||
|
return t * b;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if __SIZEOF_LONG_LONG__ == 8
|
||||||
|
/* 64-bit case: A is unsigned and this section is only compiled when
   long long is 8 bytes, so (a >> 63) is the top bit of A, i.e. 0 or
   1, and the product is either 0 or B.  */
unsigned long long fool(unsigned long long a, unsigned long long b)
|
||||||
|
{
|
||||||
|
return (a >> 63) * b;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Signed 64-bit case written as a single expression.  Unary minus
   binds tighter than '*', so this is (-(a >> 63)) * b.  With GCC's
   arithmetic right shift of signed values, (a >> 63) is 0 or -1, so
   the negated multiplicand is 0 or 1.  */
long long barl (long long a, long long b)
|
||||||
|
{
|
||||||
|
return -(a >> 63) * b;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Same computation as the single-expression signed 64-bit case, but
   spelled with named temporaries: C holds the shifted value, D its
   negation, and the result is D * B.  */
long long bazl (long long a, long long b)
|
||||||
|
{
|
||||||
|
/* Sign-shift of A (0 or -1 with GCC's arithmetic right shift).  */
long long c = a >> 63;
|
||||||
|
/* Negation of C, hence 0 or 1.  */
long long d = -c;
|
||||||
|
return d * b;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* 64-bit case where the 0-or-1 multiplicand comes from masking the
   low bit of A rather than from a shift.  */
unsigned long long pinl(long long a, unsigned long long b)
|
||||||
|
{
|
||||||
|
/* T is the least significant bit of A, so it is 0 or 1.  */
unsigned long long t = a & 1;
|
||||||
|
return t * b;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-not "imul" } } */
|
Loading…
Add table
Reference in a new issue