combine patterns for add-add fusion
This patch adds a function to genfusion.pl to add a couple more patterns so combine can do fusion of pairs of add and vaddudm instructions. gcc/ChangeLog: * config/rs6000/genfusion.pl (gen_addadd): New function. * config/rs6000/fusion.md: Regenerate file. * config/rs6000/rs6000-cpus.def: Add OPTION_MASK_P10_FUSION_2ADD to masks. * config/rs6000/rs6000.c (rs6000_option_override_internal): Handle default value of OPTION_MASK_P10_FUSION_2ADD. * config/rs6000/rs6000.opt: Add -mpower10-fusion-2add. gcc/testsuite/ChangeLog: * gcc.target/powerpc/fusion-p10-addadd.c: New file.
This commit is contained in:
parent
bdc7162351
commit
842a055189
6 changed files with 135 additions and 3 deletions
|
@ -2658,3 +2658,39 @@
|
|||
[(set_attr "type" "fused_vector")
|
||||
(set_attr "cost" "6")
|
||||
(set_attr "length" "8")])
|
||||
|
||||
;; add-add fusion pattern generated by gen_addadd
;; Matches (op0 + op1) + op2 and emits the two adds back to back.
;; Alternatives:
;;   0: result tied to operand 0
;;   1: result tied to operand 1
;;   2: result in a separate earlyclobber register
;;   3: result in any register, intermediate sum held in scratch operand 4
;; In alternatives 0-2 the first add already writes %3 while %2 is still
;; to be read by the second add, so the result must be earlyclobber in all
;; three; otherwise operand 2 may be given the same register as the result.
;; This file is generated: keep the constraint in sync with genfusion.pl.
(define_insn "*fuse_add_add"
  [(set (match_operand:GPR 3 "gpc_reg_operand" "=&0,&1,&r,r")
        (plus:GPR
           (plus:GPR (match_operand:GPR 0 "gpc_reg_operand" "r,r,r,r")
                     (match_operand:GPR 1 "gpc_reg_operand" "%r,r,r,r"))
           (match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))
   (clobber (match_scratch:GPR 4 "=X,X,X,&r"))]
  "(TARGET_P10_FUSION && TARGET_P10_FUSION_2ADD)"
  "@
   add %3,%1,%0\;add %3,%3,%2
   add %3,%1,%0\;add %3,%3,%2
   add %3,%1,%0\;add %3,%3,%2
   add %4,%1,%0\;add %3,%4,%2"
  [(set_attr "type" "fuse_arithlog")
   (set_attr "cost" "6")
   (set_attr "length" "8")])
|
||||
|
||||
;; vaddudm-vaddudm fusion pattern generated by gen_addadd
;; Matches (op0 + op1) + op2 on V2DI and emits the two vaddudm back to back.
;; Alternatives:
;;   0: result tied to operand 0
;;   1: result tied to operand 1
;;   2: result in a separate earlyclobber register
;;   3: result in any register, intermediate sum held in scratch operand 4
;; In alternatives 0-2 the first vaddudm already writes %3 while %2 is still
;; to be read by the second one, so the result must be earlyclobber in all
;; three; otherwise operand 2 may be given the same register as the result.
;; This file is generated: keep the constraint in sync with genfusion.pl.
(define_insn "*fuse_vaddudm_vaddudm"
  [(set (match_operand:V2DI 3 "altivec_register_operand" "=&0,&1,&v,v")
        (plus:V2DI
           (plus:V2DI (match_operand:V2DI 0 "altivec_register_operand" "v,v,v,v")
                      (match_operand:V2DI 1 "altivec_register_operand" "%v,v,v,v"))
           (match_operand:V2DI 2 "altivec_register_operand" "v,v,v,v")))
   (clobber (match_scratch:V2DI 4 "=X,X,X,&v"))]
  "(TARGET_P10_FUSION && TARGET_P10_FUSION_2ADD)"
  "@
   vaddudm %3,%1,%0\;vaddudm %3,%3,%2
   vaddudm %3,%1,%0\;vaddudm %3,%3,%2
   vaddudm %3,%1,%0\;vaddudm %3,%3,%2
   vaddudm %4,%1,%0\;vaddudm %3,%4,%2"
  [(set_attr "type" "fuse_vec")
   (set_attr "cost" "6")
   (set_attr "length" "8")])
|
||||
|
|
|
@ -240,8 +240,52 @@ EOF
|
|||
}
|
||||
}
|
||||
|
||||
# Print the add-add and vaddudm-vaddudm fusion patterns to STDOUT.
#
# One define_insn is emitted per instruction kind, scalar first and then
# vector.  Each pattern matches (op0 + op1) + op2 and offers four register
# alternatives:
#   0: result tied to operand 0
#   1: result tied to operand 1
#   2: result in a separate earlyclobber register
#   3: result in any register, intermediate sum held in scratch operand 4
#
# Takes no arguments; the return value carries no meaning.
sub gen_addadd
{
    # Per-kind template parameters: mnemonic, insn "type" attribute, machine
    # mode, operand predicate and register constraint letter.
    # NOTE(review): the vector pattern that precedes these in fusion.md uses
    # type "fused_vector"; confirm "fuse_arithlog" and "fuse_vec" are
    # declared insn types.
    my @kinds = (
        { op => "add",     type => "fuse_arithlog", mode => "GPR",
          pred => "gpc_reg_operand",          constraint => "r" },
        { op => "vaddudm", type => "fuse_vec",      mode => "V2DI",
          pred => "altivec_register_operand", constraint => "v" },
    );

    foreach my $kind (@kinds) {
        my ($op, $type, $mode, $pred, $constraint)
            = @{$kind}{qw(op type mode pred constraint)};

        # Input operands accept the same register class in all four
        # alternatives.
        my $c4 = join(",", ($constraint) x 4);

        # In alternatives 0-2 the first instruction already writes %3 while
        # %2 is still to be read by the second one, so the result is marked
        # earlyclobber ("&") in all three.  Without it operand 2 may be
        # allocated to the same register as the result and be overwritten
        # before it is used.
        print <<"EOF";

;; ${op}-${op} fusion pattern generated by gen_addadd
(define_insn "*fuse_${op}_${op}"
  [(set (match_operand:${mode} 3 "${pred}" "=&0,&1,&${constraint},${constraint}")
        (plus:${mode}
           (plus:${mode} (match_operand:${mode} 0 "${pred}" "${c4}")
                         (match_operand:${mode} 1 "${pred}" "%${c4}"))
           (match_operand:${mode} 2 "${pred}" "${c4}")))
   (clobber (match_scratch:${mode} 4 "=X,X,X,&${constraint}"))]
  "(TARGET_P10_FUSION && TARGET_P10_FUSION_2ADD)"
  "@
   ${op} %3,%1,%0\\;${op} %3,%3,%2
   ${op} %3,%1,%0\\;${op} %3,%3,%2
   ${op} %3,%1,%0\\;${op} %3,%3,%2
   ${op} %4,%1,%0\\;${op} %3,%4,%2"
  [(set_attr "type" "${type}")
   (set_attr "cost" "6")
   (set_attr "length" "8")])
EOF
    }

    return;
}
|
||||
|
||||
# Emit every group of fusion patterns in order, then exit successfully.
# All generators are called with explicit parentheses so each line is
# unambiguously a sub call regardless of where the sub is defined.
gen_ld_cmpi_p10();
gen_2logical();
gen_addadd();

exit(0);
|
||||
|
||||
|
|
|
@ -85,7 +85,8 @@
|
|||
| OTHER_POWER10_MASKS \
|
||||
| OPTION_MASK_P10_FUSION \
|
||||
| OPTION_MASK_P10_FUSION_LD_CMPI \
|
||||
| OPTION_MASK_P10_FUSION_2LOGICAL)
|
||||
| OPTION_MASK_P10_FUSION_2LOGICAL \
|
||||
| OPTION_MASK_P10_FUSION_2ADD)
|
||||
|
||||
/* Flags that need to be turned off if -mno-power9-vector. */
|
||||
#define OTHER_P9_VECTOR_MASKS (OPTION_MASK_FLOAT128_HW \
|
||||
|
@ -135,6 +136,7 @@
|
|||
| OPTION_MASK_P10_FUSION \
|
||||
| OPTION_MASK_P10_FUSION_LD_CMPI \
|
||||
| OPTION_MASK_P10_FUSION_2LOGICAL \
|
||||
| OPTION_MASK_P10_FUSION_2ADD \
|
||||
| OPTION_MASK_HTM \
|
||||
| OPTION_MASK_ISEL \
|
||||
| OPTION_MASK_MFCRF \
|
||||
|
|
|
@ -4465,16 +4465,22 @@ rs6000_option_override_internal (bool global_init_p)
|
|||
if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
|
||||
rs6000_isa_flags |= OPTION_MASK_MMA;
|
||||
|
||||
if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0)
|
||||
if (TARGET_POWER10
|
||||
&& (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0)
|
||||
rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
|
||||
|
||||
if (TARGET_POWER10 &&
|
||||
(rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_LD_CMPI) == 0)
|
||||
rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LD_CMPI;
|
||||
|
||||
if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2LOGICAL) == 0)
|
||||
if (TARGET_POWER10
|
||||
&& (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2LOGICAL) == 0)
|
||||
rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2LOGICAL;
|
||||
|
||||
if (TARGET_POWER10
|
||||
&& (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2ADD) == 0)
|
||||
rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2ADD;
|
||||
|
||||
/* Turn off vector pair/mma options on non-power10 systems. */
|
||||
else if (!TARGET_POWER10 && TARGET_MMA)
|
||||
{
|
||||
|
|
|
@ -502,6 +502,10 @@ mpower10-fusion-2logical
|
|||
Target Undocumented Mask(P10_FUSION_2LOGICAL) Var(rs6000_isa_flags)
|
||||
Fuse certain integer operations together for better performance on power10.
|
||||
|
||||
mpower10-fusion-2add
|
||||
Target Undocumented Mask(P10_FUSION_2ADD) Var(rs6000_isa_flags)
|
||||
Fuse certain add operations together for better performance on power10.
|
||||
|
||||
mcrypto
|
||||
Target Mask(CRYPTO) Var(rs6000_isa_flags)
|
||||
Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions.
|
||||
|
|
40
gcc/testsuite/gcc.target/powerpc/fusion-p10-addadd.c
Normal file
40
gcc/testsuite/gcc.target/powerpc/fusion-p10-addadd.c
Normal file
|
@ -0,0 +1,40 @@
|
|||
/* { dg-do compile } */
|
||||
/* { dg-options "-mdejagnu-cpu=power10 -O3 -dp" } */
|
||||
|
||||
/* Return a + b + c: two dependent scalar adds with no other use of the
   inputs.  Presumably meant to select one of the *fuse_add_add alternatives
   counted by the dg-final scans; which one is not visible here.  */
long addadd0(long a, long b, long c)
|
||||
{
|
||||
return a+b+c;
|
||||
}
|
||||
/* Return a + b + c and also store b through t, so b is used again after
   the sum is computed.  Presumably this keeps b's register from being
   reused for the result -- confirm against the generated assembly.  */
long addadd1(long a, long b, long c, long *t)
|
||||
{
|
||||
long r=a+b+c;
|
||||
|
||||
*t = b;
|
||||
return r;
|
||||
}
|
||||
/* Return b + c + a; the leading parameter s is never read.  Presumably s
   only shifts a, b and c to later argument registers so that the return
   register differs from every input -- confirm against the ABI.  */
long addadd2(long s, long a, long b, long c)
|
||||
{
|
||||
return b+c+a;
|
||||
}
|
||||
|
||||
typedef vector long vlong;
|
||||
/* Vector counterpart of addadd0: return the element-wise sum a + b + c of
   three vlong values.  Presumably meant to select one of the
   *fuse_vaddudm_vaddudm alternatives counted by the dg-final scans.  */
vlong vaddadd(vlong a, vlong b, vlong c)
|
||||
{
|
||||
return a+b+c;
|
||||
}
|
||||
/* Vector counterpart of addadd1: return a + b + c and also store b through
   t, so b is used again after the sum is computed.  */
vlong vaddadd1(vlong a, vlong b, vlong c, vlong *t)
|
||||
{
|
||||
vlong r=a+b+c;
|
||||
|
||||
*t = b;
|
||||
return r;
|
||||
}
|
||||
/* Return a + b + c; the leading parameter s is never read.  Unlike the
   scalar addadd2, the operands are summed in a, b, c order.  Presumably s
   only shifts the inputs to later argument registers -- confirm against
   the ABI.  */
vlong vaddadd2(vlong s, vlong a, vlong b, vlong c)
|
||||
{
|
||||
return a+b+c;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-assembler-times "fuse_add_add/0" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fuse_add_add/1" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fuse_add_add/2" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/0" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/1" 1 } } */
|
||||
/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/2" 1 } } */
|
Loading…
Add table
Reference in a new issue