combine patterns for add-add fusion

This patch adds a function to genfusion.pl to add a couple
more patterns so combine can do fusion of pairs of add and
vaddudm instructions.

gcc/ChangeLog:

	* config/rs6000/genfusion.pl (gen_addadd): New function.
	* config/rs6000/fusion.md: Regenerate file.
	* config/rs6000/rs6000-cpus.def: Add
	OPTION_MASK_P10_FUSION_2ADD to masks.
	* config/rs6000/rs6000.c (rs6000_option_override_internal):
	Handle default value of OPTION_MASK_P10_FUSION_2ADD.
	* config/rs6000/rs6000.opt: Add -mpower10-fusion-2add.

gcc/testsuite/ChangeLog:

	* gcc.target/powerpc/fusion-p10-addadd.c: New file.
This commit is contained in:
Aaron Sawdey 2021-01-25 21:11:52 -06:00
parent bdc7162351
commit 842a055189
6 changed files with 135 additions and 3 deletions

View file

@ -2658,3 +2658,39 @@
[(set_attr "type" "fused_vector")
(set_attr "cost" "6")
(set_attr "length" "8")])
;; add-add fusion pattern generated by gen_addadd
;; Computes (op0 + op1) + op2 as two adds emitted back to back.
;; In alternatives 0-2 the first add writes operand 3 while operand 2 is
;; still to be read by the second add, so operand 3 must be earlyclobber.
;; "&0" / "&1" keep the tie to operand 0 / 1 but stop operand 2 from being
;; allocated to the same register (e.g. for a + b + a).
;; Alternative 3 routes the intermediate sum through scratch operand 4.
(define_insn "*fuse_add_add"
[(set (match_operand:GPR 3 "gpc_reg_operand" "=&0,&1,&r,r")
(plus:GPR
(plus:GPR (match_operand:GPR 0 "gpc_reg_operand" "r,r,r,r")
(match_operand:GPR 1 "gpc_reg_operand" "%r,r,r,r"))
(match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))
(clobber (match_scratch:GPR 4 "=X,X,X,&r"))]
"(TARGET_P10_FUSION && TARGET_P10_FUSION_2ADD)"
"@
add %3,%1,%0\;add %3,%3,%2
add %3,%1,%0\;add %3,%3,%2
add %3,%1,%0\;add %3,%3,%2
add %4,%1,%0\;add %3,%4,%2"
[(set_attr "type" "fuse_arithlog")
(set_attr "cost" "6")
(set_attr "length" "8")])
;; vaddudm-vaddudm fusion pattern generated by gen_addadd
;; Computes (op0 + op1) + op2 on V2DI as two vaddudm emitted back to back.
;; In alternatives 0-2 the first vaddudm writes operand 3 while operand 2 is
;; still to be read by the second one, so operand 3 must be earlyclobber.
;; "&0" / "&1" keep the tie to operand 0 / 1 but stop operand 2 from being
;; allocated to the same register (e.g. for a + b + a).
;; Alternative 3 routes the intermediate sum through scratch operand 4.
(define_insn "*fuse_vaddudm_vaddudm"
[(set (match_operand:V2DI 3 "altivec_register_operand" "=&0,&1,&v,v")
(plus:V2DI
(plus:V2DI (match_operand:V2DI 0 "altivec_register_operand" "v,v,v,v")
(match_operand:V2DI 1 "altivec_register_operand" "%v,v,v,v"))
(match_operand:V2DI 2 "altivec_register_operand" "v,v,v,v")))
(clobber (match_scratch:V2DI 4 "=X,X,X,&v"))]
"(TARGET_P10_FUSION && TARGET_P10_FUSION_2ADD)"
"@
vaddudm %3,%1,%0\;vaddudm %3,%3,%2
vaddudm %3,%1,%0\;vaddudm %3,%3,%2
vaddudm %3,%1,%0\;vaddudm %3,%3,%2
vaddudm %4,%1,%0\;vaddudm %3,%4,%2"
[(set_attr "type" "fuse_vec")
(set_attr "cost" "6")
(set_attr "length" "8")])

View file

@ -240,8 +240,52 @@ EOF
}
}
# gen_addadd - print the add-add fusion define_insn patterns to STDOUT.
#
# Takes no arguments.  Emits one pattern for the scalar "add" (mode GPR)
# and one for the vector "vaddudm" (mode V2DI), in that order.  Each
# pattern matches (op0 + op1) + op2 and outputs the two instructions back
# to back.
#
# Alternatives of each generated pattern:
#   0,1  result tied to operand 0 / operand 1
#   2    result in a fresh register
#   3    intermediate sum in scratch operand 4
#
# In alternatives 0-2 the first instruction writes operand 3 before the
# second one reads operand 2, so operand 3 is marked earlyclobber in all
# three ("&0" and "&1" still permit the tie to operand 0 / 1).  Without
# the "&" on the tied alternatives, operand 2 could share the result
# register (e.g. a + b + a) and be overwritten by the first instruction.
sub gen_addadd
{
    # One row per pattern:
    #   mnemonic, insn "type" attribute, machine mode, predicate, constraint
    my @variants = (
        [ 'add',     'fuse_arithlog', 'GPR',  'gpc_reg_operand',          'r' ],
        [ 'vaddudm', 'fuse_vec',      'V2DI', 'altivec_register_operand', 'v' ],
    );

    foreach my $variant (@variants) {
        my ($op, $type, $mode, $pred, $constraint) = @{$variant};

        # The same register constraint repeated for all four alternatives.
        my $c4 = join(',', ($constraint) x 4);

        print <<"EOF";
;; ${op}-${op} fusion pattern generated by gen_addadd
(define_insn "*fuse_${op}_${op}"
[(set (match_operand:${mode} 3 "${pred}" "=&0,&1,&${constraint},${constraint}")
(plus:${mode}
(plus:${mode} (match_operand:${mode} 0 "${pred}" "${c4}")
(match_operand:${mode} 1 "${pred}" "%${c4}"))
(match_operand:${mode} 2 "${pred}" "${c4}")))
(clobber (match_scratch:${mode} 4 "=X,X,X,&${constraint}"))]
"(TARGET_P10_FUSION && TARGET_P10_FUSION_2ADD)"
"@
${op} %3,%1,%0\\;${op} %3,%3,%2
${op} %3,%1,%0\\;${op} %3,%3,%2
${op} %3,%1,%0\\;${op} %3,%3,%2
${op} %4,%1,%0\\;${op} %3,%4,%2"
[(set_attr "type" "${type}")
(set_attr "cost" "6")
(set_attr "length" "8")])
EOF
    }
}
gen_ld_cmpi_p10();
gen_2logical();
gen_addadd;
exit(0);

View file

@ -85,7 +85,8 @@
| OTHER_POWER10_MASKS \
| OPTION_MASK_P10_FUSION \
| OPTION_MASK_P10_FUSION_LD_CMPI \
| OPTION_MASK_P10_FUSION_2LOGICAL)
| OPTION_MASK_P10_FUSION_2LOGICAL \
| OPTION_MASK_P10_FUSION_2ADD)
/* Flags that need to be turned off if -mno-power9-vector. */
#define OTHER_P9_VECTOR_MASKS (OPTION_MASK_FLOAT128_HW \
@ -135,6 +136,7 @@
| OPTION_MASK_P10_FUSION \
| OPTION_MASK_P10_FUSION_LD_CMPI \
| OPTION_MASK_P10_FUSION_2LOGICAL \
| OPTION_MASK_P10_FUSION_2ADD \
| OPTION_MASK_HTM \
| OPTION_MASK_ISEL \
| OPTION_MASK_MFCRF \

View file

@ -4465,16 +4465,22 @@ rs6000_option_override_internal (bool global_init_p)
if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
rs6000_isa_flags |= OPTION_MASK_MMA;
if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0)
if (TARGET_POWER10
&& (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0)
rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
if (TARGET_POWER10 &&
(rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_LD_CMPI) == 0)
rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LD_CMPI;
if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2LOGICAL) == 0)
if (TARGET_POWER10
&& (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2LOGICAL) == 0)
rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2LOGICAL;
if (TARGET_POWER10
&& (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2ADD) == 0)
rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2ADD;
/* Turn off vector pair/mma options on non-power10 systems. */
else if (!TARGET_POWER10 && TARGET_MMA)
{

View file

@ -502,6 +502,10 @@ mpower10-fusion-2logical
Target Undocumented Mask(P10_FUSION_2LOGICAL) Var(rs6000_isa_flags)
Fuse certain integer operations together for better performance on power10.
mpower10-fusion-2add
Target Undocumented Mask(P10_FUSION_2ADD) Var(rs6000_isa_flags)
Fuse certain add operations together for better performance on power10.
mcrypto
Target Mask(CRYPTO) Var(rs6000_isa_flags)
Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions.

View file

@ -0,0 +1,40 @@
/* { dg-do compile } */
/* { dg-options "-mdejagnu-cpu=power10 -O3 -dp" } */
/* Scalar case: returns the sum of its three long arguments.  */
long addadd0(long a, long b, long c)
{
return a+b+c;
}
/* Scalar case: returns a+b+c and additionally stores b through t.
   NOTE(review): the store presumably keeps b live after the sum so that
   a different pattern alternative is selected -- confirm.  */
long addadd1(long a, long b, long c, long *t)
{
long r=a+b+c;
*t = b;
return r;
}
/* Scalar case: returns b+c+a; the first parameter s is not used.
   NOTE(review): the unused leading parameter presumably shifts the
   incoming argument registers so the result register is none of the
   addends -- confirm.  */
long addadd2(long s, long a, long b, long c)
{
return b+c+a;
}
/* Vector type used by the vector variants of the test below.  */
typedef vector long vlong;
/* Vector case: returns the element-wise sum of its three arguments.  */
vlong vaddadd(vlong a, vlong b, vlong c)
{
return a+b+c;
}
/* Vector case: returns a+b+c and additionally stores b through t.
   NOTE(review): the store presumably keeps b live after the sum so that
   a different pattern alternative is selected -- confirm.  */
vlong vaddadd1(vlong a, vlong b, vlong c, vlong *t)
{
vlong r=a+b+c;
*t = b;
return r;
}
/* Vector case: returns a+b+c; the first parameter s is not used.
   NOTE(review): the scalar counterpart addadd2 sums in the order b+c+a
   while this one uses a+b+c -- confirm the difference is intentional.  */
vlong vaddadd2(vlong s, vlong a, vlong b, vlong c)
{
return a+b+c;
}
/* { dg-final { scan-assembler-times "fuse_add_add/0" 1 } } */
/* { dg-final { scan-assembler-times "fuse_add_add/1" 1 } } */
/* { dg-final { scan-assembler-times "fuse_add_add/2" 1 } } */
/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/0" 1 } } */
/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/1" 1 } } */
/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/2" 1 } } */