combine patterns for add-add fusion
This patch adds a function, gen_addadd, to genfusion.pl that generates two more patterns so that combine can fuse pairs of add instructions and pairs of vaddudm instructions. gcc/ChangeLog: * config/rs6000/genfusion.pl (gen_addadd): New function. * config/rs6000/fusion.md: Regenerate file. * config/rs6000/rs6000-cpus.def: Add OPTION_MASK_P10_FUSION_2ADD to masks. * config/rs6000/rs6000.c (rs6000_option_override_internal): Handle default value of OPTION_MASK_P10_FUSION_2ADD. * config/rs6000/rs6000.opt: Add -mpower10-fusion-2add. gcc/testsuite/ChangeLog: * gcc.target/powerpc/fusion-p10-addadd.c: New file.
This commit is contained in:
parent
bdc7162351
commit
842a055189
6 changed files with 135 additions and 3 deletions
|
@ -2658,3 +2658,39 @@
|
||||||
[(set_attr "type" "fused_vector")
|
[(set_attr "type" "fused_vector")
|
||||||
(set_attr "cost" "6")
|
(set_attr "cost" "6")
|
||||||
(set_attr "length" "8")])
|
(set_attr "length" "8")])
|
||||||
|
|
||||||
|
;; add-add fusion pattern generated by gen_addadd
;; Matches (op0 + op1) + op2 in GPR mode and emits two consecutive add
;; instructions (length 8).  Alternatives 0-2 accumulate in the result
;; register %3; alternative 3 keeps the intermediate sum in scratch %4.
;; Enabled only when both TARGET_P10_FUSION and TARGET_P10_FUSION_2ADD hold.
;; NOTE(review): alternatives 0 and 1 tie %3 to %0/%1 without '&', yet %2 is
;; read after %3 has first been written -- confirm %2 can never be assigned
;; the same register as %3 in those alternatives.
|
||||||
|
(define_insn "*fuse_add_add"
|
||||||
|
[(set (match_operand:GPR 3 "gpc_reg_operand" "=0,1,&r,r")
|
||||||
|
(plus:GPR
|
||||||
|
(plus:GPR (match_operand:GPR 0 "gpc_reg_operand" "r,r,r,r")
|
||||||
|
(match_operand:GPR 1 "gpc_reg_operand" "%r,r,r,r"))
|
||||||
|
(match_operand:GPR 2 "gpc_reg_operand" "r,r,r,r")))
|
||||||
|
(clobber (match_scratch:GPR 4 "=X,X,X,&r"))]
|
||||||
|
"(TARGET_P10_FUSION && TARGET_P10_FUSION_2ADD)"
|
||||||
|
"@
|
||||||
|
add %3,%1,%0\;add %3,%3,%2
|
||||||
|
add %3,%1,%0\;add %3,%3,%2
|
||||||
|
add %3,%1,%0\;add %3,%3,%2
|
||||||
|
add %4,%1,%0\;add %3,%4,%2"
|
||||||
|
[(set_attr "type" "fuse_arithlog")
|
||||||
|
(set_attr "cost" "6")
|
||||||
|
(set_attr "length" "8")])
|
||||||
|
|
||||||
|
;; vaddudm-vaddudm fusion pattern generated by gen_addadd
;; Matches (op0 + op1) + op2 in V2DI mode and emits two consecutive vaddudm
;; instructions (length 8).  Alternatives 0-2 accumulate in the result
;; register %3; alternative 3 keeps the intermediate sum in scratch %4.
;; Enabled only when both TARGET_P10_FUSION and TARGET_P10_FUSION_2ADD hold.
;; NOTE(review): alternatives 0 and 1 tie %3 to %0/%1 without '&', yet %2 is
;; read after %3 has first been written -- confirm %2 can never be assigned
;; the same register as %3 in those alternatives.
|
||||||
|
(define_insn "*fuse_vaddudm_vaddudm"
|
||||||
|
[(set (match_operand:V2DI 3 "altivec_register_operand" "=0,1,&v,v")
|
||||||
|
(plus:V2DI
|
||||||
|
(plus:V2DI (match_operand:V2DI 0 "altivec_register_operand" "v,v,v,v")
|
||||||
|
(match_operand:V2DI 1 "altivec_register_operand" "%v,v,v,v"))
|
||||||
|
(match_operand:V2DI 2 "altivec_register_operand" "v,v,v,v")))
|
||||||
|
(clobber (match_scratch:V2DI 4 "=X,X,X,&v"))]
|
||||||
|
"(TARGET_P10_FUSION && TARGET_P10_FUSION_2ADD)"
|
||||||
|
"@
|
||||||
|
vaddudm %3,%1,%0\;vaddudm %3,%3,%2
|
||||||
|
vaddudm %3,%1,%0\;vaddudm %3,%3,%2
|
||||||
|
vaddudm %3,%1,%0\;vaddudm %3,%3,%2
|
||||||
|
vaddudm %4,%1,%0\;vaddudm %3,%4,%2"
|
||||||
|
[(set_attr "type" "fuse_vec")
|
||||||
|
(set_attr "cost" "6")
|
||||||
|
(set_attr "length" "8")])
|
||||||
|
|
|
@ -240,8 +240,52 @@ EOF
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Print the add-add fusion define_insn patterns to STDOUT.
#
# Takes no arguments.  Two patterns are emitted, scalar first and then
# vector: "*fuse_add_add" (GPR mode) and "*fuse_vaddudm_vaddudm" (V2DI mode).
# Each matches (op0 + op1) + op2 and offers four constraint alternatives:
# alternatives 0-2 accumulate in the result operand 3, while alternative 3
# keeps the intermediate sum in the scratch operand 4.
#
# NOTE(review): alternatives 0 and 1 tie operand 3 to operand 0/1 without
# '&', yet the first emitted instruction writes %3 before the second one
# reads %2.  If operand 2 could share that register its value would be
# overwritten early; check whether "=&0,&1,..." is needed.  Changing it
# here also means regenerating fusion.md.
sub gen_addadd
{
  # One row per pattern, in emission order:
  #   [ mnemonic, "type" attribute, machine mode, operand predicate,
  #     register constraint letter ]
  my @variants = (
    [ 'add',     'fuse_arithlog', 'GPR',  'gpc_reg_operand',          'r' ],
    [ 'vaddudm', 'fuse_vec',      'V2DI', 'altivec_register_operand', 'v' ],
  );

  foreach my $variant (@variants) {
    my ($op, $type, $mode, $pred, $constraint) = @$variant;

    # The three input operands accept the same register class in every
    # one of the four alternatives.
    my $c4 = join(',', ($constraint) x 4);

    print <<"EOF";

;; ${op}-${op} fusion pattern generated by gen_addadd
(define_insn "*fuse_${op}_${op}"
[(set (match_operand:${mode} 3 "${pred}" "=0,1,&${constraint},${constraint}")
(plus:${mode}
(plus:${mode} (match_operand:${mode} 0 "${pred}" "${c4}")
(match_operand:${mode} 1 "${pred}" "%${c4}"))
(match_operand:${mode} 2 "${pred}" "${c4}")))
(clobber (match_scratch:${mode} 4 "=X,X,X,&${constraint}"))]
"(TARGET_P10_FUSION && TARGET_P10_FUSION_2ADD)"
"@
${op} %3,%1,%0\\;${op} %3,%3,%2
${op} %3,%1,%0\\;${op} %3,%3,%2
${op} %3,%1,%0\\;${op} %3,%3,%2
${op} %4,%1,%0\\;${op} %3,%4,%2"
[(set_attr "type" "${type}")
(set_attr "cost" "6")
(set_attr "length" "8")])
EOF
  }
  return;
}
|
||||||
|
|
||||||
gen_ld_cmpi_p10();
|
gen_ld_cmpi_p10();
|
||||||
gen_2logical();
|
gen_2logical();
|
||||||
|
# Emit the add-add and vaddudm-vaddudm fusion patterns.
gen_addadd();
|
||||||
|
|
||||||
exit(0);
|
exit(0);
|
||||||
|
|
||||||
|
|
|
@ -85,7 +85,8 @@
|
||||||
| OTHER_POWER10_MASKS \
|
| OTHER_POWER10_MASKS \
|
||||||
| OPTION_MASK_P10_FUSION \
|
| OPTION_MASK_P10_FUSION \
|
||||||
| OPTION_MASK_P10_FUSION_LD_CMPI \
|
| OPTION_MASK_P10_FUSION_LD_CMPI \
|
||||||
| OPTION_MASK_P10_FUSION_2LOGICAL)
|
| OPTION_MASK_P10_FUSION_2LOGICAL \
|
||||||
|
| OPTION_MASK_P10_FUSION_2ADD)
|
||||||
|
|
||||||
/* Flags that need to be turned off if -mno-power9-vector. */
|
/* Flags that need to be turned off if -mno-power9-vector. */
|
||||||
#define OTHER_P9_VECTOR_MASKS (OPTION_MASK_FLOAT128_HW \
|
#define OTHER_P9_VECTOR_MASKS (OPTION_MASK_FLOAT128_HW \
|
||||||
|
@ -135,6 +136,7 @@
|
||||||
| OPTION_MASK_P10_FUSION \
|
| OPTION_MASK_P10_FUSION \
|
||||||
| OPTION_MASK_P10_FUSION_LD_CMPI \
|
| OPTION_MASK_P10_FUSION_LD_CMPI \
|
||||||
| OPTION_MASK_P10_FUSION_2LOGICAL \
|
| OPTION_MASK_P10_FUSION_2LOGICAL \
|
||||||
|
| OPTION_MASK_P10_FUSION_2ADD \
|
||||||
| OPTION_MASK_HTM \
|
| OPTION_MASK_HTM \
|
||||||
| OPTION_MASK_ISEL \
|
| OPTION_MASK_ISEL \
|
||||||
| OPTION_MASK_MFCRF \
|
| OPTION_MASK_MFCRF \
|
||||||
|
|
|
@ -4465,16 +4465,22 @@ rs6000_option_override_internal (bool global_init_p)
|
||||||
if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
|
if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_MMA) == 0)
|
||||||
rs6000_isa_flags |= OPTION_MASK_MMA;
|
rs6000_isa_flags |= OPTION_MASK_MMA;
|
||||||
|
|
||||||
if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0)
|
if (TARGET_POWER10
|
||||||
|
&& (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION) == 0)
|
||||||
rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
|
rs6000_isa_flags |= OPTION_MASK_P10_FUSION;
|
||||||
|
|
||||||
if (TARGET_POWER10 &&
|
if (TARGET_POWER10 &&
|
||||||
(rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_LD_CMPI) == 0)
|
(rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_LD_CMPI) == 0)
|
||||||
rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LD_CMPI;
|
rs6000_isa_flags |= OPTION_MASK_P10_FUSION_LD_CMPI;
|
||||||
|
|
||||||
if (TARGET_POWER10 && (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2LOGICAL) == 0)
|
if (TARGET_POWER10
|
||||||
|
&& (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2LOGICAL) == 0)
|
||||||
rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2LOGICAL;
|
rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2LOGICAL;
|
||||||
|
|
||||||
|
if (TARGET_POWER10
|
||||||
|
&& (rs6000_isa_flags_explicit & OPTION_MASK_P10_FUSION_2ADD) == 0)
|
||||||
|
rs6000_isa_flags |= OPTION_MASK_P10_FUSION_2ADD;
|
||||||
|
|
||||||
/* Turn off vector pair/mma options on non-power10 systems. */
|
/* Turn off vector pair/mma options on non-power10 systems. */
|
||||||
else if (!TARGET_POWER10 && TARGET_MMA)
|
else if (!TARGET_POWER10 && TARGET_MMA)
|
||||||
{
|
{
|
||||||
|
|
|
@ -502,6 +502,10 @@ mpower10-fusion-2logical
|
||||||
Target Undocumented Mask(P10_FUSION_2LOGICAL) Var(rs6000_isa_flags)
|
Target Undocumented Mask(P10_FUSION_2LOGICAL) Var(rs6000_isa_flags)
|
||||||
Fuse certain integer operations together for better performance on power10.
|
Fuse certain integer operations together for better performance on power10.
|
||||||
|
|
||||||
|
mpower10-fusion-2add
|
||||||
|
Target Undocumented Mask(P10_FUSION_2ADD) Var(rs6000_isa_flags)
|
||||||
|
Fuse certain add operations together for better performance on power10.
|
||||||
|
|
||||||
mcrypto
|
mcrypto
|
||||||
Target Mask(CRYPTO) Var(rs6000_isa_flags)
|
Target Mask(CRYPTO) Var(rs6000_isa_flags)
|
||||||
Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions.
|
Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions.
|
||||||
|
|
40
gcc/testsuite/gcc.target/powerpc/fusion-p10-addadd.c
Normal file
40
gcc/testsuite/gcc.target/powerpc/fusion-p10-addadd.c
Normal file
|
@ -0,0 +1,40 @@
|
||||||
|
/* { dg-do compile } */
|
||||||
|
/* { dg-options "-mdejagnu-cpu=power10 -O3 -dp" } */
|
||||||
|
|
||||||
|
/* Sum of three longs.  Presumably one of the fuse_add_add matches counted
   by the dg-final scans in this file -- which alternative is not stated
   here; verify against the -dp output.  */
long addadd0(long a, long b, long c)
|
||||||
|
{
|
||||||
|
return a+b+c;
|
||||||
|
}
|
||||||
|
/* Sum of three longs where b is also stored through t after the sum is
   formed, so b is used again after the additions.  Presumably this steers
   the fused pattern to a different alternative than addadd0 -- TODO
   confirm with the -dp output.  */
long addadd1(long a, long b, long c, long *t)
|
||||||
|
{
|
||||||
|
long r=a+b+c;
|
||||||
|
*t = b;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
/* Sum of three longs with an unused leading parameter s.  Presumably s
   shifts the addends away from the return register so the result is not
   tied to any input -- TODO confirm against the ABI and -dp output.  */
long addadd2(long s, long a, long b, long c)
|
||||||
|
{
|
||||||
|
return b+c+a;
|
||||||
|
}
|
||||||
|
|
||||||
|
typedef vector long vlong;
|
||||||
|
/* Vector counterpart of addadd0: sum of three vlong values.  Presumably
   one of the fuse_vaddudm_vaddudm matches counted by the dg-final scans --
   which alternative is not stated here.  */
vlong vaddadd(vlong a, vlong b, vlong c)
|
||||||
|
{
|
||||||
|
return a+b+c;
|
||||||
|
}
|
||||||
|
/* Vector counterpart of addadd1: sum of three vlong values where b is
   also stored through t after the sum is formed.  Presumably selects a
   different alternative than vaddadd -- TODO confirm with -dp output.  */
vlong vaddadd1(vlong a, vlong b, vlong c, vlong *t)
|
||||||
|
{
|
||||||
|
vlong r=a+b+c;
|
||||||
|
*t = b;
|
||||||
|
return r;
|
||||||
|
}
|
||||||
|
/* Vector counterpart of addadd2: sum of three vlong values with an unused
   leading parameter s.  Presumably s keeps the addends out of the return
   register -- TODO confirm against the ABI and -dp output.  */
vlong vaddadd2(vlong s, vlong a, vlong b, vlong c)
|
||||||
|
{
|
||||||
|
return a+b+c;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* { dg-final { scan-assembler-times "fuse_add_add/0" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times "fuse_add_add/1" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times "fuse_add_add/2" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/0" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/1" 1 } } */
|
||||||
|
/* { dg-final { scan-assembler-times "fuse_vaddudm_vaddudm/2" 1 } } */
|
Loading…
Add table
Reference in a new issue