re PR target/65614 (PowerPC VSX systems should use XSCPSGNDP to copy scalar fp data to/from Altivec registers)
[gcc] 2015-04-06 Michael Meissner <meissner@linux.vnet.ibm.com> PR target/65614 * config/rs6000/rs6000.c (struct processor_costs): Add cost field for SF->DF conversions to make FLOAT_EXTEND more expensive, so that LFD is used to load double constants instead of LFS. Add defaults for all costs structures. Add comments for missing initialization fields. (size32_cost): Likewise. (size64_cost): Likewise. (rs64a_cost): Likewise. (mpccore_cost): Likewise. (ppc403_cost): Likewise. (ppc405_cost): Likewise. (ppc440_cost): Likewise. (ppc476_cost): Likewise. (ppc601_cost): Likewise. (ppc603_cost): Likewise. (ppc604_cost): Likewise. (ppc604e_cost): Likewise. (ppc620_cost): Likewise. (ppc630_cost): Likewise. (ppccell_cost): Likewise. (ppc750_cost): Likewise. (ppc7450_cost): Likewise. (ppc8540_cost): Likewise. (ppce300c2c3_cost): Likewise. (ppce500mc_cost): Likewise. (ppce500mc64_cost): Likewise. (ppce5500_cost): Likewise. (ppce6500_cost): Likewise. (titan_cost): Likewise. (power4_cost): Likewise. (power6_cost): Likewise. (power7_cost): Likewise. (power8_cost): Likewise. (ppca2_cost): Likewise. (rs6000_rtx_costs): Make FLOAT_EXTEND use SFDF_convert field. * config/rs6000/rs6000.md (extendsfdf2_fpr): Generate XSCPSGNDP instead of XXLOR to copy SFmode to clear out dirty bits created when SFmode denormals are generated. (mov<mode>_hardfloat, FMOVE32 case): Likewise. (truncdfsf2_fpr): Add support for ISA 2.07 XSRSP instruction. [gcc/testsuite] 2015-04-06 Michael Meissner <meissner@linux.vnet.ibm.com> PR target/65614 * gcc.target/powerpc/compress-float-ppc-pic.c: Run test on power5 to get floating point compression. * gcc.target/powerpc/compress-float-ppc.c: Likewise. From-SVN: r221888
This commit is contained in:
parent
68b3d8c987
commit
90e0a7020e
6 changed files with 106 additions and 18 deletions
|
@ -1,3 +1,48 @@
|
|||
2015-04-06 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
|
||||
PR target/65614
|
||||
* config/rs6000/rs6000.c (struct processor_costs): Add cost field
|
||||
for SF->DF conversions to make FLOAT_EXTEND more expensive, so
|
||||
that LFD is used to load double constants instead of LFS. Add
|
||||
defaults for all costs structures. Add comments for missing
|
||||
initialization fields.
|
||||
(size32_cost): Likewise.
|
||||
(size64_cost): Likewise.
|
||||
(rs64a_cost): Likewise.
|
||||
(mpccore_cost): Likewise.
|
||||
(ppc403_cost): Likewise.
|
||||
(ppc405_cost): Likewise.
|
||||
(ppc440_cost): Likewise.
|
||||
(ppc476_cost): Likewise.
|
||||
(ppc601_cost): Likewise.
|
||||
(ppc603_cost): Likewise.
|
||||
(ppc604_cost): Likewise.
|
||||
(ppc604e_cost): Likewise.
|
||||
(ppc620_cost): Likewise.
|
||||
(ppc630_cost): Likewise.
|
||||
(ppccell_cost): Likewise.
|
||||
(ppc750_cost): Likewise.
|
||||
(ppc7450_cost): Likewise.
|
||||
(ppc8540_cost): Likewise.
|
||||
(ppce300c2c3_cost): Likewise.
|
||||
(ppce500mc_cost): Likewise.
|
||||
(ppce500mc64_cost): Likewise.
|
||||
(ppce5500_cost): Likewise.
|
||||
(ppce6500_cost): Likewise.
|
||||
(titan_cost): Likewise.
|
||||
(power4_cost): Likewise.
|
||||
(power6_cost): Likewise.
|
||||
(power7_cost): Likewise.
|
||||
(power8_cost): Likewise.
|
||||
(ppca2_cost): Likewise.
|
||||
(rs6000_rtx_costs): Make FLOAT_EXTEND use SFDF_convert field.
|
||||
|
||||
* config/rs6000/rs6000.md (extendsfdf2_fpr): Generate XSCPSGNDP
|
||||
instead of XXLOR to copy SFmode to clear out dirty bits created
|
||||
when SFmode denormals are generated.
|
||||
(mov<mode>_hardfloat, FMOVE32 case): Likewise.
|
||||
(truncdfsf2_fpr): Add support for ISA 2.07 XSRSP instruction.
|
||||
|
||||
2015-04-06 Evandro Menezes <e.menezes@samsung.com>
|
||||
|
||||
* doc/invoke.texi (AARCH64/mtune): Add exynos-m1 as an option.
|
||||
|
|
|
@ -457,6 +457,7 @@ struct processor_costs {
|
|||
const int l2_cache_size; /* size of l2 cache, in kilobytes. */
|
||||
const int simultaneous_prefetches; /* number of parallel prefetch
|
||||
operations. */
|
||||
const int sfdf_convert; /* cost of SF->DF conversion. */
|
||||
};
|
||||
|
||||
const struct processor_costs *rs6000_cost;
|
||||
|
@ -476,10 +477,11 @@ struct processor_costs size32_cost = {
|
|||
COSTS_N_INSNS (1), /* dmul */
|
||||
COSTS_N_INSNS (1), /* sdiv */
|
||||
COSTS_N_INSNS (1), /* ddiv */
|
||||
32,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
32, /* cache line size */
|
||||
0, /* l1 cache */
|
||||
0, /* l2 cache */
|
||||
0, /* streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction size costs on 64bit processors. */
|
||||
|
@ -495,10 +497,11 @@ struct processor_costs size64_cost = {
|
|||
COSTS_N_INSNS (1), /* dmul */
|
||||
COSTS_N_INSNS (1), /* sdiv */
|
||||
COSTS_N_INSNS (1), /* ddiv */
|
||||
128,
|
||||
0,
|
||||
0,
|
||||
0,
|
||||
128, /* cache line size */
|
||||
0, /* l1 cache */
|
||||
0, /* l2 cache */
|
||||
0, /* streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on RS64A processors. */
|
||||
|
@ -518,6 +521,7 @@ struct processor_costs rs64a_cost = {
|
|||
128, /* l1 cache */
|
||||
2048, /* l2 cache */
|
||||
1, /* streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on MPCCORE processors. */
|
||||
|
@ -537,6 +541,7 @@ struct processor_costs mpccore_cost = {
|
|||
4, /* l1 cache */
|
||||
16, /* l2 cache */
|
||||
1, /* streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on PPC403 processors. */
|
||||
|
@ -556,6 +561,7 @@ struct processor_costs ppc403_cost = {
|
|||
4, /* l1 cache */
|
||||
16, /* l2 cache */
|
||||
1, /* streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on PPC405 processors. */
|
||||
|
@ -575,6 +581,7 @@ struct processor_costs ppc405_cost = {
|
|||
16, /* l1 cache */
|
||||
128, /* l2 cache */
|
||||
1, /* streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on PPC440 processors. */
|
||||
|
@ -594,6 +601,7 @@ struct processor_costs ppc440_cost = {
|
|||
32, /* l1 cache */
|
||||
256, /* l2 cache */
|
||||
1, /* streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on PPC476 processors. */
|
||||
|
@ -613,6 +621,7 @@ struct processor_costs ppc476_cost = {
|
|||
32, /* l1 cache */
|
||||
512, /* l2 cache */
|
||||
1, /* streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on PPC601 processors. */
|
||||
|
@ -632,6 +641,7 @@ struct processor_costs ppc601_cost = {
|
|||
32, /* l1 cache */
|
||||
256, /* l2 cache */
|
||||
1, /* streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on PPC603 processors. */
|
||||
|
@ -651,6 +661,7 @@ struct processor_costs ppc603_cost = {
|
|||
8, /* l1 cache */
|
||||
64, /* l2 cache */
|
||||
1, /* streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on PPC604 processors. */
|
||||
|
@ -670,6 +681,7 @@ struct processor_costs ppc604_cost = {
|
|||
16, /* l1 cache */
|
||||
512, /* l2 cache */
|
||||
1, /* streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on PPC604e processors. */
|
||||
|
@ -689,6 +701,7 @@ struct processor_costs ppc604e_cost = {
|
|||
32, /* l1 cache */
|
||||
1024, /* l2 cache */
|
||||
1, /* streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on PPC620 processors. */
|
||||
|
@ -708,6 +721,7 @@ struct processor_costs ppc620_cost = {
|
|||
32, /* l1 cache */
|
||||
1024, /* l2 cache */
|
||||
1, /* streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on PPC630 processors. */
|
||||
|
@ -727,6 +741,7 @@ struct processor_costs ppc630_cost = {
|
|||
64, /* l1 cache */
|
||||
1024, /* l2 cache */
|
||||
1, /* streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on Cell processor. */
|
||||
|
@ -747,6 +762,7 @@ struct processor_costs ppccell_cost = {
|
|||
32, /* l1 cache */
|
||||
512, /* l2 cache */
|
||||
6, /* streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on PPC750 and PPC7400 processors. */
|
||||
|
@ -766,6 +782,7 @@ struct processor_costs ppc750_cost = {
|
|||
32, /* l1 cache */
|
||||
512, /* l2 cache */
|
||||
1, /* streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on PPC7450 processors. */
|
||||
|
@ -785,6 +802,7 @@ struct processor_costs ppc7450_cost = {
|
|||
32, /* l1 cache */
|
||||
1024, /* l2 cache */
|
||||
1, /* streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on PPC8540 processors. */
|
||||
|
@ -804,6 +822,7 @@ struct processor_costs ppc8540_cost = {
|
|||
32, /* l1 cache */
|
||||
256, /* l2 cache */
|
||||
1, /* prefetch streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on E300C2 and E300C3 cores. */
|
||||
|
@ -823,6 +842,7 @@ struct processor_costs ppce300c2c3_cost = {
|
|||
16, /* l1 cache */
|
||||
16, /* l2 cache */
|
||||
1, /* prefetch streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on PPCE500MC processors. */
|
||||
|
@ -842,6 +862,7 @@ struct processor_costs ppce500mc_cost = {
|
|||
32, /* l1 cache */
|
||||
128, /* l2 cache */
|
||||
1, /* prefetch streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on PPCE500MC64 processors. */
|
||||
|
@ -861,6 +882,7 @@ struct processor_costs ppce500mc64_cost = {
|
|||
32, /* l1 cache */
|
||||
128, /* l2 cache */
|
||||
1, /* prefetch streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on PPCE5500 processors. */
|
||||
|
@ -880,6 +902,7 @@ struct processor_costs ppce5500_cost = {
|
|||
32, /* l1 cache */
|
||||
128, /* l2 cache */
|
||||
1, /* prefetch streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on PPCE6500 processors. */
|
||||
|
@ -899,6 +922,7 @@ struct processor_costs ppce6500_cost = {
|
|||
32, /* l1 cache */
|
||||
128, /* l2 cache */
|
||||
1, /* prefetch streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on AppliedMicro Titan processors. */
|
||||
|
@ -918,6 +942,7 @@ struct processor_costs titan_cost = {
|
|||
32, /* l1 cache */
|
||||
512, /* l2 cache */
|
||||
1, /* prefetch streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on POWER4 and POWER5 processors. */
|
||||
|
@ -937,6 +962,7 @@ struct processor_costs power4_cost = {
|
|||
32, /* l1 cache */
|
||||
1024, /* l2 cache */
|
||||
8, /* prefetch streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on POWER6 processors. */
|
||||
|
@ -956,6 +982,7 @@ struct processor_costs power6_cost = {
|
|||
64, /* l1 cache */
|
||||
2048, /* l2 cache */
|
||||
16, /* prefetch streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on POWER7 processors. */
|
||||
|
@ -975,6 +1002,7 @@ struct processor_costs power7_cost = {
|
|||
32, /* l1 cache */
|
||||
256, /* l2 cache */
|
||||
12, /* prefetch streams */
|
||||
COSTS_N_INSNS (3), /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on POWER8 processors. */
|
||||
|
@ -994,6 +1022,7 @@ struct processor_costs power8_cost = {
|
|||
32, /* l1 cache */
|
||||
256, /* l2 cache */
|
||||
12, /* prefetch streams */
|
||||
COSTS_N_INSNS (3), /* SF->DF convert */
|
||||
};
|
||||
|
||||
/* Instruction costs on POWER A2 processors. */
|
||||
|
@ -1013,6 +1042,7 @@ struct processor_costs ppca2_cost = {
|
|||
16, /* l1 cache */
|
||||
2048, /* l2 cache */
|
||||
16, /* prefetch streams */
|
||||
0, /* SF->DF convert */
|
||||
};
|
||||
|
||||
|
||||
|
@ -30480,7 +30510,7 @@ rs6000_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
|
|||
|
||||
case FLOAT_EXTEND:
|
||||
if (mode == DFmode)
|
||||
*total = 0;
|
||||
*total = rs6000_cost->sfdf_convert;
|
||||
else
|
||||
*total = rs6000_cost->fp;
|
||||
return false;
|
||||
|
|
|
@ -5222,7 +5222,7 @@
|
|||
fmr %0,%1
|
||||
lfs%U1%X1 %0,%1
|
||||
#
|
||||
xxlor %x0,%x1,%x1
|
||||
xscpsgndp %x0,%x1,%x1
|
||||
lxsspx %x0,%y1"
|
||||
"&& reload_completed && REG_P (operands[1]) && REGNO (operands[0]) == REGNO (operands[1])"
|
||||
[(const_int 0)]
|
||||
|
@ -5230,7 +5230,7 @@
|
|||
emit_note (NOTE_INSN_DELETED);
|
||||
DONE;
|
||||
}
|
||||
[(set_attr "type" "fp,fp,fpload,fp,vecsimple,fpload")])
|
||||
[(set_attr "type" "fp,fp,fpload,fp,fp,fpload")])
|
||||
|
||||
(define_expand "truncdfsf2"
|
||||
[(set (match_operand:SF 0 "gpc_reg_operand" "")
|
||||
|
@ -5239,10 +5239,12 @@
|
|||
"")
|
||||
|
||||
(define_insn "*truncdfsf2_fpr"
|
||||
[(set (match_operand:SF 0 "gpc_reg_operand" "=f")
|
||||
(float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "d")))]
|
||||
[(set (match_operand:SF 0 "gpc_reg_operand" "=f,wy")
|
||||
(float_truncate:SF (match_operand:DF 1 "gpc_reg_operand" "d,ws")))]
|
||||
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
|
||||
"frsp %0,%1"
|
||||
"@
|
||||
frsp %0,%1
|
||||
xsrsp %x0,%x1"
|
||||
[(set_attr "type" "fp")])
|
||||
|
||||
;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in
|
||||
|
@ -8058,7 +8060,7 @@
|
|||
lwz%U1%X1 %0,%1
|
||||
stw%U0%X0 %1,%0
|
||||
fmr %0,%1
|
||||
xxlor %x0,%x1,%x1
|
||||
xscpsgndp %x0,%x1,%x1
|
||||
xxlxor %x0,%x0,%x0
|
||||
li %0,0
|
||||
<f32_li>
|
||||
|
@ -8070,7 +8072,7 @@
|
|||
mt%0 %1
|
||||
mf%1 %0
|
||||
nop"
|
||||
[(set_attr "type" "*,load,store,fp,vecsimple,vecsimple,integer,fpload,fpstore,fpload,fpstore,mftgpr,mffgpr,mtjmpr,mfjmpr,*")
|
||||
[(set_attr "type" "*,load,store,fp,fp,vecsimple,integer,fpload,fpstore,fpload,fpstore,mftgpr,mffgpr,mtjmpr,mfjmpr,*")
|
||||
(set_attr "length" "4")])
|
||||
|
||||
(define_insn "*mov<mode>_softfloat"
|
||||
|
|
|
@ -1,3 +1,10 @@
|
|||
2015-04-06 Michael Meissner <meissner@linux.vnet.ibm.com>
|
||||
|
||||
PR target/65614
|
||||
* gcc.target/powerpc/compress-float-ppc-pic.c: Run test on power5
|
||||
to get floating point compression.
|
||||
* gcc.target/powerpc/compress-float-ppc.c: Likewise.
|
||||
|
||||
2015-04-06 Jakub Jelinek <jakub@redhat.com>
|
||||
|
||||
PR preprocessor/61977
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
/* { dg-do compile { target powerpc_fprs } } */
|
||||
/* { dg-options "-O2 -fpic" } */
|
||||
/* { dg-options "-O2 -fpic -mcpu=power5" } */
|
||||
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power5" } } */
|
||||
|
||||
double foo (double x) {
|
||||
return x + 1.75;
|
||||
}
|
||||
|
|
|
@ -1,5 +1,7 @@
|
|||
/* { dg-do compile { target powerpc_fprs } } */
|
||||
/* { dg-options "-O2" } */
|
||||
/* { dg-options "-O2 -mcpu=power5" } */
|
||||
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power5" } } */
|
||||
|
||||
double foo (double x) {
|
||||
return x + 1.75;
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue