config.gcc: Add options for arch and tune on SPU.
2007-07-13 Sa Liu <saliu@de.ibm.com> * config.gcc: Add options for arch and tune on SPU. * config/spu/predicates.md: Add constant operands 0 and 1. * config/spu/spu-builtins.def: Add builtins for double precision floating point comparison: si_dfceq, si_dfcmeq, si_dfcgt, si_dfcmgt, si_dftsv, spu_cmpeq_13, spu_cmpabseq_1, spu_cmpgt_13, spu_cmpabsgt_1, spu_testsv. * config/spu/spu-c.c: Define __SPU_EDP__ when builtins invoked with a CELLEDP target. * config/spu/spu-protos.h: Add new function prototypes. * config/spu/spu.c (spu_override_options): Check options -march and -mtune. (spu_comp_icode): Add comparison code for DFmode and vector mode. (spu_emit_branch_or_set): Use the new code for DFmode and vector mode comparison. (spu_const_from_int): New. Create a vector constant from 4 ints. (get_vec_cmp_insn): New. Get insn index of vector compare instruction. (spu_emit_vector_compare): New. Emit vector compare. (spu_emit_vector_cond_expr): New. Emit vector conditional expression. * config/spu/spu.h: Add options -march and -mtune. Define processor types PROCESSOR_CELL and PROCESSOR_CELLEDP. Define macro CANONICALIZE_COMPARISON. * config/spu/spu.md: Add new insns for double precision compare and double precision vector compare. Add vcond and smax/smin patterns to enable DFmode vector conditional expression. * config/spu/spu.opt: Add options -march and -mtune. * config/spu/spu_internals.h: Add builtins for CELLEDP target: si_dfceq, si_dfcmeq, si_dfcgt, si_dfcmgt, si_dftsv. Add builtin for both CELL and CELLEDP targets: spu_testsv. * config/spu/spu_intrinsics.h: Add flag mnemonics for test special values. testsuite/ * gcc.dg/vect/fast-math-vect-reduc-7.c: Switch on test for V2DFmode vector conditional expression. * gcc.target/spu/dfcmeq.c: New. Test combination of abs and dfceq patterns. * gcc.target/spu/dfcmgt.c: New. Test combination of abs and dfcgt patterns. * gcc.target/spu/intrinsics-2.c: New. Test intrinsics for V2DFmode comparison and test special values. 
* lib/target-supports.exp: Switch on test for V2DFmode vector conditional expression. From-SVN: r126626
This commit is contained in:
parent
2826df069f
commit
39aeae8573
15 changed files with 1065 additions and 70 deletions
|
@ -1,3 +1,36 @@
|
|||
2007-07-13 Sa Liu <saliu@de.ibm.com>
|
||||
|
||||
* config.gcc: Add options for arch and tune on SPU.
|
||||
* config/spu/predicates.md: Add constant operands 0 and 1.
|
||||
* config/spu/spu-builtins.def: Add builtins for double precision
|
||||
floating point comparison: si_dfceq, si_dfcmeq, si_dfcgt, si_dfcmgt,
|
||||
si_dftsv, spu_cmpeq_13, spu_cmpabseq_1, spu_cmpgt_13, spu_cmpabsgt_1,
|
||||
spu_testsv.
|
||||
* config/spu/spu-c.c: Define __SPU_EDP__ when builtins invoked with
|
||||
a CELLEDP target.
|
||||
* config/spu/spu-protos.h: Add new function prototypes.
|
||||
* config/spu/spu.c (spu_override_options): Check options -march and
|
||||
-mtune.
|
||||
(spu_comp_icode): Add comparison code for DFmode and vector mode.
|
||||
(spu_emit_branch_or_set): Use the new code for DFmode and vector
|
||||
mode comparison.
|
||||
(spu_const_from_ints): New. Create a vector constant from 4 ints.
|
||||
(get_vec_cmp_insn): New. Get insn index of vector compare instruction.
|
||||
(spu_emit_vector_compare): New. Emit vector compare.
|
||||
(spu_emit_vector_cond_expr): New. Emit vector conditional expression.
|
||||
* config/spu/spu.h: Add options -march and -mtune. Define processor
|
||||
types PROCESSOR_CELL and PROCESSOR_CELLEDP. Define macro
|
||||
CANONICALIZE_COMPARISON.
|
||||
* config/spu/spu.md: Add new insns for double precision compare
|
||||
and double precision vector compare. Add vcond and smax/smin patterns
|
||||
to enable DFmode vector conditional expression.
|
||||
* config/spu/spu.opt: Add options -march and -mtune.
|
||||
* config/spu/spu_internals.h: Add builtins for CELLEDP target:
|
||||
si_dfceq, si_dfcmeq, si_dfcgt, si_dfcmgt, si_dftsv. Add builtin for
|
||||
both CELL and CELLEDP targets: spu_testsv.
|
||||
* config/spu/spu_intrinsics.h: Add flag mnemonics for test special
|
||||
values.
|
||||
|
||||
2007-07-13 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/32721
|
||||
|
|
|
@ -3142,6 +3142,23 @@ case "${target}" in
|
|||
esac
|
||||
;;
|
||||
|
||||
spu-*-*)
|
||||
supported_defaults="arch tune"
|
||||
|
||||
for which in arch tune; do
|
||||
eval "val=\$with_$which"
|
||||
case ${val} in
|
||||
"" | cell | celledp)
|
||||
# OK
|
||||
;;
|
||||
*)
|
||||
echo "Unknown cpu used in --with-$which=$val." 1>&2
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
;;
|
||||
|
||||
v850*-*-*)
|
||||
supported_defaults=cpu
|
||||
case ${with_cpu} in
|
||||
|
|
|
@ -16,6 +16,15 @@
|
|||
;; Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
|
||||
;; 02110-1301, USA.
|
||||
|
||||
;; Return 1 if operand is the constant zero of its mode: its code must be
;; one of const_int, const, const_double or const_vector, and it must be
;; the same rtx as CONST0_RTX (mode).
|
||||
(define_predicate "const_zero_operand"
|
||||
(and (match_code "const_int,const,const_double,const_vector")
|
||||
(match_test "op == CONST0_RTX (mode)")))
|
||||
|
||||
;; Return 1 if operand is the constant one of its mode: its code must be
;; one of const_int, const, const_double or const_vector, and it must be
;; the same rtx as CONST1_RTX (mode).
(define_predicate "const_one_operand"
|
||||
(and (match_code "const_int,const,const_double,const_vector")
|
||||
(match_test "op == CONST1_RTX (mode)")))
|
||||
|
||||
(define_predicate "spu_reg_operand"
|
||||
(and (match_operand 0 "register_operand")
|
||||
(ior (not (match_code "subreg"))
|
||||
|
|
|
@ -189,9 +189,14 @@ DEF_BUILTIN (SI_CFLTU, CODE_FOR_spu_cfltu, "si_cfltu", B_INSN,
|
|||
DEF_BUILTIN (SI_FRDS, CODE_FOR_spu_frds, "si_frds", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
|
||||
DEF_BUILTIN (SI_FESD, CODE_FOR_spu_fesd, "si_fesd", B_INSN, _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
|
||||
DEF_BUILTIN (SI_FCEQ, CODE_FOR_ceq_v4sf, "si_fceq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
|
||||
DEF_BUILTIN (SI_DFCEQ, CODE_FOR_ceq_v2df, "si_dfceq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
|
||||
DEF_BUILTIN (SI_FCMEQ, CODE_FOR_cmeq_v4sf, "si_fcmeq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
|
||||
DEF_BUILTIN (SI_DFCMEQ, CODE_FOR_cmeq_v2df, "si_dfcmeq", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
|
||||
DEF_BUILTIN (SI_FCGT, CODE_FOR_cgt_v4sf, "si_fcgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
|
||||
DEF_BUILTIN (SI_DFCGT, CODE_FOR_cgt_v2df, "si_dfcgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
|
||||
DEF_BUILTIN (SI_FCMGT, CODE_FOR_cmgt_v4sf, "si_fcmgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
|
||||
DEF_BUILTIN (SI_DFCMGT, CODE_FOR_cmgt_v2df, "si_dfcmgt", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
|
||||
DEF_BUILTIN (SI_DFTSV, CODE_FOR_dftsv, "si_dftsv", B_INSN, _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_U7))
|
||||
DEF_BUILTIN (SI_STOP, CODE_FOR_spu_stop, "si_stop", B_INSN, _A2(SPU_BTI_VOID, SPU_BTI_U14))
|
||||
DEF_BUILTIN (SI_STOPD, CODE_FOR_spu_stopd, "si_stopd", B_INSN, _A4(SPU_BTI_VOID, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
|
||||
DEF_BUILTIN (SI_LNOP, CODE_FOR_lnop, "si_lnop", B_INSN, _A1(SPU_BTI_VOID))
|
||||
|
@ -245,11 +250,10 @@ DEF_BUILTIN (SPU_SUMB, CODE_FOR_spu_sumb, "spu_sumb", B_INSN,
|
|||
DEF_BUILTIN (SPU_BISLED, CODE_FOR_spu_bisled, "spu_bisled", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_PTR))
|
||||
DEF_BUILTIN (SPU_BISLED_D, CODE_FOR_spu_bisledd, "spu_bisled_d", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_PTR))
|
||||
DEF_BUILTIN (SPU_BISLED_E, CODE_FOR_spu_bislede, "spu_bisled_e", B_BISLED, _A3(SPU_BTI_VOID, SPU_BTI_PTR, SPU_BTI_PTR))
|
||||
DEF_BUILTIN (SPU_CMPABSEQ, CODE_FOR_cmeq_v4sf, "spu_cmpabseq", B_INSN, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF))
|
||||
DEF_BUILTIN (SPU_CMPABSGT, CODE_FOR_cmgt_v4sf, "spu_cmpabsgt", B_INSN, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF))
|
||||
DEF_BUILTIN (SPU_IDISABLE, CODE_FOR_spu_idisable, "spu_idisable", B_INSN, _A1(SPU_BTI_VOID))
|
||||
DEF_BUILTIN (SPU_IENABLE, CODE_FOR_spu_ienable, "spu_ienable", B_INSN, _A1(SPU_BTI_VOID))
|
||||
DEF_BUILTIN (SPU_MASK_FOR_LOAD, CODE_FOR_spu_lvsr, "spu_lvsr", B_INSN, _A2(SPU_BTI_V16QI, SPU_BTI_PTR))
|
||||
DEF_BUILTIN (SPU_TESTSV, CODE_FOR_dftsv, "spu_testsv", B_INSN, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_U7))
|
||||
|
||||
/* definitions to support overloaded generic builtin functions: */
|
||||
|
||||
|
@ -339,6 +343,10 @@ DEF_BUILTIN (SPU_CMPEQ_9, CODE_FOR_ceq_v8hi, "spu_cmpeq_9",
|
|||
DEF_BUILTIN (SPU_CMPEQ_10, CODE_FOR_ceq_v8hi, "spu_cmpeq_10", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_V8HI, SPU_BTI_INTHI))
|
||||
DEF_BUILTIN (SPU_CMPEQ_11, CODE_FOR_ceq_v4si, "spu_cmpeq_11", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
|
||||
DEF_BUILTIN (SPU_CMPEQ_12, CODE_FOR_ceq_v4si, "spu_cmpeq_12", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
|
||||
DEF_BUILTIN (SPU_CMPEQ_13, CODE_FOR_ceq_v2df, "spu_cmpeq_13", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF))
|
||||
DEF_BUILTIN (SPU_CMPABSEQ, CODE_FOR_nothing, "spu_cmpabseq", B_OVERLOAD, _A1(SPU_BTI_VOID))
|
||||
DEF_BUILTIN (SPU_CMPABSEQ_0, CODE_FOR_cmeq_v4sf, "spu_cmpabseq_0", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF))
|
||||
DEF_BUILTIN (SPU_CMPABSEQ_1, CODE_FOR_cmeq_v2df, "spu_cmpabseq_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF))
|
||||
DEF_BUILTIN (SPU_CMPGT, CODE_FOR_nothing, "spu_cmpgt", B_OVERLOAD, _A1(SPU_BTI_VOID))
|
||||
DEF_BUILTIN (SPU_CMPGT_0, CODE_FOR_clgt_v16qi, "spu_cmpgt_0", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_UV16QI, SPU_BTI_UV16QI))
|
||||
DEF_BUILTIN (SPU_CMPGT_1, CODE_FOR_cgt_v16qi, "spu_cmpgt_1", B_INTERNAL, _A3(SPU_BTI_UV16QI, SPU_BTI_V16QI, SPU_BTI_V16QI))
|
||||
|
@ -353,6 +361,10 @@ DEF_BUILTIN (SPU_CMPGT_9, CODE_FOR_clgt_v8hi, "spu_cmpgt_9",
|
|||
DEF_BUILTIN (SPU_CMPGT_10, CODE_FOR_cgt_v8hi, "spu_cmpgt_10", B_INTERNAL, _A3(SPU_BTI_UV8HI, SPU_BTI_V8HI, SPU_BTI_INTHI))
|
||||
DEF_BUILTIN (SPU_CMPGT_11, CODE_FOR_cgt_v4si, "spu_cmpgt_11", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SI, SPU_BTI_INTSI))
|
||||
DEF_BUILTIN (SPU_CMPGT_12, CODE_FOR_clgt_v4si, "spu_cmpgt_12", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UINTSI))
|
||||
DEF_BUILTIN (SPU_CMPGT_13, CODE_FOR_cgt_v2df, "spu_cmpgt_13", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF))
|
||||
DEF_BUILTIN (SPU_CMPABSGT, CODE_FOR_nothing, "spu_cmpabsgt", B_OVERLOAD, _A1(SPU_BTI_VOID))
|
||||
DEF_BUILTIN (SPU_CMPABSGT_0, CODE_FOR_cmgt_v4sf, "spu_cmpabsgt_0", B_INTERNAL, _A3(SPU_BTI_UV4SI, SPU_BTI_V4SF, SPU_BTI_V4SF))
|
||||
DEF_BUILTIN (SPU_CMPABSGT_1, CODE_FOR_cmgt_v2df, "spu_cmpabsgt_1", B_INTERNAL, _A3(SPU_BTI_UV2DI, SPU_BTI_V2DF, SPU_BTI_V2DF))
|
||||
DEF_BUILTIN (SPU_HCMPEQ, CODE_FOR_nothing, "spu_hcmpeq", B_OVERLOAD, _A1(SPU_BTI_VOID))
|
||||
DEF_BUILTIN (SPU_HCMPEQ_0, CODE_FOR_spu_heq, "spu_hcmpeq_0", B_INTERNAL, _A3(SPU_BTI_VOID, SPU_BTI_INTSI, SPU_BTI_INTSI))
|
||||
DEF_BUILTIN (SPU_HCMPEQ_1, CODE_FOR_spu_heq, "spu_hcmpeq_1", B_INTERNAL, _A3(SPU_BTI_VOID, SPU_BTI_UINTSI, SPU_BTI_UINTSI))
|
||||
|
|
|
@ -138,6 +138,8 @@ spu_cpu_cpp_builtins (struct cpp_reader *pfile)
|
|||
builtin_define_std ("__SPU__");
|
||||
cpp_assert (pfile, "cpu=spu");
|
||||
cpp_assert (pfile, "machine=spu");
|
||||
if (spu_arch == PROCESSOR_CELLEDP)
|
||||
builtin_define_std ("__SPU_EDP__");
|
||||
builtin_define_std ("__vector=__attribute__((__spu_vector__))");
|
||||
}
|
||||
|
||||
|
|
|
@ -32,6 +32,7 @@ extern void spu_expand_insv (rtx * ops);
|
|||
extern int spu_expand_block_move (rtx * ops);
|
||||
extern void spu_emit_branch_or_set (int is_set, enum rtx_code code,
|
||||
rtx * operands);
|
||||
extern int spu_emit_vector_cond_expr (rtx, rtx, rtx, rtx, rtx, rtx);
|
||||
extern HOST_WIDE_INT const_double_to_hwint (rtx x);
|
||||
extern rtx hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v);
|
||||
extern void print_operand_address (FILE * file, register rtx addr);
|
||||
|
@ -43,6 +44,8 @@ extern void spu_expand_prologue (void);
|
|||
extern void spu_expand_epilogue (unsigned char sibcall_p);
|
||||
extern rtx spu_return_addr (int count, rtx frame);
|
||||
extern rtx spu_const (enum machine_mode mode, HOST_WIDE_INT val);
|
||||
extern rtx spu_const_from_ints (enum machine_mode mode,
|
||||
int a, int b, int c, int d);
|
||||
extern struct rtx_def *spu_float_const (const char *string,
|
||||
enum machine_mode mode);
|
||||
extern int immediate_load_p (rtx op, enum machine_mode mode);
|
||||
|
|
|
@ -95,6 +95,8 @@ static void emit_nop_for_insn (rtx insn);
|
|||
static bool insn_clobbers_hbr (rtx insn);
|
||||
static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
|
||||
int distance);
|
||||
static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
|
||||
enum machine_mode dmode);
|
||||
static rtx get_branch_target (rtx branch);
|
||||
static void insert_branch_hints (void);
|
||||
static void insert_nops (void);
|
||||
|
@ -138,6 +140,11 @@ static int spu_builtin_vectorization_cost (bool);
|
|||
extern const char *reg_names[];
|
||||
rtx spu_compare_op0, spu_compare_op1;
|
||||
|
||||
/* Which instruction set architecture to use. */
|
||||
int spu_arch;
|
||||
/* Which cpu are we tuning for. */
|
||||
int spu_tune;
|
||||
|
||||
enum spu_immediate {
|
||||
SPU_NONE,
|
||||
SPU_IL,
|
||||
|
@ -298,6 +305,28 @@ spu_override_options (void)
|
|||
|
||||
if (spu_fixed_range_string)
|
||||
fix_range (spu_fixed_range_string);
|
||||
|
||||
/* Determine processor architectural level. */
|
||||
if (spu_arch_string)
|
||||
{
|
||||
if (strcmp (&spu_arch_string[0], "cell") == 0)
|
||||
spu_arch = PROCESSOR_CELL;
|
||||
else if (strcmp (&spu_arch_string[0], "celledp") == 0)
|
||||
spu_arch = PROCESSOR_CELLEDP;
|
||||
else
|
||||
error ("Unknown architecture '%s'", &spu_arch_string[0]);
|
||||
}
|
||||
|
||||
/* Determine processor to tune for. */
|
||||
if (spu_tune_string)
|
||||
{
|
||||
if (strcmp (&spu_tune_string[0], "cell") == 0)
|
||||
spu_tune = PROCESSOR_CELL;
|
||||
else if (strcmp (&spu_tune_string[0], "celledp") == 0)
|
||||
spu_tune = PROCESSOR_CELLEDP;
|
||||
else
|
||||
error ("Unknown architecture '%s'", &spu_tune_string[0]);
|
||||
}
|
||||
}
|
||||
|
||||
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
|
||||
|
@ -646,16 +675,19 @@ spu_expand_block_move (rtx ops[])
|
|||
enum spu_comp_code
|
||||
{ SPU_EQ, SPU_GT, SPU_GTU };
|
||||
|
||||
|
||||
int spu_comp_icode[8][3] = {
|
||||
{CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
|
||||
{CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
|
||||
{CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
|
||||
{CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
|
||||
{CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
|
||||
{CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
|
||||
{0, 0, 0},
|
||||
{CODE_FOR_ceq_vec, 0, 0},
|
||||
int spu_comp_icode[12][3] = {
|
||||
{CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
|
||||
{CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
|
||||
{CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
|
||||
{CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
|
||||
{CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
|
||||
{CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
|
||||
{CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
|
||||
{CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
|
||||
{CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
|
||||
{CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
|
||||
{CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
|
||||
{CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
|
||||
};
|
||||
|
||||
/* Generate a compare for CODE. Return a brand-new rtx that represents
|
||||
|
@ -786,13 +818,26 @@ spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
|
|||
index = 6;
|
||||
break;
|
||||
case V16QImode:
|
||||
case V8HImode:
|
||||
case V4SImode:
|
||||
case V2DImode:
|
||||
case V4SFmode:
|
||||
case V2DFmode:
|
||||
index = 7;
|
||||
comp_mode = op_mode;
|
||||
break;
|
||||
case V8HImode:
|
||||
index = 8;
|
||||
comp_mode = op_mode;
|
||||
break;
|
||||
case V4SImode:
|
||||
index = 9;
|
||||
comp_mode = op_mode;
|
||||
break;
|
||||
case V4SFmode:
|
||||
index = 10;
|
||||
comp_mode = V4SImode;
|
||||
break;
|
||||
case V2DFmode:
|
||||
index = 11;
|
||||
comp_mode = V2DImode;
|
||||
break;
|
||||
case V2DImode:
|
||||
default:
|
||||
abort ();
|
||||
}
|
||||
|
@ -800,16 +845,19 @@ spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
|
|||
if (GET_MODE (spu_compare_op1) == DFmode)
|
||||
{
|
||||
rtx reg = gen_reg_rtx (DFmode);
|
||||
if (!flag_unsafe_math_optimizations
|
||||
if ((!flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
|
||||
|| (scode != SPU_GT && scode != SPU_EQ))
|
||||
abort ();
|
||||
if (reverse_compare)
|
||||
emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
|
||||
else
|
||||
emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
|
||||
reverse_compare = 0;
|
||||
spu_compare_op0 = reg;
|
||||
spu_compare_op1 = CONST0_RTX (DFmode);
|
||||
if (spu_arch == PROCESSOR_CELL)
|
||||
{
|
||||
if (reverse_compare)
|
||||
emit_insn (gen_subdf3 (reg, spu_compare_op1, spu_compare_op0));
|
||||
else
|
||||
emit_insn (gen_subdf3 (reg, spu_compare_op0, spu_compare_op1));
|
||||
reverse_compare = 0;
|
||||
spu_compare_op0 = reg;
|
||||
spu_compare_op1 = CONST0_RTX (DFmode);
|
||||
}
|
||||
}
|
||||
|
||||
if (is_set == 0 && spu_compare_op1 == const0_rtx
|
||||
|
@ -1884,6 +1932,30 @@ spu_const (enum machine_mode mode, HOST_WIDE_INT val)
|
|||
size.) */
|
||||
int spu_hint_dist = (8 * 4);
|
||||
|
||||
/* Create a MODE vector constant from 4 ints. */
|
||||
rtx
|
||||
spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
|
||||
{
|
||||
unsigned char arr[16];
|
||||
arr[0] = (a >> 24) & 0xff;
|
||||
arr[1] = (a >> 16) & 0xff;
|
||||
arr[2] = (a >> 8) & 0xff;
|
||||
arr[3] = (a >> 0) & 0xff;
|
||||
arr[4] = (b >> 24) & 0xff;
|
||||
arr[5] = (b >> 16) & 0xff;
|
||||
arr[6] = (b >> 8) & 0xff;
|
||||
arr[7] = (b >> 0) & 0xff;
|
||||
arr[8] = (c >> 24) & 0xff;
|
||||
arr[9] = (c >> 16) & 0xff;
|
||||
arr[10] = (c >> 8) & 0xff;
|
||||
arr[11] = (c >> 0) & 0xff;
|
||||
arr[12] = (d >> 24) & 0xff;
|
||||
arr[13] = (d >> 16) & 0xff;
|
||||
arr[14] = (d >> 8) & 0xff;
|
||||
arr[15] = (d >> 0) & 0xff;
|
||||
return array_to_constant(mode, arr);
|
||||
}
|
||||
|
||||
/* An array of these is used to propagate hints to predecessor blocks. */
|
||||
struct spu_bb_info
|
||||
{
|
||||
|
@ -4857,6 +4929,201 @@ spu_expand_vector_init (rtx target, rtx vals)
|
|||
}
|
||||
}
|
||||
|
||||
/* Return insn index for the vector compare instruction for given CODE,
|
||||
and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
|
||||
|
||||
static int
|
||||
get_vec_cmp_insn (enum rtx_code code,
|
||||
enum machine_mode dest_mode,
|
||||
enum machine_mode op_mode)
|
||||
|
||||
{
|
||||
switch (code)
|
||||
{
|
||||
case EQ:
|
||||
if (dest_mode == V16QImode && op_mode == V16QImode)
|
||||
return CODE_FOR_ceq_v16qi;
|
||||
if (dest_mode == V8HImode && op_mode == V8HImode)
|
||||
return CODE_FOR_ceq_v8hi;
|
||||
if (dest_mode == V4SImode && op_mode == V4SImode)
|
||||
return CODE_FOR_ceq_v4si;
|
||||
if (dest_mode == V4SImode && op_mode == V4SFmode)
|
||||
return CODE_FOR_ceq_v4sf;
|
||||
if (dest_mode == V2DImode && op_mode == V2DFmode)
|
||||
return CODE_FOR_ceq_v2df;
|
||||
break;
|
||||
case GT:
|
||||
if (dest_mode == V16QImode && op_mode == V16QImode)
|
||||
return CODE_FOR_cgt_v16qi;
|
||||
if (dest_mode == V8HImode && op_mode == V8HImode)
|
||||
return CODE_FOR_cgt_v8hi;
|
||||
if (dest_mode == V4SImode && op_mode == V4SImode)
|
||||
return CODE_FOR_cgt_v4si;
|
||||
if (dest_mode == V4SImode && op_mode == V4SFmode)
|
||||
return CODE_FOR_cgt_v4sf;
|
||||
if (dest_mode == V2DImode && op_mode == V2DFmode)
|
||||
return CODE_FOR_cgt_v2df;
|
||||
break;
|
||||
case GTU:
|
||||
if (dest_mode == V16QImode && op_mode == V16QImode)
|
||||
return CODE_FOR_clgt_v16qi;
|
||||
if (dest_mode == V8HImode && op_mode == V8HImode)
|
||||
return CODE_FOR_clgt_v8hi;
|
||||
if (dest_mode == V4SImode && op_mode == V4SImode)
|
||||
return CODE_FOR_clgt_v4si;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* Emit vector compare for operands OP0 and OP1 using code RCODE.
|
||||
DMODE is expected destination mode. This is a recursive function. */
|
||||
|
||||
static rtx
|
||||
spu_emit_vector_compare (enum rtx_code rcode,
|
||||
rtx op0, rtx op1,
|
||||
enum machine_mode dmode)
|
||||
{
|
||||
int vec_cmp_insn;
|
||||
rtx mask;
|
||||
enum machine_mode dest_mode;
|
||||
enum machine_mode op_mode = GET_MODE (op1);
|
||||
|
||||
gcc_assert (GET_MODE (op0) == GET_MODE (op1));
|
||||
|
||||
/* Floating point vector compare instructions uses destination V4SImode.
|
||||
Double floating point vector compare instructions uses destination V2DImode.
|
||||
Move destination to appropriate mode later. */
|
||||
if (dmode == V4SFmode)
|
||||
dest_mode = V4SImode;
|
||||
else if (dmode == V2DFmode)
|
||||
dest_mode = V2DImode;
|
||||
else
|
||||
dest_mode = dmode;
|
||||
|
||||
mask = gen_reg_rtx (dest_mode);
|
||||
vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
|
||||
|
||||
if (vec_cmp_insn == -1)
|
||||
{
|
||||
bool swap_operands = false;
|
||||
bool try_again = false;
|
||||
switch (rcode)
|
||||
{
|
||||
case LT:
|
||||
rcode = GT;
|
||||
swap_operands = true;
|
||||
try_again = true;
|
||||
break;
|
||||
case LTU:
|
||||
rcode = GTU;
|
||||
swap_operands = true;
|
||||
try_again = true;
|
||||
break;
|
||||
case NE:
|
||||
/* Treat A != B as ~(A==B). */
|
||||
{
|
||||
enum insn_code nor_code;
|
||||
rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
|
||||
nor_code = one_cmpl_optab->handlers[(int)dest_mode].insn_code;
|
||||
gcc_assert (nor_code != CODE_FOR_nothing);
|
||||
emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
|
||||
if (dmode != dest_mode)
|
||||
{
|
||||
rtx temp = gen_reg_rtx (dest_mode);
|
||||
convert_move (temp, mask, 0);
|
||||
return temp;
|
||||
}
|
||||
return mask;
|
||||
}
|
||||
break;
|
||||
case GE:
|
||||
case GEU:
|
||||
case LE:
|
||||
case LEU:
|
||||
/* Try GT/GTU/LT/LTU OR EQ */
|
||||
{
|
||||
rtx c_rtx, eq_rtx;
|
||||
enum insn_code ior_code;
|
||||
enum rtx_code new_code;
|
||||
|
||||
switch (rcode)
|
||||
{
|
||||
case GE: new_code = GT; break;
|
||||
case GEU: new_code = GTU; break;
|
||||
case LE: new_code = LT; break;
|
||||
case LEU: new_code = LTU; break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
|
||||
eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
|
||||
|
||||
ior_code = ior_optab->handlers[(int)dest_mode].insn_code;
|
||||
gcc_assert (ior_code != CODE_FOR_nothing);
|
||||
emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
|
||||
if (dmode != dest_mode)
|
||||
{
|
||||
rtx temp = gen_reg_rtx (dest_mode);
|
||||
convert_move (temp, mask, 0);
|
||||
return temp;
|
||||
}
|
||||
return mask;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
gcc_unreachable ();
|
||||
}
|
||||
|
||||
/* You only get two chances. */
|
||||
if (try_again)
|
||||
vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
|
||||
|
||||
gcc_assert (vec_cmp_insn != -1);
|
||||
|
||||
if (swap_operands)
|
||||
{
|
||||
rtx tmp;
|
||||
tmp = op0;
|
||||
op0 = op1;
|
||||
op1 = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
|
||||
if (dmode != dest_mode)
|
||||
{
|
||||
rtx temp = gen_reg_rtx (dest_mode);
|
||||
convert_move (temp, mask, 0);
|
||||
return temp;
|
||||
}
|
||||
return mask;
|
||||
}
|
||||
|
||||
|
||||
/* Emit vector conditional expression.
|
||||
DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
|
||||
CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
|
||||
|
||||
int
|
||||
spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
|
||||
rtx cond, rtx cc_op0, rtx cc_op1)
|
||||
{
|
||||
enum machine_mode dest_mode = GET_MODE (dest);
|
||||
enum rtx_code rcode = GET_CODE (cond);
|
||||
rtx mask;
|
||||
|
||||
/* Get the vector mask for the given relational operations. */
|
||||
mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
|
||||
|
||||
emit_insn(gen_selb (dest, op2, op1, mask));
|
||||
|
||||
return 1;
|
||||
}
|
||||
|
||||
static rtx
|
||||
spu_force_reg (enum machine_mode mode, rtx op)
|
||||
{
|
||||
|
|
|
@ -32,6 +32,23 @@
|
|||
extern int target_flags;
|
||||
extern const char *spu_fixed_range_string;
|
||||
|
||||
/* Which processor to generate code or schedule for.  spu_arch and
   spu_tune are assigned one of these values from the -march= and
   -mtune= option strings in spu_override_options.  */
|
||||
enum processor_type
|
||||
{
|
||||
/* Selected by the option value "cell".  */
PROCESSOR_CELL,
|
||||
/* Selected by the option value "celledp".  */
PROCESSOR_CELLEDP
|
||||
};
|
||||
|
||||
extern GTY(()) int spu_arch;
|
||||
extern GTY(()) int spu_tune;
|
||||
|
||||
/* Support for a compile-time default architecture and tuning. The rules are:
|
||||
--with-arch is ignored if -march is specified.
|
||||
--with-tune is ignored if -mtune is specified. */
|
||||
#define OPTION_DEFAULT_SPECS \
|
||||
{"arch", "%{!march=*:-march=%(VALUE)}" }, \
|
||||
{"tune", "%{!mtune=*:-mtune=%(VALUE)}" }
|
||||
|
||||
/* Default target_flags if no switches specified. */
|
||||
#ifndef TARGET_DEFAULT
|
||||
#define TARGET_DEFAULT (MASK_ERROR_RELOC | MASK_SAFE_DMA | MASK_BRANCH_HINTS)
|
||||
|
@ -605,7 +622,18 @@ targetm.resolve_overloaded_builtin = spu_resolve_overloaded_builtin; \
|
|||
#define NO_IMPLICIT_EXTERN_C 1
|
||||
|
||||
#define HANDLE_PRAGMA_PACK_PUSH_POP 1
|
||||
|
||||
|
||||
/* Canonicalize a comparison from one we don't have to one we do have.
   When CODE is LE, LT, LEU or LTU, exchange OP0 and OP1 and replace
   CODE by swap_condition (CODE); any other code is left untouched.
   CODE, OP0 and OP1 are assigned to, so they must be lvalues, and CODE
   is evaluated more than once.  */
|
||||
#define CANONICALIZE_COMPARISON(CODE,OP0,OP1) \
|
||||
do { \
|
||||
if (((CODE) == LE || (CODE) == LT || (CODE) == LEU || (CODE) == LTU)) \
|
||||
{ \
|
||||
rtx tem = (OP0); \
|
||||
(OP0) = (OP1); \
|
||||
(OP1) = tem; \
|
||||
(CODE) = swap_condition (CODE); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
/* These are set by the cmp patterns and used while expanding
|
||||
conditional branches. */
|
||||
|
|
|
@ -29,6 +29,7 @@
|
|||
(define_attr "length" ""
|
||||
(const_int 4))
|
||||
|
||||
;; Processor being tuned for; the value is taken from the spu_tune
;; variable.
;; NOTE(review): "cell,celledp" is listed in the same order as
;; enum processor_type in spu.h -- presumably the two must stay in sync.
(define_attr "tune" "cell,celledp" (const (symbol_ref "spu_tune")))
|
||||
;; Processor type -- this attribute must exactly match the processor_type
|
||||
;; enumeration in spu.h.
|
||||
|
||||
|
@ -59,9 +60,17 @@
|
|||
;; for 6 cycles and the rest of the operation pipelines for
|
||||
;; 7 cycles. The simplest way to model this is to simply ignore
|
||||
;; the 6 cycle stall.
|
||||
(define_insn_reservation "FPD" 7 (eq_attr "type" "fpd")
|
||||
(define_insn_reservation "FPD" 7
|
||||
(and (eq_attr "tune" "cell")
|
||||
(eq_attr "type" "fpd"))
|
||||
"pipe0 + pipe1, fp, nothing*5")
|
||||
|
||||
;; Tune for CELLEDP, 9 cycles, dual-issuable, fully pipelined.
;; Applies to insns of type "fpd" when the "tune" attribute is "celledp".
;; The reservation takes pipe0 and fp together in the first cycle and
;; no unit in the remaining 8 cycles.
|
||||
(define_insn_reservation "FPD_CELLEDP" 9
|
||||
(and (eq_attr "tune" "celledp")
|
||||
(eq_attr "type" "fpd"))
|
||||
"pipe0 + fp, nothing*8")
|
||||
|
||||
(define_insn_reservation "LNOP" 1 (eq_attr "type" "lnop")
|
||||
"pipe1")
|
||||
|
||||
|
@ -144,6 +153,7 @@
|
|||
(UNSPEC_WRCH 48)
|
||||
(UNSPEC_SPU_REALIGN_LOAD 49)
|
||||
(UNSPEC_SPU_MASK_FOR_LOAD 50)
|
||||
(UNSPEC_DFTSV 51)
|
||||
])
|
||||
|
||||
(include "predicates.md")
|
||||
|
@ -192,6 +202,16 @@
|
|||
(define_mode_macro VSF [SF V4SF])
|
||||
(define_mode_macro VDF [DF V2DF])
|
||||
|
||||
;; Vector modes V16QI, V8HI, V4SI, V4SF and V2DF.  These are the operand
;; modes for which get_vec_cmp_insn in spu.c returns EQ and GT compare
;; insns.
;; NOTE(review): presumably iterated over by the vector conditional
;; patterns; the users of this macro are outside this view -- confirm.
(define_mode_macro VCMP [V16QI
|
||||
V8HI
|
||||
V4SI
|
||||
V4SF
|
||||
V2DF])
|
||||
|
||||
;; Integer vector modes V16QI, V8HI and V4SI.  These are the operand
;; modes for which get_vec_cmp_insn in spu.c returns a GTU (unsigned)
;; compare insn.
;; NOTE(review): presumably iterated over by the unsigned vector
;; conditional patterns; the users are outside this view -- confirm.
(define_mode_macro VCMPU [V16QI
|
||||
V8HI
|
||||
V4SI])
|
||||
|
||||
(define_mode_attr bh [(QI "b") (V16QI "b")
|
||||
(HI "h") (V8HI "h")
|
||||
(SI "") (V4SI "")])
|
||||
|
@ -200,9 +220,14 @@
|
|||
(DF "d") (V2DF "d")])
|
||||
(define_mode_attr d6 [(SF "6") (V4SF "6")
|
||||
(DF "d") (V2DF "d")])
|
||||
(define_mode_attr f2i [(SF "SI") (V4SF "V4SI")
|
||||
|
||||
(define_mode_attr f2i [(SF "si") (V4SF "v4si")
|
||||
(DF "di") (V2DF "v2di")])
|
||||
(define_mode_attr F2I [(SF "SI") (V4SF "V4SI")
|
||||
(DF "DI") (V2DF "V2DI")])
|
||||
|
||||
(define_mode_attr DF2I [(DF "SI") (V2DF "V2DI")])
|
||||
|
||||
(define_mode_attr umask [(HI "f") (V8HI "f")
|
||||
(SI "g") (V4SI "g")])
|
||||
(define_mode_attr nmask [(HI "F") (V8HI "F")
|
||||
|
@ -990,8 +1015,8 @@
|
|||
(neg:VSF (match_operand:VSF 1 "spu_reg_operand" "")))
|
||||
(use (match_dup 2))])]
|
||||
""
|
||||
"operands[2] = gen_reg_rtx (<f2i>mode);
|
||||
emit_move_insn (operands[2], spu_const (<f2i>mode, -0x80000000ull));")
|
||||
"operands[2] = gen_reg_rtx (<F2I>mode);
|
||||
emit_move_insn (operands[2], spu_const (<F2I>mode, -0x80000000ull));")
|
||||
|
||||
(define_expand "neg<mode>2"
|
||||
[(parallel
|
||||
|
@ -999,22 +1024,22 @@
|
|||
(neg:VDF (match_operand:VDF 1 "spu_reg_operand" "")))
|
||||
(use (match_dup 2))])]
|
||||
""
|
||||
"operands[2] = gen_reg_rtx (<f2i>mode);
|
||||
emit_move_insn (operands[2], spu_const (<f2i>mode, -0x8000000000000000ull));")
|
||||
"operands[2] = gen_reg_rtx (<F2I>mode);
|
||||
emit_move_insn (operands[2], spu_const (<F2I>mode, -0x8000000000000000ull));")
|
||||
|
||||
(define_insn_and_split "_neg<mode>2"
|
||||
[(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
|
||||
(neg:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")))
|
||||
(use (match_operand:<f2i> 2 "spu_reg_operand" "r"))]
|
||||
(use (match_operand:<F2I> 2 "spu_reg_operand" "r"))]
|
||||
""
|
||||
"#"
|
||||
""
|
||||
[(set (match_dup:<f2i> 3)
|
||||
(xor:<f2i> (match_dup:<f2i> 4)
|
||||
(match_dup:<f2i> 2)))]
|
||||
[(set (match_dup:<F2I> 3)
|
||||
(xor:<F2I> (match_dup:<F2I> 4)
|
||||
(match_dup:<F2I> 2)))]
|
||||
{
|
||||
operands[3] = spu_gen_subreg (<f2i>mode, operands[0]);
|
||||
operands[4] = spu_gen_subreg (<f2i>mode, operands[1]);
|
||||
operands[3] = spu_gen_subreg (<F2I>mode, operands[0]);
|
||||
operands[4] = spu_gen_subreg (<F2I>mode, operands[1]);
|
||||
})
|
||||
|
||||
|
||||
|
@ -1026,8 +1051,8 @@
|
|||
(abs:VSF (match_operand:VSF 1 "spu_reg_operand" "")))
|
||||
(use (match_dup 2))])]
|
||||
""
|
||||
"operands[2] = gen_reg_rtx (<f2i>mode);
|
||||
emit_move_insn (operands[2], spu_const (<f2i>mode, 0x7fffffffull));")
|
||||
"operands[2] = gen_reg_rtx (<F2I>mode);
|
||||
emit_move_insn (operands[2], spu_const (<F2I>mode, 0x7fffffffull));")
|
||||
|
||||
(define_expand "abs<mode>2"
|
||||
[(parallel
|
||||
|
@ -1035,22 +1060,22 @@
|
|||
(abs:VDF (match_operand:VDF 1 "spu_reg_operand" "")))
|
||||
(use (match_dup 2))])]
|
||||
""
|
||||
"operands[2] = gen_reg_rtx (<f2i>mode);
|
||||
emit_move_insn (operands[2], spu_const (<f2i>mode, 0x7fffffffffffffffull));")
|
||||
"operands[2] = gen_reg_rtx (<F2I>mode);
|
||||
emit_move_insn (operands[2], spu_const (<F2I>mode, 0x7fffffffffffffffull));")
|
||||
|
||||
(define_insn_and_split "_abs<mode>2"
|
||||
[(set (match_operand:VSDF 0 "spu_reg_operand" "=r")
|
||||
(abs:VSDF (match_operand:VSDF 1 "spu_reg_operand" "r")))
|
||||
(use (match_operand:<f2i> 2 "spu_reg_operand" "r"))]
|
||||
(use (match_operand:<F2I> 2 "spu_reg_operand" "r"))]
|
||||
""
|
||||
"#"
|
||||
""
|
||||
[(set (match_dup:<f2i> 3)
|
||||
(and:<f2i> (match_dup:<f2i> 4)
|
||||
(match_dup:<f2i> 2)))]
|
||||
[(set (match_dup:<F2I> 3)
|
||||
(and:<F2I> (match_dup:<F2I> 4)
|
||||
(match_dup:<F2I> 2)))]
|
||||
{
|
||||
operands[3] = spu_gen_subreg (<f2i>mode, operands[0]);
|
||||
operands[4] = spu_gen_subreg (<f2i>mode, operands[1]);
|
||||
operands[3] = spu_gen_subreg (<F2I>mode, operands[0]);
|
||||
operands[4] = spu_gen_subreg (<F2I>mode, operands[1]);
|
||||
})
|
||||
|
||||
|
||||
|
@ -2493,27 +2518,173 @@
|
|||
(set_attr "length" "12")])
|
||||
|
||||
;; Equality compare of two VSF register operands; the result goes to an
;; integer-mode register (operand 0) and is emitted as a single "fceq".
;; NOTE(review): the <f2i> / <F2I> line pairs below appear to be the old
;; and new spellings from the diff; only one pair belongs to the real file.
(define_insn "ceq_<mode>"
|
||||
[(set (match_operand:<f2i> 0 "spu_reg_operand" "=r")
|
||||
(eq:<f2i> (match_operand:VSF 1 "spu_reg_operand" "r")
|
||||
[(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
|
||||
(eq:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")
|
||||
(match_operand:VSF 2 "spu_reg_operand" "r")))]
|
||||
""
|
||||
"fceq\t%0,%1,%2")
|
||||
|
||||
;; Magnitude equality: compares abs(operand 1) with abs(operand 2) for
;; VSF register operands and emits a single "fcmeq".
;; NOTE(review): the <f2i> / <F2I> line pairs below appear to be the old
;; and new spellings from the diff; only one pair belongs to the real file.
(define_insn "cmeq_<mode>"
|
||||
[(set (match_operand:<f2i> 0 "spu_reg_operand" "=r")
|
||||
(eq:<f2i> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
|
||||
[(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
|
||||
(eq:<F2I> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
|
||||
(abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))]
|
||||
""
|
||||
"fcmeq\t%0,%1,%2")
|
||||
|
||||
(define_insn "ceq_vec"
|
||||
;; Scalar DF "equal to zero" expander: operand 2 must satisfy
;; const_zero_operand and is not referenced by the C body.
;; When flag_unsafe_math_optimizations is set and spu_arch is
;; PROCESSOR_CELL, the test is done with V4SI integer operations on the
;; bit pattern of operand 1 and the expander finishes with DONE; in every
;; other case the C body does nothing and the RTL template is emitted.
;; NOTE(review): this text is a diff rendering.  The untyped match_operand
;; lines, the VECTOR_MODE_P condition, the "ceq...gb...ceqi" template and
;; the "length" attribute in the middle of this span appear to be leftover
;; lines of the removed "ceq_vec" insn, not part of ceq_df.
;; These implementations of ceq_df and cgt_df do not correctly handle
|
||||
;; NAN or INF. We will also get incorrect results when the result
|
||||
;; of the double subtract is too small.
|
||||
(define_expand "ceq_df"
|
||||
[(set (match_operand:SI 0 "spu_reg_operand" "=r")
|
||||
(eq:SI (match_operand 1 "spu_reg_operand" "r")
|
||||
(match_operand 2 "spu_reg_operand" "r")))]
|
||||
"VECTOR_MODE_P(GET_MODE(operands[1]))
|
||||
&& GET_MODE(operands[1]) == GET_MODE(operands[2])"
|
||||
"ceq\t%0,%1,%2\;gb\t%0,%0\;ceqi\t%0,%0,15"
|
||||
[(set_attr "length" "12")])
|
||||
(eq:SI (match_operand:DF 1 "spu_reg_operand" "r")
|
||||
(match_operand:DF 2 "const_zero_operand" "i")))]
|
||||
""
|
||||
{
|
||||
if (flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
|
||||
{
|
||||
rtx s0_ti = gen_reg_rtx(TImode);
|
||||
rtx s1_v4 = gen_reg_rtx(V4SImode);
|
||||
rtx s0_v4 = spu_gen_subreg(V4SImode, s0_ti);
|
||||
rtx to_ti = gen_reg_rtx(TImode);
|
||||
rtx to_v4 = gen_reg_rtx(V4SImode);
|
||||
rtx l_v4 = gen_reg_rtx(V4SImode);
|
||||
emit_insn (gen_spu_convert (l_v4, operands[1]));
|
||||
/* s0: words equal to 0; s1: words equal to 0x80000000 (sign bit only).  */
emit_insn (gen_movv4si(s1_v4, spu_const(V4SImode, -0x80000000ll)));
|
||||
emit_insn (gen_ceq_v4si(s0_v4, l_v4, CONST0_RTX(V4SImode)));
|
||||
emit_insn (gen_ceq_v4si(s1_v4, l_v4, s1_v4));
|
||||
/* Rotate the "== 0" mask by 4 bytes so the neighbouring word's result
   can be combined with this word's "0 or sign-only" result.  */
emit_insn (gen_rotqby_ti(to_ti, s0_ti, GEN_INT(4)));
|
||||
emit_insn (gen_spu_convert (to_v4, to_ti));
|
||||
emit_insn (gen_iorv4si3(s1_v4, s0_v4, s1_v4));
|
||||
emit_insn (gen_andv4si3(to_v4, to_v4, s1_v4));
|
||||
emit_insn (gen_spu_convert (operands[0], to_v4));
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
|
||||
;; Equality compare of two VDF register operands using the "dfceq"
;; instruction.  Only enabled when spu_arch == PROCESSOR_CELLEDP.
(define_insn "ceq_<mode>_celledp"
|
||||
[(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
|
||||
(eq:<DF2I> (match_operand:VDF 1 "spu_reg_operand" "r")
|
||||
(match_operand:VDF 2 "spu_reg_operand" "r")))]
|
||||
"spu_arch == PROCESSOR_CELLEDP"
|
||||
"dfceq\t%0,%1,%2"
|
||||
[(set_attr "type" "fpd")])
|
||||
|
||||
;; Magnitude equality (abs(op1) == abs(op2)) of two VDF register operands
;; using the "dfcmeq" instruction.  Only enabled when
;; spu_arch == PROCESSOR_CELLEDP.
(define_insn "cmeq_<mode>_celledp"
|
||||
[(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
|
||||
(eq:<DF2I> (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r"))
|
||||
(abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))]
|
||||
"spu_arch == PROCESSOR_CELLEDP"
|
||||
"dfcmeq\t%0,%1,%2"
|
||||
[(set_attr "type" "fpd")])
|
||||
|
||||
;; V2DF equality compare producing a V2DI mask.
;; When spu_arch == PROCESSOR_CELL the compare is emulated with V4SI
;; integer operations on the bit patterns of operands 1 and 2 and the
;; expander finishes with DONE.  Otherwise the C body does nothing and the
;; RTL template is emitted (presumably matched by ceq_<mode>_celledp).
(define_expand "ceq_v2df"
|
||||
[(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
|
||||
(eq:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r")
|
||||
(match_operand:V2DF 2 "spu_reg_operand" "r")))]
|
||||
""
|
||||
{
|
||||
if (spu_arch == PROCESSOR_CELL)
|
||||
{
|
||||
rtx ra = spu_gen_subreg (V4SImode, operands[1]);
|
||||
rtx rb = spu_gen_subreg (V4SImode, operands[2]);
|
||||
rtx temp = gen_reg_rtx (TImode);
|
||||
rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
|
||||
rtx temp2 = gen_reg_rtx (V4SImode);
|
||||
rtx biteq = gen_reg_rtx (V4SImode);
|
||||
rtx ahi_inf = gen_reg_rtx (V4SImode);
|
||||
rtx a_nan = gen_reg_rtx (V4SImode);
|
||||
rtx a_abs = gen_reg_rtx (V4SImode);
|
||||
rtx b_abs = gen_reg_rtx (V4SImode);
|
||||
rtx iszero = gen_reg_rtx (V4SImode);
|
||||
rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
|
||||
0x7FFFFFFF, 0xFFFFFFFF);
|
||||
rtx sign_mask = gen_reg_rtx (V4SImode);
|
||||
rtx nan_mask = gen_reg_rtx (V4SImode);
|
||||
rtx hihi_promote = gen_reg_rtx (TImode);
|
||||
|
||||
/* sign_mask clears the top bit of each 64-bit element; nan_mask holds
   0x7FF00000 in the first word of each element and 0 in the second.  */
emit_move_insn (sign_mask, pat);
|
||||
pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
|
||||
0x7FF00000, 0x0);
|
||||
emit_move_insn (nan_mask, pat);
|
||||
pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
|
||||
0x08090A0B, 0x18191A1B);
|
||||
emit_move_insn (hihi_promote, pat);
|
||||
|
||||
/* biteq: word-wise equality, then ANDed with a copy rotated by 32 bits
   so a word is set only if its neighbour compared equal too.  */
emit_insn (gen_ceq_v4si (biteq, ra, rb));
|
||||
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
|
||||
GEN_INT (4 * 8)));
|
||||
emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
|
||||
emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
|
||||
emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
|
||||
/* a_nan: operand 1 magnitude compared (unsigned) against nan_mask,
   combining the word-wise "greater" and "equal" results.  */
emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
|
||||
emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
|
||||
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
|
||||
GEN_INT (4 * 8)));
|
||||
emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
|
||||
emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
|
||||
/* iszero: both magnitudes are zero in this word and in its neighbour.  */
emit_insn (gen_iorv4si3 (temp2, a_abs, b_abs));
|
||||
emit_insn (gen_ceq_v4si (iszero, temp2, CONST0_RTX (V4SImode)));
|
||||
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
|
||||
GEN_INT (4 * 8)));
|
||||
emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
|
||||
/* Result: (bit-equal OR both zero), with a_nan masked out via andc
   (presumably AND-with-complement -- confirm), then widened to the
   full element by the hihi_promote shuffle.  */
emit_insn (gen_iorv4si3 (temp2, biteq, iszero));
|
||||
emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
|
||||
emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
|
||||
;; V2DF magnitude equality (abs(op1) == abs(op2)) producing a V2DI mask.
;; When spu_arch == PROCESSOR_CELL the compare is emulated with V4SI
;; integer operations on sign-cleared copies of the operands and the
;; expander finishes with DONE.  Otherwise the C body does nothing and the
;; RTL template is emitted (presumably matched by cmeq_<mode>_celledp).
(define_expand "cmeq_v2df"
|
||||
[(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
|
||||
(eq:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r"))
|
||||
(abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))]
|
||||
""
|
||||
{
|
||||
if(spu_arch == PROCESSOR_CELL)
|
||||
{
|
||||
rtx ra = spu_gen_subreg (V4SImode, operands[1]);
|
||||
rtx rb = spu_gen_subreg (V4SImode, operands[2]);
|
||||
rtx temp = gen_reg_rtx (TImode);
|
||||
rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
|
||||
rtx temp2 = gen_reg_rtx (V4SImode);
|
||||
rtx biteq = gen_reg_rtx (V4SImode);
|
||||
rtx ahi_inf = gen_reg_rtx (V4SImode);
|
||||
rtx a_nan = gen_reg_rtx (V4SImode);
|
||||
rtx a_abs = gen_reg_rtx (V4SImode);
|
||||
rtx b_abs = gen_reg_rtx (V4SImode);
|
||||
|
||||
rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
|
||||
0x7FFFFFFF, 0xFFFFFFFF);
|
||||
rtx sign_mask = gen_reg_rtx (V4SImode);
|
||||
rtx nan_mask = gen_reg_rtx (V4SImode);
|
||||
rtx hihi_promote = gen_reg_rtx (TImode);
|
||||
|
||||
emit_move_insn (sign_mask, pat);
|
||||
|
||||
pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
|
||||
0x7FF00000, 0x0);
|
||||
emit_move_insn (nan_mask, pat);
|
||||
pat = spu_const_from_ints (TImode, 0x00010203, 0x10111213,
|
||||
0x08090A0B, 0x18191A1B);
|
||||
emit_move_insn (hihi_promote, pat);
|
||||
|
||||
/* Clear the sign bits first, then require every word of an element
   to compare equal (word result ANDed with its rotated neighbour).  */
emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
|
||||
emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
|
||||
emit_insn (gen_ceq_v4si (biteq, a_abs, b_abs));
|
||||
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, biteq),
|
||||
GEN_INT (4 * 8)));
|
||||
emit_insn (gen_andv4si3 (biteq, biteq, temp_v4si));
|
||||
/* a_nan: operand 1 magnitude compared (unsigned) against nan_mask.  */
emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
|
||||
emit_insn (gen_ceq_v4si (ahi_inf, a_abs, nan_mask));
|
||||
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
|
||||
GEN_INT (4 * 8)));
|
||||
emit_insn (gen_andv4si3 (temp2, temp_v4si, ahi_inf));
|
||||
emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
|
||||
/* Mask out a_nan from the equality result and widen to the element.  */
emit_insn (gen_andc_v4si (temp2, biteq, a_nan));
|
||||
emit_insn (gen_shufb (operands[0], temp2, temp2, hihi_promote));
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
;; cgt
|
||||
|
@ -2584,19 +2755,215 @@ selb\t%0,%5,%0,%3"
|
|||
(set_attr "length" "36")])
|
||||
|
||||
;; Greater-than compare of two VSF register operands; the result goes to
;; an integer-mode register (operand 0) and is emitted as a single "fcgt".
;; NOTE(review): the <f2i> / <F2I> line pairs below appear to be the old
;; and new spellings from the diff; only one pair belongs to the real file.
(define_insn "cgt_<mode>"
|
||||
[(set (match_operand:<f2i> 0 "spu_reg_operand" "=r")
|
||||
(gt:<f2i> (match_operand:VSF 1 "spu_reg_operand" "r")
|
||||
[(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
|
||||
(gt:<F2I> (match_operand:VSF 1 "spu_reg_operand" "r")
|
||||
(match_operand:VSF 2 "spu_reg_operand" "r")))]
|
||||
""
|
||||
"fcgt\t%0,%1,%2")
|
||||
|
||||
;; Magnitude greater-than: compares abs(operand 1) with abs(operand 2)
;; for VSF register operands and emits a single "fcmgt".
;; NOTE(review): the <f2i> / <F2I> line pairs below appear to be the old
;; and new spellings from the diff; only one pair belongs to the real file.
(define_insn "cmgt_<mode>"
|
||||
[(set (match_operand:<f2i> 0 "spu_reg_operand" "=r")
|
||||
(gt:<f2i> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
|
||||
[(set (match_operand:<F2I> 0 "spu_reg_operand" "=r")
|
||||
(gt:<F2I> (abs:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
|
||||
(abs:VSF (match_operand:VSF 2 "spu_reg_operand" "r"))))]
|
||||
""
|
||||
"fcmgt\t%0,%1,%2")
|
||||
|
||||
;; Scalar DF "greater than zero" expander: operand 2 must satisfy
;; const_zero_operand and is not referenced by the C body.
;; When flag_unsafe_math_optimizations is set and spu_arch is
;; PROCESSOR_CELL, the test is done with V4SI integer compares on the bit
;; pattern of operand 1 and the expander finishes with DONE; in every
;; other case the C body does nothing and the RTL template is emitted.
;; NOTE(review): the compares below pass const0_rtx, whereas ceq_df passes
;; CONST0_RTX (V4SImode) for the same purpose -- confirm this is intended.
(define_expand "cgt_df"
|
||||
[(set (match_operand:SI 0 "spu_reg_operand" "=r")
|
||||
(gt:SI (match_operand:DF 1 "spu_reg_operand" "r")
|
||||
(match_operand:DF 2 "const_zero_operand" "i")))]
|
||||
""
|
||||
{
|
||||
if (flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
|
||||
{
|
||||
rtx s0_ti = gen_reg_rtx(TImode);
|
||||
rtx s1_v4 = gen_reg_rtx(V4SImode);
|
||||
rtx s0_v4 = spu_gen_subreg(V4SImode, s0_ti);
|
||||
rtx to_ti = gen_reg_rtx(TImode);
|
||||
rtx to_v4 = gen_reg_rtx(V4SImode);
|
||||
rtx l_v4 = gen_reg_rtx(V4SImode);
|
||||
emit_insn (gen_spu_convert(l_v4, operands[1]));
|
||||
/* s0: words equal to zero; s1: words greater than zero.  */
emit_insn (gen_ceq_v4si(s0_v4, l_v4, const0_rtx));
|
||||
emit_insn (gen_cgt_v4si(s1_v4, l_v4, const0_rtx));
|
||||
/* Combine this word's "== 0" with the rotated neighbour's "== 0" via
   andc, then OR in the "> 0" result.  */
emit_insn (gen_rotqby_ti(to_ti, s0_ti, GEN_INT(4)));
|
||||
emit_insn (gen_spu_convert(to_v4, to_ti));
|
||||
emit_insn (gen_andc_v4si(to_v4, s0_v4, to_v4));
|
||||
emit_insn (gen_iorv4si3(to_v4, to_v4, s1_v4));
|
||||
emit_insn (gen_spu_convert(operands[0], to_v4));
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
|
||||
;; Greater-than compare of two VDF register operands using the "dfcgt"
;; instruction.  Only enabled when spu_arch == PROCESSOR_CELLEDP.
(define_insn "cgt_<mode>_celledp"
|
||||
[(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
|
||||
(gt:<DF2I> (match_operand:VDF 1 "spu_reg_operand" "r")
|
||||
(match_operand:VDF 2 "spu_reg_operand" "r")))]
|
||||
"spu_arch == PROCESSOR_CELLEDP"
|
||||
"dfcgt\t%0,%1,%2"
|
||||
[(set_attr "type" "fpd")])
|
||||
|
||||
;; Magnitude greater-than (abs(op1) > abs(op2)) of two VDF register
;; operands using the "dfcmgt" instruction.  Only enabled when
;; spu_arch == PROCESSOR_CELLEDP.
(define_insn "cmgt_<mode>_celledp"
|
||||
[(set (match_operand:<DF2I> 0 "spu_reg_operand" "=r")
|
||||
(gt:<DF2I> (abs:VDF (match_operand:VDF 1 "spu_reg_operand" "r"))
|
||||
(abs:VDF (match_operand:VDF 2 "spu_reg_operand" "r"))))]
|
||||
"spu_arch == PROCESSOR_CELLEDP"
|
||||
"dfcmgt\t%0,%1,%2"
|
||||
[(set_attr "type" "fpd")])
|
||||
|
||||
;; V2DF greater-than compare producing a V2DI mask.
;; When spu_arch == PROCESSOR_CELL the compare is emulated with V4SI
;; integer operations and the expander finishes with DONE.  Otherwise the
;; C body does nothing and the RTL template is emitted (presumably matched
;; by cgt_<mode>_celledp).
;; Outline of the emulation, as emitted below:
;;   1. build a NaN mask for each operand and OR them together;
;;   2. for each operand, compute (0 - magnitude) with a borrow chain and
;;      select between magnitude and that value using the replicated sign;
;;   3. compare the adjusted values word-wise and combine the first-word
;;      and second-word results;
;;   4. clear result lanes flagged by the NaN mask.
(define_expand "cgt_v2df"
|
||||
[(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
|
||||
(gt:V2DI (match_operand:V2DF 1 "spu_reg_operand" "r")
|
||||
(match_operand:V2DF 2 "spu_reg_operand" "r")))]
|
||||
""
|
||||
{
|
||||
if(spu_arch == PROCESSOR_CELL)
|
||||
{
|
||||
rtx ra = spu_gen_subreg (V4SImode, operands[1]);
|
||||
rtx rb = spu_gen_subreg (V4SImode, operands[2]);
|
||||
rtx zero = gen_reg_rtx (V4SImode);
|
||||
rtx temp = gen_reg_rtx (TImode);
|
||||
rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
|
||||
rtx temp2 = gen_reg_rtx (V4SImode);
|
||||
rtx hi_inf = gen_reg_rtx (V4SImode);
|
||||
rtx a_nan = gen_reg_rtx (V4SImode);
|
||||
rtx b_nan = gen_reg_rtx (V4SImode);
|
||||
rtx a_abs = gen_reg_rtx (V4SImode);
|
||||
rtx b_abs = gen_reg_rtx (V4SImode);
|
||||
rtx asel = gen_reg_rtx (V4SImode);
|
||||
rtx bsel = gen_reg_rtx (V4SImode);
|
||||
rtx abor = gen_reg_rtx (V4SImode);
|
||||
rtx bbor = gen_reg_rtx (V4SImode);
|
||||
rtx gt_hi = gen_reg_rtx (V4SImode);
|
||||
rtx gt_lo = gen_reg_rtx (V4SImode);
|
||||
rtx sign_mask = gen_reg_rtx (V4SImode);
|
||||
rtx nan_mask = gen_reg_rtx (V4SImode);
|
||||
rtx hi_promote = gen_reg_rtx (TImode);
|
||||
rtx borrow_shuffle = gen_reg_rtx (TImode);
|
||||
rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
|
||||
0x7FFFFFFF, 0xFFFFFFFF);
|
||||
emit_move_insn (sign_mask, pat);
|
||||
pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
|
||||
0x7FF00000, 0x0);
|
||||
emit_move_insn (nan_mask, pat);
|
||||
pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
|
||||
0x08090A0B, 0x08090A0B);
|
||||
emit_move_insn (hi_promote, pat);
|
||||
pat = spu_const_from_ints (TImode, 0x04050607, 0xC0C0C0C0,
|
||||
0x0C0D0E0F, 0xC0C0C0C0);
|
||||
emit_move_insn (borrow_shuffle, pat);
|
||||

|
||||
/* Step 1a: NaN mask for operand 1 (a_nan is first used as scratch for
   the sign-cleared value, then overwritten with the mask).  */
emit_insn (gen_andv4si3 (a_nan, ra, sign_mask));
|
||||
emit_insn (gen_ceq_v4si (hi_inf, a_nan, nan_mask));
|
||||
emit_insn (gen_clgt_v4si (a_nan, a_nan, nan_mask));
|
||||
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
|
||||
GEN_INT (4 * 8)));
|
||||
emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
|
||||
emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
|
||||
emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
|
||||
/* Step 1b: same for operand 2, then merge into a_nan.  */
emit_insn (gen_andv4si3 (b_nan, rb, sign_mask));
|
||||
emit_insn (gen_ceq_v4si (hi_inf, b_nan, nan_mask));
|
||||
emit_insn (gen_clgt_v4si (b_nan, b_nan, nan_mask));
|
||||
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
|
||||
GEN_INT (4 * 8)));
|
||||
emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
|
||||
emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
|
||||
emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
|
||||
emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
|
||||
emit_move_insn (zero, CONST0_RTX (V4SImode));
|
||||
/* Step 2a: asel replicates operand 1's sign across the element; abor
   becomes either a_abs or (0 - a_abs), chosen by asel.  */
emit_insn (gen_ashrv4si3 (asel, ra, spu_const (V4SImode, 31)));
|
||||
emit_insn (gen_shufb (asel, asel, asel, hi_promote));
|
||||
emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
|
||||
emit_insn (gen_bg_v4si (abor, zero, a_abs));
|
||||
emit_insn (gen_shufb (abor, abor, abor, borrow_shuffle));
|
||||
emit_insn (gen_sfx_v4si (abor, zero, a_abs, abor));
|
||||
emit_insn (gen_selb (abor, a_abs, abor, asel));
|
||||
/* Step 2b: same for operand 2.  */
emit_insn (gen_ashrv4si3 (bsel, rb, spu_const (V4SImode, 31)));
|
||||
emit_insn (gen_shufb (bsel, bsel, bsel, hi_promote));
|
||||
emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
|
||||
emit_insn (gen_bg_v4si (bbor, zero, b_abs));
|
||||
emit_insn (gen_shufb (bbor, bbor, bbor, borrow_shuffle));
|
||||
emit_insn (gen_sfx_v4si (bbor, zero, b_abs, bbor));
|
||||
emit_insn (gen_selb (bbor, b_abs, bbor, bsel));
|
||||
/* Step 3: signed compare (gt_hi), unsigned compare (gt_lo) and equality
   of the adjusted values; result = gt_hi | (equal & rotated gt_lo).  */
emit_insn (gen_cgt_v4si (gt_hi, abor, bbor));
|
||||
emit_insn (gen_clgt_v4si (gt_lo, abor, bbor));
|
||||
emit_insn (gen_ceq_v4si (temp2, abor, bbor));
|
||||
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
|
||||
GEN_INT (4 * 8)));
|
||||
emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
|
||||
emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
|
||||
|
||||
/* Step 4: widen to the full element and clear NaN lanes.  */
emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
|
||||
emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
|
||||
emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2));
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
|
||||
;; V2DF magnitude greater-than (abs(op1) > abs(op2)) producing a V2DI mask.
;; When spu_arch == PROCESSOR_CELL the compare is emulated with V4SI
;; integer operations on sign-cleared copies of the operands and the
;; expander finishes with DONE.  Otherwise the C body does nothing and the
;; RTL template is emitted (presumably matched by cmgt_<mode>_celledp).
;; NOTE(review): gt_hi and gt_lo are computed by two identical
;; gen_clgt_v4si calls on the same inputs; one of them looks redundant --
;; confirm before simplifying.
(define_expand "cmgt_v2df"
|
||||
[(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
|
||||
(gt:V2DI (abs:V2DF (match_operand:V2DF 1 "spu_reg_operand" "r"))
|
||||
(abs:V2DF (match_operand:V2DF 2 "spu_reg_operand" "r"))))]
|
||||
""
|
||||
{
|
||||
if(spu_arch == PROCESSOR_CELL)
|
||||
{
|
||||
rtx ra = spu_gen_subreg (V4SImode, operands[1]);
|
||||
rtx rb = spu_gen_subreg (V4SImode, operands[2]);
|
||||
rtx temp = gen_reg_rtx (TImode);
|
||||
rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
|
||||
rtx temp2 = gen_reg_rtx (V4SImode);
|
||||
rtx hi_inf = gen_reg_rtx (V4SImode);
|
||||
rtx a_nan = gen_reg_rtx (V4SImode);
|
||||
rtx b_nan = gen_reg_rtx (V4SImode);
|
||||
rtx a_abs = gen_reg_rtx (V4SImode);
|
||||
rtx b_abs = gen_reg_rtx (V4SImode);
|
||||
rtx gt_hi = gen_reg_rtx (V4SImode);
|
||||
rtx gt_lo = gen_reg_rtx (V4SImode);
|
||||
rtx sign_mask = gen_reg_rtx (V4SImode);
|
||||
rtx nan_mask = gen_reg_rtx (V4SImode);
|
||||
rtx hi_promote = gen_reg_rtx (TImode);
|
||||
rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
|
||||
0x7FFFFFFF, 0xFFFFFFFF);
|
||||
emit_move_insn (sign_mask, pat);
|
||||
pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
|
||||
0x7FF00000, 0x0);
|
||||
emit_move_insn (nan_mask, pat);
|
||||
pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
|
||||
0x08090A0B, 0x08090A0B);
|
||||
emit_move_insn (hi_promote, pat);
|
||||
|
||||
/* NaN mask for operand 1, built from its sign-cleared value.  */
emit_insn (gen_andv4si3 (a_abs, ra, sign_mask));
|
||||
emit_insn (gen_ceq_v4si (hi_inf, a_abs, nan_mask));
|
||||
emit_insn (gen_clgt_v4si (a_nan, a_abs, nan_mask));
|
||||
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, a_nan),
|
||||
GEN_INT (4 * 8)));
|
||||
emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
|
||||
emit_insn (gen_iorv4si3 (a_nan, a_nan, temp2));
|
||||
emit_insn (gen_shufb (a_nan, a_nan, a_nan, hi_promote));
|
||||
/* NaN mask for operand 2, then merged into a_nan.  */
emit_insn (gen_andv4si3 (b_abs, rb, sign_mask));
|
||||
emit_insn (gen_ceq_v4si (hi_inf, b_abs, nan_mask));
|
||||
emit_insn (gen_clgt_v4si (b_nan, b_abs, nan_mask));
|
||||
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, b_nan),
|
||||
GEN_INT (4 * 8)));
|
||||
emit_insn (gen_andv4si3 (temp2, temp_v4si, hi_inf));
|
||||
emit_insn (gen_iorv4si3 (b_nan, b_nan, temp2));
|
||||
emit_insn (gen_shufb (b_nan, b_nan, b_nan, hi_promote));
|
||||
emit_insn (gen_iorv4si3 (a_nan, a_nan, b_nan));
|
||||
|
||||
/* Unsigned word-wise compare of the magnitudes; result is
   gt_hi | (equal & rotated gt_lo), widened, with NaN lanes cleared.  */
emit_insn (gen_clgt_v4si (gt_hi, a_abs, b_abs));
|
||||
emit_insn (gen_clgt_v4si (gt_lo, a_abs, b_abs));
|
||||
emit_insn (gen_ceq_v4si (temp2, a_abs, b_abs));
|
||||
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, gt_lo),
|
||||
GEN_INT (4 * 8)));
|
||||
emit_insn (gen_andv4si3 (temp2, temp2, temp_v4si));
|
||||
emit_insn (gen_iorv4si3 (temp2, gt_hi, temp2));
|
||||
emit_insn (gen_shufb (temp2, temp2, temp2, hi_promote));
|
||||
emit_insn (gen_andc_v4si (temp2, temp2, a_nan));
|
||||
emit_move_insn (operands[0], spu_gen_subreg (V2DImode, temp2));
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
;; clgt
|
||||
|
||||
|
@ -2656,6 +3023,150 @@ selb\t%0,%4,%0,%3"
|
|||
(set_attr "length" "32")])
|
||||
|
||||
|
||||
;; dftsv
|
||||
;; Test-special-value on a V2DF register operand using the "dftsv"
;; instruction; operand 2 is a constant integer passed through as the
;; instruction's immediate.  Only enabled when
;; spu_arch == PROCESSOR_CELLEDP.
(define_insn "dftsv_celledp"
|
||||
[(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
|
||||
(unspec [(match_operand:V2DF 1 "spu_reg_operand" "r")
|
||||
(match_operand:SI 2 "const_int_operand" "i")] UNSPEC_DFTSV))]
|
||||
"spu_arch == PROCESSOR_CELLEDP"
|
||||
"dftsv\t%0,%1,%2"
|
||||
[(set_attr "type" "fpd")])
|
||||
|
||||
;; Test-special-value expander for V2DF.  Operand 2 is a constant bit mask
;; selecting which classes to test; the bits checked below are
;;   0x40 NaN, 0x20 +inf, 0x10 -inf, 0x08 +0, 0x04 -0,
;;   0x02 +denorm, 0x01 -denorm.
;; When spu_arch == PROCESSOR_CELL the test is emulated with V4SI integer
;; operations: `result` starts as all zeros, each selected class ORs its
;; mask into it, and it is finally moved to operand 0 (DONE).  Code is only
;; emitted for the classes whose bits are set in operand 2.  Otherwise the
;; C body does nothing and the RTL template is emitted (presumably matched
;; by dftsv_celledp).
(define_expand "dftsv"
|
||||
[(set (match_operand:V2DI 0 "spu_reg_operand" "=r")
|
||||
(unspec [(match_operand:V2DF 1 "spu_reg_operand" "r")
|
||||
(match_operand:SI 2 "const_int_operand" "i")] UNSPEC_DFTSV))]
|
||||
""
|
||||
{
|
||||
if(spu_arch == PROCESSOR_CELL)
|
||||
{
|
||||
rtx result = gen_reg_rtx (V4SImode);
|
||||
emit_move_insn (result, CONST0_RTX (V4SImode));
|
||||
|
||||
if (INTVAL (operands[2]))
|
||||
{
|
||||
rtx ra = spu_gen_subreg (V4SImode, operands[1]);
|
||||
rtx abs = gen_reg_rtx (V4SImode);
|
||||
rtx sign = gen_reg_rtx (V4SImode);
|
||||
rtx temp = gen_reg_rtx (TImode);
|
||||
rtx temp_v4si = spu_gen_subreg (V4SImode, temp);
|
||||
rtx temp2 = gen_reg_rtx (V4SImode);
|
||||
rtx pat = spu_const_from_ints (V4SImode, 0x7FFFFFFF, 0xFFFFFFFF,
|
||||
0x7FFFFFFF, 0xFFFFFFFF);
|
||||
rtx sign_mask = gen_reg_rtx (V4SImode);
|
||||
rtx hi_promote = gen_reg_rtx (TImode);
|
||||
emit_move_insn (sign_mask, pat);
|
||||
pat = spu_const_from_ints (TImode, 0x00010203, 0x00010203,
|
||||
0x08090A0B, 0x08090A0B);
|
||||
emit_move_insn (hi_promote, pat);
|
||||
|
||||
/* sign: arithmetic shift right by 31, then replicated across the
   element by hi_promote; abs: operand with sign bit cleared.  */
emit_insn (gen_ashrv4si3 (sign, ra, spu_const (V4SImode, 31)));
|
||||
emit_insn (gen_shufb (sign, sign, sign, hi_promote));
|
||||
emit_insn (gen_andv4si3 (abs, ra, sign_mask));
|
||||
|
||||
/* NaN or +inf or -inf */
|
||||
if (INTVAL (operands[2]) & 0x70)
|
||||
{
|
||||
rtx nan_mask = gen_reg_rtx (V4SImode);
|
||||
rtx isinf = gen_reg_rtx (V4SImode);
|
||||
pat = spu_const_from_ints (V4SImode, 0x7FF00000, 0x0,
|
||||
0x7FF00000, 0x0);
|
||||
emit_move_insn (nan_mask, pat);
|
||||
emit_insn (gen_ceq_v4si (isinf, abs, nan_mask));
|
||||
|
||||
/* NaN */
|
||||
/* This branch reads isinf before the +/-inf branch below rewrites it.  */
if (INTVAL (operands[2]) & 0x40)
|
||||
{
|
||||
rtx isnan = gen_reg_rtx (V4SImode);
|
||||
emit_insn (gen_clgt_v4si (isnan, abs, nan_mask));
|
||||
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isnan),
|
||||
GEN_INT (4 * 8)));
|
||||
emit_insn (gen_andv4si3 (temp2, temp_v4si, isinf));
|
||||
emit_insn (gen_iorv4si3 (isnan, isnan, temp2));
|
||||
emit_insn (gen_shufb (isnan, isnan, isnan, hi_promote));
|
||||
emit_insn (gen_iorv4si3 (result, result, isnan));
|
||||
}
|
||||
/* +inf or -inf */
|
||||
if (INTVAL (operands[2]) & 0x30)
|
||||
{
|
||||
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, isinf),
|
||||
GEN_INT (4 * 8)));
|
||||
emit_insn (gen_andv4si3 (isinf, isinf, temp_v4si));
|
||||
emit_insn (gen_shufb (isinf, isinf, isinf, hi_promote));
|
||||
|
||||
/* +inf */
|
||||
if (INTVAL (operands[2]) & 0x20)
|
||||
{
|
||||
emit_insn (gen_andc_v4si (temp2, isinf, sign));
|
||||
emit_insn (gen_iorv4si3 (result, result, temp2));
|
||||
}
|
||||
/* -inf */
|
||||
if (INTVAL (operands[2]) & 0x10)
|
||||
{
|
||||
emit_insn (gen_andv4si3 (temp2, isinf, sign));
|
||||
emit_insn (gen_iorv4si3 (result, result, temp2));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/* 0 or denorm */
|
||||
if (INTVAL (operands[2]) & 0xF)
|
||||
{
|
||||
rtx iszero = gen_reg_rtx (V4SImode);
|
||||
emit_insn (gen_ceq_v4si (iszero, abs, CONST0_RTX (V4SImode)));
|
||||
emit_insn (gen_rotlti3 (temp, spu_gen_subreg (TImode, iszero),
|
||||
GEN_INT (4 * 8)));
|
||||
emit_insn (gen_andv4si3 (iszero, iszero, temp_v4si));
|
||||
|
||||
/* denorm */
|
||||
/* isdenorm = NOR (abs > 0xFFFFF, iszero), widened by hi_promote.  */
if (INTVAL (operands[2]) & 0x3)
|
||||
{
|
||||
rtx isdenorm = gen_reg_rtx (V4SImode);
|
||||
rtx denorm_mask = gen_reg_rtx (V4SImode);
|
||||
emit_move_insn (denorm_mask, spu_const (V4SImode, 0xFFFFF));
|
||||
emit_insn (gen_clgt_v4si (isdenorm, abs, denorm_mask));
|
||||
emit_insn (gen_nor_v4si (isdenorm, isdenorm, iszero));
|
||||
emit_insn (gen_shufb (isdenorm, isdenorm,
|
||||
isdenorm, hi_promote));
|
||||
/* +denorm */
|
||||
if (INTVAL (operands[2]) & 0x2)
|
||||
{
|
||||
emit_insn (gen_andc_v4si (temp2, isdenorm, sign));
|
||||
emit_insn (gen_iorv4si3 (result, result, temp2));
|
||||
}
|
||||
/* -denorm */
|
||||
if (INTVAL (operands[2]) & 0x1)
|
||||
{
|
||||
emit_insn (gen_andv4si3 (temp2, isdenorm, sign));
|
||||
emit_insn (gen_iorv4si3 (result, result, temp2));
|
||||
}
|
||||
}
|
||||
|
||||
/* 0 */
|
||||
if (INTVAL (operands[2]) & 0xC)
|
||||
{
|
||||
emit_insn (gen_shufb (iszero, iszero, iszero, hi_promote));
|
||||
/* +0 */
|
||||
if (INTVAL (operands[2]) & 0x8)
|
||||
{
|
||||
emit_insn (gen_andc_v4si (temp2, iszero, sign));
|
||||
emit_insn (gen_iorv4si3 (result, result, temp2));
|
||||
}
|
||||
/* -0 */
|
||||
if (INTVAL (operands[2]) & 0x4)
|
||||
{
|
||||
emit_insn (gen_andv4si3 (temp2, iszero, sign));
|
||||
emit_insn (gen_iorv4si3 (result, result, temp2));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
emit_move_insn (operands[0], spu_gen_subreg (V2DImode, result));
|
||||
DONE;
|
||||
}
|
||||
})
|
||||
|
||||
|
||||
;; branches
|
||||
|
||||
(define_insn ""
|
||||
|
@ -2747,6 +3258,53 @@ selb\t%0,%4,%0,%3"
|
|||
DONE;
|
||||
})
|
||||
|
||||
;; DF compare expander.  It emits no RTL of its own: it only records the
;; two operands in spu_compare_op0 / spu_compare_op1 and finishes with
;; DONE (presumably for a later branch/set expander to consume -- confirm
;; against the users of those variables).
;; Enabled for PROCESSOR_CELLEDP, or for PROCESSOR_CELL only when
;; flag_unsafe_math_optimizations is set.
(define_expand "cmpdf"
|
||||
[(set (cc0)
|
||||
(compare (match_operand:DF 0 "register_operand" "")
|
||||
(match_operand:DF 1 "register_operand" "")))]
|
||||
"(flag_unsafe_math_optimizations && spu_arch == PROCESSOR_CELL)
|
||||
|| spu_arch == PROCESSOR_CELLEDP "
|
||||
"{
|
||||
spu_compare_op0 = operands[0];
|
||||
spu_compare_op1 = operands[1];
|
||||
DONE;
|
||||
}")
|
||||
|
||||
;; vector conditional compare patterns
|
||||
;; Vector conditional select for VCMP modes:
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; All work is delegated to spu_emit_vector_cond_expr; the expander
;; succeeds (DONE) when that helper returns nonzero and FAILs otherwise.
(define_expand "vcond<mode>"
|
||||
[(set (match_operand:VCMP 0 "spu_reg_operand" "=r")
|
||||
(if_then_else:VCMP
|
||||
(match_operator 3 "comparison_operator"
|
||||
[(match_operand:VCMP 4 "spu_reg_operand" "r")
|
||||
(match_operand:VCMP 5 "spu_reg_operand" "r")])
|
||||
(match_operand:VCMP 1 "spu_reg_operand" "r")
|
||||
(match_operand:VCMP 2 "spu_reg_operand" "r")))]
|
||||
""
|
||||
{
|
||||
if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2],
|
||||
operands[3], operands[4], operands[5]))
|
||||
DONE;
|
||||
else
|
||||
FAIL;
|
||||
})
|
||||
|
||||
;; Vector conditional select for VCMPU modes (the "vcondu" counterpart of
;; vcond<mode>):
;;   operand 0 = (operand 4 <operator 3> operand 5) ? operand 1 : operand 2
;; All work is delegated to spu_emit_vector_cond_expr; the expander
;; succeeds (DONE) when that helper returns nonzero and FAILs otherwise.
(define_expand "vcondu<mode>"
|
||||
[(set (match_operand:VCMPU 0 "spu_reg_operand" "=r")
|
||||
(if_then_else:VCMPU
|
||||
(match_operator 3 "comparison_operator"
|
||||
[(match_operand:VCMPU 4 "spu_reg_operand" "r")
|
||||
(match_operand:VCMPU 5 "spu_reg_operand" "r")])
|
||||
(match_operand:VCMPU 1 "spu_reg_operand" "r")
|
||||
(match_operand:VCMPU 2 "spu_reg_operand" "r")))]
|
||||
""
|
||||
{
|
||||
if (spu_emit_vector_cond_expr (operands[0], operands[1], operands[2],
|
||||
operands[3], operands[4], operands[5]))
|
||||
DONE;
|
||||
else
|
||||
FAIL;
|
||||
})
|
||||
|
||||
|
||||
;; branch on condition
|
||||
|
||||
|
@ -3376,7 +3934,7 @@ selb\t%0,%4,%0,%3"
|
|||
|
||||
(define_expand "sminv4sf3"
|
||||
[(set (match_operand:V4SF 0 "register_operand" "=r")
|
||||
(smax:V4SF (match_operand:V4SF 1 "register_operand" "r")
|
||||
(smin:V4SF (match_operand:V4SF 1 "register_operand" "r")
|
||||
(match_operand:V4SF 2 "register_operand" "r")))]
|
||||
""
|
||||
"
|
||||
|
@ -3388,6 +3946,34 @@ selb\t%0,%4,%0,%3"
|
|||
DONE;
|
||||
}")
|
||||
|
||||
;; V2DF element-wise maximum.  Builds a V2DI mask with cgt_v2df
;; (operand 1 > operand 2) and feeds it, viewed as V4SI, to selb with the
;; data arguments in the order (operand 2, operand 1).
;; NOTE(review): this assumes selb takes bits from its second data
;; argument where the mask is set -- confirm against the selb pattern.
(define_expand "smaxv2df3"
|
||||
[(set (match_operand:V2DF 0 "register_operand" "=r")
|
||||
(smax:V2DF (match_operand:V2DF 1 "register_operand" "r")
|
||||
(match_operand:V2DF 2 "register_operand" "r")))]
|
||||
""
|
||||
"
|
||||
{
|
||||
rtx mask = gen_reg_rtx (V2DImode);
|
||||
emit_insn (gen_cgt_v2df (mask, operands[1], operands[2]));
|
||||
emit_insn (gen_selb (operands[0], operands[2], operands[1],
|
||||
spu_gen_subreg (V4SImode, mask)));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
;; V2DF element-wise minimum.  Builds the same cgt_v2df mask as smaxv2df3
;; (operand 1 > operand 2) but passes the data arguments to selb in the
;; opposite order (operand 1, operand 2).
;; NOTE(review): this assumes selb takes bits from its second data
;; argument where the mask is set -- confirm against the selb pattern.
(define_expand "sminv2df3"
|
||||
[(set (match_operand:V2DF 0 "register_operand" "=r")
|
||||
(smin:V2DF (match_operand:V2DF 1 "register_operand" "r")
|
||||
(match_operand:V2DF 2 "register_operand" "r")))]
|
||||
""
|
||||
"
|
||||
{
|
||||
rtx mask = gen_reg_rtx (V2DImode);
|
||||
emit_insn (gen_cgt_v2df (mask, operands[1], operands[2]));
|
||||
emit_insn (gen_selb (operands[0], operands[1], operands[2],
|
||||
spu_gen_subreg (V4SImode, mask)));
|
||||
DONE;
|
||||
}")
|
||||
|
||||
(define_expand "vec_widen_umult_hi_v8hi"
|
||||
[(set (match_operand:V4SI 0 "register_operand" "=r")
|
||||
(mult:V4SI
|
||||
|
|
|
@ -55,3 +55,11 @@ Generate code for 32 bit addressing
|
|||
mfixed-range=
|
||||
Target RejectNegative Joined Var(spu_fixed_range_string)
|
||||
Specify range of registers to make fixed
|
||||
|
||||
march=
|
||||
Target RejectNegative Joined Var(spu_arch_string)
|
||||
Generate code for given CPU
|
||||
|
||||
mtune=
|
||||
Target RejectNegative Joined Var(spu_tune_string)
|
||||
Schedule code for given CPU
|
||||
|
|
|
@ -233,6 +233,15 @@
|
|||
#define si_rchcnt(imm) __builtin_si_rchcnt(imm)
|
||||
#define si_wrch(imm,ra) __builtin_si_wrch(imm,ra)
|
||||
|
||||
/* celledp only instructions */
|
||||
#ifdef __SPU_EDP__
|
||||
#define si_dfceq(ra,rb) __builtin_si_dfceq(ra,rb)
|
||||
#define si_dfcmeq(ra,rb) __builtin_si_dfcmeq(ra,rb)
|
||||
#define si_dfcgt(ra,rb) __builtin_si_dfcgt(ra,rb)
|
||||
#define si_dfcmgt(ra,rb) __builtin_si_dfcmgt(ra,rb)
|
||||
#define si_dftsv(ra,imm) __builtin_si_dftsv(ra,imm)
|
||||
#endif /* __SPU_EDP__ */
|
||||
|
||||
#define si_from_char(scalar) __builtin_si_from_char(scalar)
|
||||
#define si_from_uchar(scalar) __builtin_si_from_uchar(scalar)
|
||||
#define si_from_short(scalar) __builtin_si_from_short(scalar)
|
||||
|
@ -295,6 +304,7 @@
|
|||
#define spu_cmpabsgt(ra,rb) __builtin_spu_cmpabsgt(ra,rb)
|
||||
#define spu_cmpeq(ra,rb) __builtin_spu_cmpeq(ra,rb)
|
||||
#define spu_cmpgt(ra,rb) __builtin_spu_cmpgt(ra,rb)
|
||||
#define spu_testsv(ra,imm) __builtin_spu_testsv(ra,imm)
|
||||
#define spu_hcmpeq(ra,rb) __builtin_spu_hcmpeq(ra,rb)
|
||||
#define spu_hcmpgt(ra,rb) __builtin_spu_hcmpgt(ra,rb)
|
||||
#define spu_cntb(ra) __builtin_spu_cntb(ra)
|
||||
|
|
|
@ -70,6 +70,16 @@
|
|||
#define MFC_WrListStallAck 26
|
||||
#define MFC_RdAtomicStat 27
|
||||
|
||||
/* Bit flag mnemonics for the special-value classes tested by spu_testsv.
|
||||
*/
|
||||
#define SPU_SV_NEG_DENORM 0x01 /* negative denormalized number */
|
||||
#define SPU_SV_POS_DENORM 0x02 /* positive denormalized number */
|
||||
#define SPU_SV_NEG_ZERO 0x04 /* negative zero */
|
||||
#define SPU_SV_POS_ZERO 0x08 /* positive zero */
|
||||
#define SPU_SV_NEG_INFINITY 0x10 /* negative infinity */
|
||||
#define SPU_SV_POS_INFINITY 0x20 /* positive infinity */
|
||||
#define SPU_SV_NAN 0x40 /* not a number */
|
||||
|
||||
#include <spu_internals.h>
|
||||
|
||||
#endif /* _SPU_INTRINSICS_H */
|
||||
|
|
|
@ -1,3 +1,16 @@
|
|||
2007-07-13 Sa Liu <saliu@de.ibm.com>
|
||||
|
||||
* gcc.dg/vect/fast-math-vect-reduc-7.c: Switch on test
|
||||
for V2DFmode vector conditional expression.
|
||||
* gcc.target/spu/dfcmeq.c: New. Test combination of abs
|
||||
and dfceq patterns.
|
||||
* gcc.target/spu/dfcmgt.c: New. Test combination of abs
|
||||
and dfcgt patterns.
|
||||
* gcc.target/spu/intrinsics-2.c: New. Test intrinsics for
|
||||
V2DFmode comparison and test special values.
|
||||
* lib/target-supports.exp: Switch on test for V2DFmode
|
||||
vector conditional expression.
|
||||
|
||||
2007-07-13 Richard Guenther <rguenther@suse.de>
|
||||
|
||||
PR tree-optimization/32721
|
||||
|
|
|
@ -50,6 +50,5 @@ int main (void)
|
|||
return 0;
|
||||
}
|
||||
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" { xfail vect_no_compare_double } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" { target vect_no_compare_double } } } */
|
||||
/* { dg-final { scan-tree-dump-times "vectorized 3 loops" 1 "vect" } } */
|
||||
/* { dg-final { cleanup-tree-dump "vect" } } */
|
||||
|
|
|
@ -1659,7 +1659,7 @@ proc check_effective_target_vect_double { } {
|
|||
return $et_vect_double_saved
|
||||
}
|
||||
|
||||
# Return 0 if the target supports hardware comparison of vectors of double, 0 otherwise.
|
||||
# Return 1 if the target does not support hardware comparison of vectors of double, 0 otherwise.
|
||||
#
|
||||
# This won't change for different subtargets so cache the result.
|
||||
|
||||
|
@ -1670,9 +1670,6 @@ proc check_effective_target_vect_no_compare_double { } {
|
|||
verbose "check_effective_target_vect_no_compare_double: using cached result" 2
|
||||
} else {
|
||||
set et_vect_no_compare_double_saved 0
|
||||
if { [istarget spu-*-*] } {
|
||||
set et_vect_no_compare_double_saved 1
|
||||
}
|
||||
}
|
||||
|
||||
verbose "check_effective_target_vect_no_compare_double: returning $et_vect_no_compare_double_saved" 2
|
||||
|
@ -2025,6 +2022,7 @@ proc check_effective_target_vect_condition { } {
|
|||
if { [istarget powerpc*-*-*]
|
||||
|| [istarget ia64-*-*]
|
||||
|| [istarget i?86-*-*]
|
||||
|| [istarget spu-*-*]
|
||||
|| [istarget x86_64-*-*] } {
|
||||
set et_vect_cond_saved 1
|
||||
}
|
||||
|
|
Loading…
Add table
Reference in a new issue