From 6c72ea127ca314f378861522d5720d5655278ddc Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Wed, 19 Dec 2012 17:04:11 +0100 Subject: [PATCH] i386.h (enum ix86_tune_indices): Add X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE. * config/i386/i386.h (enum ix86_tune_indices): Add X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE. (TARGET_AVOID_MEM_OPND_FOR_CMOVE): New define. * config/i386/i386.c (initial_ix86_tune_features) <X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE>: Initialize. * config/i386/i386.md (splitters to avoid cmove memory operands): New. (peephole2s to avoid cmove memory operands): New. From-SVN: r194614 --- gcc/ChangeLog | 43 ++++++++------- gcc/config/i386/i386.c | 6 ++- gcc/config/i386/i386.h | 3 ++ gcc/config/i386/i386.md | 114 +++++++++++++++++++++++++++++++++++++--- 4 files changed, 138 insertions(+), 28 deletions(-) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e60c4d902e5..be21ba93aaf 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,14 @@ +2012-12-19 Uros Bizjak + Yuri Rumyantsev + + * config/i386/i386.h (enum ix86_tune_indices): Add + X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE. + (TARGET_AVOID_MEM_OPND_FOR_CMOVE): New define. + * config/i386/i386.c (initial_ix86_tune_features) + <X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE>: Initialize. + * config/i386/i386.md (splitters to avoid cmove memory operands): New. + (peephole2s to avoid cmove memory operands): New. + 2012-12-19 Kyrylo Tkachov * config/arm/arm.md (f_minmaxs, f_minmaxd): New types. @@ -34,34 +45,26 @@ * combine.c (try_combine): Adjust to use the target hook. * config/alpha/alpha.h (CANONICALIZE_COMPARISON): Remove macro definition. - * config/alpha/alpha.c (alpha_canonicalize_comparison): New - function. + * config/alpha/alpha.c (alpha_canonicalize_comparison): New function. (TARGET_CANONICALIZE_COMPARISON): New macro definition. * config/arm/arm-protos.h (arm_canonicalize_comparison): Remove prototype. - * config/arm/arm.c (arm_canonicalize_comparison): Add new - parameter. + * config/arm/arm.c (arm_canonicalize_comparison): Add new parameter. 
(TARGET_CANONICALIZE_COMPARISON): New macro definition. - * config/arm/arm.h (CANONICALIZE_COMPARISON): Remove macro - definition. + * config/arm/arm.h (CANONICALIZE_COMPARISON): Remove macro definition. * config/s390/s390-protos.h (s390_canonicalize_comparison): Remove prototype. - * config/s390/s390.c (s390_canonicalize_comparison): Add new - parameter. + * config/s390/s390.c (s390_canonicalize_comparison): Add new parameter. (TARGET_CANONICALIZE_COMPARISON): New macro definition. - * config/s390/s390.h (CANONICALIZE_COMPARISON): Remove macro - definition. - * config/sh/sh-protos.h (sh_canonicalize_comparison): Remove - prototype. + * config/s390/s390.h (CANONICALIZE_COMPARISON): Remove macro definition. + * config/sh/sh-protos.h (sh_canonicalize_comparison): Remove prototype. * config/sh/sh.c (sh_canonicalize_comparison): Add new prototype. New function overloading the old one. (TARGET_CANONICALIZE_COMPARISON): New macro definition. - * config/sh/sh.h (CANONICALIZE_COMPARISON): Remove macro - definition. + * config/sh/sh.h (CANONICALIZE_COMPARISON): Remove macro definition. * config/spu/spu.c (spu_canonicalize_comparison): New function. (TARGET_CANONICALIZE_COMPARISON): New macro definition. - * config/spu/spu.h (CANONICALIZE_COMPARISON): Remove macro - definition. + * config/spu/spu.h (CANONICALIZE_COMPARISON): Remove macro definition. 2012-12-19 Jakub Jelinek @@ -74,7 +77,8 @@ 2012-12-18 Jan Hubicka PR tree-optimization/55683 - * ipa-prop.c (try_make_edge_direct_virtual_call): Look into constants for binfo. + * ipa-prop.c (try_make_edge_direct_virtual_call): Look into constants + for binfo. 2012-12-19 Terry Guo @@ -100,8 +104,7 @@ 2012-12-18 Kyrylo Tkachov - * config/arm/driver-arm.c (arm_cpu_table): - Add Cortex-A7. + * config/arm/driver-arm.c (arm_cpu_table): Add Cortex-A7. 2012-12-18 Aldy Hernandez @@ -152,7 +155,7 @@ gen_lowpart_if_possible. (gen_lowpart_no_emit_general): Remove prototype. * rtlhooks.c (gen_lowpart_no_emit_general): Removed. 
- * simplify-rtx.c (simplify_unary_operation_1, + * simplify-rtx.c (simplify_unary_operation_1, simplify_binary_operation_1): Continue simplifying if rtl_hooks.gen_lowpart_no_emit returns NULL_RTX. * dwarf2out.c (mem_loc_descriptor) : Handle diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c index 69f44aa6086..b466a4fbbdf 100644 --- a/gcc/config/i386/i386.c +++ b/gcc/config/i386/i386.c @@ -2026,7 +2026,11 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = { /* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE regs instead of memory. */ - m_COREI7 | m_CORE2I7 + m_COREI7 | m_CORE2I7, + + /* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for + a conditional move. */ + m_ATOM }; /* Feature tests against the various architecture variations. */ diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index 3ac345172c8..d2f535a7566 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -331,6 +331,7 @@ enum ix86_tune_indices { X86_TUNE_REASSOC_INT_TO_PARALLEL, X86_TUNE_REASSOC_FP_TO_PARALLEL, X86_TUNE_GENERAL_REGS_SSE_SPILL, + X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE, X86_TUNE_LAST }; @@ -436,6 +437,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST]; ix86_tune_features[X86_TUNE_REASSOC_FP_TO_PARALLEL] #define TARGET_GENERAL_REGS_SSE_SPILL \ ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL] +#define TARGET_AVOID_MEM_OPND_FOR_CMOVE \ + ix86_tune_features[X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE] /* Feature tests against the various architecture variations. */ enum ix86_arch_indices { diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 3846065066b..95a52cdd1a7 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -16093,6 +16093,28 @@ [(set_attr "type" "icmov") (set_attr "mode" "<MODE>")]) +;; Don't do conditional moves with memory inputs. This splitter helps +;; register starved x86_32 by forcing inputs into registers before reload. 
+(define_split + [(set (match_operand:SWI248 0 "register_operand") + (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SWI248 2 "nonimmediate_operand") + (match_operand:SWI248 3 "nonimmediate_operand")))] + "!TARGET_64BIT && TARGET_CMOVE + && TARGET_AVOID_MEM_OPND_FOR_CMOVE + && (MEM_P (operands[2]) || MEM_P (operands[3])) + && can_create_pseudo_p () + && optimize_insn_for_speed_p ()" + [(set (match_dup 0) + (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 3)))] +{ + if (MEM_P (operands[2])) + operands[2] = force_reg (<MODE>mode, operands[2]); + if (MEM_P (operands[3])) + operands[3] = force_reg (<MODE>mode, operands[3]); +}) + (define_insn "*movqicc_noc" [(set (match_operand:QI 0 "register_operand" "=r,r") (if_then_else:QI (match_operator 1 "ix86_comparison_operator" @@ -16105,14 +16127,12 @@ (set_attr "mode" "QI")]) (define_split - [(set (match_operand 0 "register_operand") - (if_then_else (match_operator 1 "ix86_comparison_operator" - [(reg FLAGS_REG) (const_int 0)]) - (match_operand 2 "register_operand") - (match_operand 3 "register_operand")))] + [(set (match_operand:SWI12 0 "register_operand") + (if_then_else:SWI12 (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SWI12 2 "register_operand") + (match_operand:SWI12 3 "register_operand")))] "TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL - && (GET_MODE (operands[0]) == QImode - || GET_MODE (operands[0]) == HImode) && reload_completed" [(set (match_dup 0) (if_then_else:SI (match_dup 1) (match_dup 2) (match_dup 3)))] @@ -16122,6 +16142,33 @@ operands[3] = gen_lowpart (SImode, operands[3]); }) +;; Don't do conditional moves with memory inputs +(define_peephole2 + [(match_scratch:SWI248 2 "r") + (set (match_operand:SWI248 0 "register_operand") + (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_dup 0) + (match_operand:SWI248 3 "memory_operand")))] + 
"TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE + && optimize_insn_for_speed_p ()" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 0) + (if_then_else:SWI248 (match_dup 1) (match_dup 0) (match_dup 2)))]) + +(define_peephole2 + [(match_scratch:SWI248 2 "r") + (set (match_operand:SWI248 0 "register_operand") + (if_then_else:SWI248 (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:SWI248 3 "memory_operand") + (match_dup 0)))] + "TARGET_CMOVE && TARGET_AVOID_MEM_OPND_FOR_CMOVE + && optimize_insn_for_speed_p ()" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 0) + (if_then_else:SWI248 (match_dup 1) (match_dup 2) (match_dup 0)))]) + (define_expand "mov<mode>cc" [(set (match_operand:X87MODEF 0 "register_operand") (if_then_else:X87MODEF @@ -16209,6 +16256,59 @@ [(set_attr "type" "fcmov,fcmov,icmov,icmov") (set_attr "mode" "SF,SF,SI,SI")]) +;; Don't do conditional moves with memory inputs. This splitter helps +;; register starved x86_32 by forcing inputs into registers before reload. 
+(define_split + [(set (match_operand:MODEF 0 "register_operand") + (if_then_else:MODEF (match_operator 1 "ix86_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:MODEF 2 "nonimmediate_operand") + (match_operand:MODEF 3 "nonimmediate_operand")))] + "!TARGET_64BIT && TARGET_80387 && TARGET_CMOVE + && TARGET_AVOID_MEM_OPND_FOR_CMOVE + && (MEM_P (operands[2]) || MEM_P (operands[3])) + && can_create_pseudo_p () + && optimize_insn_for_speed_p ()" + [(set (match_dup 0) + (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 3)))] +{ + if (MEM_P (operands[2])) + operands[2] = force_reg (<MODE>mode, operands[2]); + if (MEM_P (operands[3])) + operands[3] = force_reg (<MODE>mode, operands[3]); +}) + +;; Don't do conditional moves with memory inputs +(define_peephole2 + [(match_scratch:MODEF 2 "r") + (set (match_operand:MODEF 0 "register_and_not_any_fp_reg_operand") + (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_dup 0) + (match_operand:MODEF 3 "memory_operand")))] + "(<MODE>mode != DFmode || TARGET_64BIT) + && TARGET_80387 && TARGET_CMOVE + && TARGET_AVOID_MEM_OPND_FOR_CMOVE + && optimize_insn_for_speed_p ()" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 0) + (if_then_else:MODEF (match_dup 1) (match_dup 0) (match_dup 2)))]) + +(define_peephole2 + [(match_scratch:MODEF 2 "r") + (set (match_operand:MODEF 0 "register_and_not_any_fp_reg_operand") + (if_then_else:MODEF (match_operator 1 "fcmov_comparison_operator" + [(reg FLAGS_REG) (const_int 0)]) + (match_operand:MODEF 3 "memory_operand") + (match_dup 0)))] + "(<MODE>mode != DFmode || TARGET_64BIT) + && TARGET_80387 && TARGET_CMOVE + && TARGET_AVOID_MEM_OPND_FOR_CMOVE + && optimize_insn_for_speed_p ()" + [(set (match_dup 2) (match_dup 3)) + (set (match_dup 0) + (if_then_else:MODEF (match_dup 1) (match_dup 2) (match_dup 0)))]) + ;; All moves in XOP pcmov instructions are 128 bits and hence we restrict ;; the scalar versions to have only XMM registers as 
operands.