x86: Update branch hint for Redwood Cove.
According to the Intel® 64 and IA-32 Architectures Optimization Reference Manual[1], the branch hint behavior is updated for Redwood Cove. --------cut from [1]------------------------- Starting with the Redwood Cove microarchitecture, if the predictor has no stored information about a branch, the branch has the Intel® SSE2 branch taken hint (i.e., instruction prefix 3EH), When the codec decodes the branch, it flips the branch’s prediction from not-taken to taken. It then flushes the pipeline in front of it and steers this pipeline to fetch the taken path of the branch. --------cut end ----------------------------- Split the tune branch_prediction_hints into branch_prediction_hints_taken and branch_prediction_hints_not_taken, and always generate the branch hint for conditional branches (no longer only when the default branch prediction heuristics would fail). Both tunes are disabled by default. [1] https://www.intel.com/content/www/us/en/content-details/821612/intel-64-and-ia-32-architectures-optimization-reference-manual-volume-1.html gcc/ * config/i386/i386.cc (ix86_print_operand): Always generate branch hint for conditional branches. * config/i386/i386.h (TARGET_BRANCH_PREDICTION_HINTS): Split into .. (TARGET_BRANCH_PREDICTION_HINTS_TAKEN): .. this, and .. (TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN): .. this. * config/i386/x86-tune.def (X86_TUNE_BRANCH_PREDICTION_HINTS): Split into .. (X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN): .. this, and .. (X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN): .. this. (cherry picked from commit a910c30c7c27cd0f6d2d2694544a09fb11d611b9)
This commit is contained in:
parent
0fcadb3d51
commit
1fff665a51
3 changed files with 24 additions and 24 deletions
|
@ -14203,7 +14203,8 @@ ix86_print_operand (FILE *file, rtx x, int code)
|
|||
|
||||
if (!optimize
|
||||
|| optimize_function_for_size_p (cfun)
|
||||
|| !TARGET_BRANCH_PREDICTION_HINTS)
|
||||
|| (!TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN
|
||||
&& !TARGET_BRANCH_PREDICTION_HINTS_TAKEN))
|
||||
return;
|
||||
|
||||
x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
|
||||
|
@ -14212,25 +14213,13 @@ ix86_print_operand (FILE *file, rtx x, int code)
|
|||
int pred_val = profile_probability::from_reg_br_prob_note
|
||||
(XINT (x, 0)).to_reg_br_prob_base ();
|
||||
|
||||
if (pred_val < REG_BR_PROB_BASE * 45 / 100
|
||||
|| pred_val > REG_BR_PROB_BASE * 55 / 100)
|
||||
{
|
||||
bool taken = pred_val > REG_BR_PROB_BASE / 2;
|
||||
bool cputaken
|
||||
= final_forward_branch_p (current_output_insn) == 0;
|
||||
|
||||
/* Emit hints only in the case default branch prediction
|
||||
heuristics would fail. */
|
||||
if (taken != cputaken)
|
||||
{
|
||||
/* We use 3e (DS) prefix for taken branches and
|
||||
2e (CS) prefix for not taken branches. */
|
||||
if (taken)
|
||||
fputs ("ds ; ", file);
|
||||
else
|
||||
fputs ("cs ; ", file);
|
||||
}
|
||||
}
|
||||
bool taken = pred_val > REG_BR_PROB_BASE / 2;
|
||||
/* We use 3e (DS) prefix for taken branches and
|
||||
2e (CS) prefix for not taken branches. */
|
||||
if (taken && TARGET_BRANCH_PREDICTION_HINTS_TAKEN)
|
||||
fputs ("ds ; ", file);
|
||||
else if (!taken && TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN)
|
||||
fputs ("cs ; ", file);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
|
|
@ -306,8 +306,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
|
|||
#define TARGET_ZERO_EXTEND_WITH_AND \
|
||||
ix86_tune_features[X86_TUNE_ZERO_EXTEND_WITH_AND]
|
||||
#define TARGET_UNROLL_STRLEN ix86_tune_features[X86_TUNE_UNROLL_STRLEN]
|
||||
#define TARGET_BRANCH_PREDICTION_HINTS \
|
||||
ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS]
|
||||
#define TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN \
|
||||
ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN]
|
||||
#define TARGET_BRANCH_PREDICTION_HINTS_TAKEN \
|
||||
ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN]
|
||||
#define TARGET_DOUBLE_WITH_ADD ix86_tune_features[X86_TUNE_DOUBLE_WITH_ADD]
|
||||
#define TARGET_USE_SAHF ix86_tune_features[X86_TUNE_USE_SAHF]
|
||||
#define TARGET_MOVX ix86_tune_features[X86_TUNE_MOVX]
|
||||
|
|
|
@ -690,15 +690,24 @@ DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode", m_K6)
|
|||
DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode",
|
||||
m_K8)
|
||||
|
||||
/* X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN: Starting with the Redwood Cove
|
||||
microarchitecture, if the predictor has no stored information about a branch
|
||||
and the branch has the Intel® SSE2 branch taken hint
|
||||
(i.e., instruction prefix 3EH), then when the codec decodes the branch, it flips
|
||||
the branch’s prediction from not-taken to taken. It then flushes the pipeline
|
||||
in front of it and steers this pipeline to fetch the taken path of the
|
||||
branch. */
|
||||
DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN, "branch_prediction_hints_taken", m_NONE)
|
||||
|
||||
/*****************************************************************************/
|
||||
/* This never worked well before. */
|
||||
/*****************************************************************************/
|
||||
|
||||
/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
|
||||
/* X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN: Branch hints were put in P4 based
|
||||
on simulation result. But after P4 was made, no performance benefit
|
||||
was observed with branch hints. It also increases the code size.
|
||||
As a result, icc never generates branch hints. */
|
||||
DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS, "branch_prediction_hints", m_NONE)
|
||||
DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN, "branch_prediction_hints_not_taken", m_NONE)
|
||||
|
||||
/* X86_TUNE_QIMODE_MATH: Enable use of 8bit arithmetic. */
|
||||
DEF_TUNE (X86_TUNE_QIMODE_MATH, "qimode_math", m_ALL)
|
||||
|
|
Loading…
Add table
Reference in a new issue