x86: Update branch hint for Redwood Cove.

According to the Intel® 64 and IA-32 Architectures Optimization Reference
Manual [1], branch hint handling has changed for Redwood Cove.

--------cut from [1]-------------------------
Starting with the Redwood Cove microarchitecture, if the predictor has
no stored information about a branch, the branch has the Intel® SSE2
branch taken hint (i.e., instruction prefix 3EH), When the codec
decodes the branch, it flips the branch’s prediction from not-taken to
taken. It then flushes the pipeline in front of it and steers this
pipeline to fetch the taken path of the branch.
--------cut end -----------------------------

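Split the tune branch_prediction_hints into branch_prediction_hints_taken
and branch_prediction_hints_not_taken.  When the matching tune is enabled,
always generate the branch hint for a conditional branch, instead of only
when the default static prediction would be wrong.  Both tunes are disabled
by default.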

[1] https://www.intel.com/content/www/us/en/content-details/821612/intel-64-and-ia-32-architectures-optimization-reference-manual-volume-1.html

gcc/

	* config/i386/i386.cc (ix86_print_operand): Always generate
	branch hint for conditional branches.
	* config/i386/i386.h (TARGET_BRANCH_PREDICTION_HINTS): Split
	into ..
	(TARGET_BRANCH_PREDICTION_HINTS_TAKEN): .. this, and ..
	(TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN): .. this.
	* config/i386/x86-tune.def (X86_TUNE_BRANCH_PREDICTION_HINTS):
	Split into ..
	(X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN): .. this, and ..
	(X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN): .. this.

(cherry picked from commit a910c30c7c27cd0f6d2d2694544a09fb11d611b9)
This commit is contained in:
H.J. Lu 2022-04-26 11:08:55 -07:00 committed by liuhongt
parent 0fcadb3d51
commit 1fff665a51
3 changed files with 24 additions and 24 deletions

View file

@ -14203,7 +14203,8 @@ ix86_print_operand (FILE *file, rtx x, int code)
if (!optimize
|| optimize_function_for_size_p (cfun)
|| !TARGET_BRANCH_PREDICTION_HINTS)
|| (!TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN
&& !TARGET_BRANCH_PREDICTION_HINTS_TAKEN))
return;
x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
@ -14212,25 +14213,13 @@ ix86_print_operand (FILE *file, rtx x, int code)
int pred_val = profile_probability::from_reg_br_prob_note
(XINT (x, 0)).to_reg_br_prob_base ();
if (pred_val < REG_BR_PROB_BASE * 45 / 100
|| pred_val > REG_BR_PROB_BASE * 55 / 100)
{
bool taken = pred_val > REG_BR_PROB_BASE / 2;
bool cputaken
= final_forward_branch_p (current_output_insn) == 0;
/* Emit hints only in the case default branch prediction
heuristics would fail. */
if (taken != cputaken)
{
/* We use 3e (DS) prefix for taken branches and
2e (CS) prefix for not taken branches. */
if (taken)
fputs ("ds ; ", file);
else
fputs ("cs ; ", file);
}
}
bool taken = pred_val > REG_BR_PROB_BASE / 2;
/* We use 3e (DS) prefix for taken branches and
2e (CS) prefix for not taken branches. */
if (taken && TARGET_BRANCH_PREDICTION_HINTS_TAKEN)
fputs ("ds ; ", file);
else if (!taken && TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN)
fputs ("cs ; ", file);
}
return;
}

View file

@ -306,8 +306,10 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
#define TARGET_ZERO_EXTEND_WITH_AND \
ix86_tune_features[X86_TUNE_ZERO_EXTEND_WITH_AND]
#define TARGET_UNROLL_STRLEN ix86_tune_features[X86_TUNE_UNROLL_STRLEN]
#define TARGET_BRANCH_PREDICTION_HINTS \
ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS]
#define TARGET_BRANCH_PREDICTION_HINTS_NOT_TAKEN \
ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN]
#define TARGET_BRANCH_PREDICTION_HINTS_TAKEN \
ix86_tune_features[X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN]
#define TARGET_DOUBLE_WITH_ADD ix86_tune_features[X86_TUNE_DOUBLE_WITH_ADD]
#define TARGET_USE_SAHF ix86_tune_features[X86_TUNE_USE_SAHF]
#define TARGET_MOVX ix86_tune_features[X86_TUNE_MOVX]

View file

@ -690,15 +690,24 @@ DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode", m_K6)
DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode",
m_K8)
/* X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN: Starting with the Redwood Cove
microarchitecture, if the predictor has no stored information about a branch
and the branch has the Intel® SSE2 branch taken hint
(i.e., instruction prefix 3EH), then when the codec decodes the branch, it
flips the branch's prediction from not-taken to taken. It then flushes the
pipeline in front of it and steers this pipeline to fetch the taken path of
the branch. */
DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS_TAKEN, "branch_prediction_hints_taken", m_NONE)
/*****************************************************************************/
/* This never worked well before. */
/*****************************************************************************/
/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
/* X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN: Branch hints were put in P4 based
on simulation result. But after P4 was made, no performance benefit
was observed with branch hints. It also increases the code size.
As a result, icc never generates branch hints. */
DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS, "branch_prediction_hints", m_NONE)
DEF_TUNE (X86_TUNE_BRANCH_PREDICTION_HINTS_NOT_TAKEN, "branch_prediction_hints_not_taken", m_NONE)
/* X86_TUNE_QIMODE_MATH: Enable use of 8bit arithmetic. */
DEF_TUNE (X86_TUNE_QIMODE_MATH, "qimode_math", m_ALL)