Enable prefetching at -O3 for AMD CPUs.
2010-06-25 Changpeng Fang <changpeng.fang@amd.com> * common.opt (fprefetch-loop-arrays): Re-define -fprefetch-loop-arrays as a tri-state option with the initial value of -1. * tree-ssa-loop.c (gate_tree_ssa_loop_prefetch): Invoke prefetch pass only when flag_prefetch_loop_arrays > 0. * toplev.c (process_options): Note that, with tri-states, flag_prefetch_loop_arrays>0 means prefetching is enabled. * config/i386/i386.c (override_options): Enable prefetching at -O3 for a set of CPUs on which software prefetching is helpful. (software_prefetching_beneficial_p): New. Return TRUE if software prefetching is beneficial for the given CPU. From-SVN: r161391
This commit is contained in:
parent
c14420e173
commit
1fbb509aac
5 changed files with 46 additions and 5 deletions
|
@ -1,3 +1,17 @@
|
|||
2010-06-25 Changpeng Fang <changpeng.fang@amd.com>
|
||||
|
||||
* common.opt (fprefetch-loop-arrays): Re-define
|
||||
-fprefetch-loop-arrays as a tri-state option with the initial
|
||||
value of -1.
|
||||
* tree-ssa-loop.c (gate_tree_ssa_loop_prefetch): Invoke prefetch
|
||||
pass only when flag_prefetch_loop_arrays > 0.
|
||||
* toplev.c (process_options): Note that, with tri-states,
|
||||
flag_prefetch_loop_arrays>0 means prefetching is enabled.
|
||||
* config/i386/i386.c (override_options): Enable prefetching at -O3
|
||||
for a set of CPUs on which software prefetching is helpful.
|
||||
(software_prefetching_beneficial_p): New. Return TRUE if software
|
||||
prefetching is beneficial for the given CPU.
|
||||
|
||||
2010-06-25 H.J. Lu <hongjiu.lu@intel.com>
|
||||
|
||||
PR rtl-optimization/44326
|
||||
|
|
|
@ -949,7 +949,7 @@ Common Report Var(flag_predictive_commoning) Optimization
|
|||
Run predictive commoning optimization.
|
||||
|
||||
fprefetch-loop-arrays
|
||||
Common Report Var(flag_prefetch_loop_arrays) Optimization
|
||||
Common Report Var(flag_prefetch_loop_arrays) Init(-1) Optimization
|
||||
Generate prefetch instructions, if available, for arrays in loops
|
||||
|
||||
fprofile
|
||||
|
|
|
@ -2691,6 +2691,26 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune,
|
|||
return ret;
|
||||
}
|
||||
|
||||
/* Return TRUE if software prefetching is beneficial for the CPU
|
||||
selected by ix86_tune (Geode, K6, Athlon, K8 or AMDFAM10), FALSE otherwise. */
|
||||

||||
static bool
|
||||
software_prefetching_beneficial_p (void)
|
||||
{
|
||||
switch (ix86_tune)
|
||||
{
|
||||
case PROCESSOR_GEODE:
|
||||
case PROCESSOR_K6:
|
||||
case PROCESSOR_ATHLON:
|
||||
case PROCESSOR_K8:
|
||||
case PROCESSOR_AMDFAM10:
|
||||
return true;
|
||||

||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/* Function that is callable from the debugger to print the current
|
||||
options. */
|
||||
void
|
||||
|
@ -3535,6 +3555,13 @@ override_options (bool main_args_p)
|
|||
if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
|
||||
set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
|
||||
|
||||
/* Enable software prefetching at -O3 for CPUs on which prefetching is helpful. */
|
||||
if (flag_prefetch_loop_arrays < 0
|
||||
&& HAVE_prefetch
|
||||
&& optimize >= 3
|
||||
&& software_prefetching_beneficial_p ())
|
||||
flag_prefetch_loop_arrays = 1;
|
||||
|
||||
/* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
|
||||
can be optimized to ap = __builtin_next_arg (0). */
|
||||
if (!TARGET_64BIT)
|
||||
|
|
|
@ -2013,13 +2013,13 @@ process_options (void)
|
|||
}
|
||||
|
||||
#ifndef HAVE_prefetch
|
||||
if (flag_prefetch_loop_arrays)
|
||||
if (flag_prefetch_loop_arrays > 0)
|
||||
{
|
||||
warning (0, "-fprefetch-loop-arrays not supported for this target");
|
||||
flag_prefetch_loop_arrays = 0;
|
||||
}
|
||||
#else
|
||||
if (flag_prefetch_loop_arrays && !HAVE_prefetch)
|
||||
if (flag_prefetch_loop_arrays > 0 && !HAVE_prefetch)
|
||||
{
|
||||
warning (0, "-fprefetch-loop-arrays not supported for this target (try -march switches)");
|
||||
flag_prefetch_loop_arrays = 0;
|
||||
|
@ -2028,7 +2028,7 @@ process_options (void)
|
|||
|
||||
/* This combination of options isn't handled for i386 targets and doesn't
|
||||
make much sense anyway, so don't allow it. */
|
||||
if (flag_prefetch_loop_arrays && optimize_size)
|
||||
if (flag_prefetch_loop_arrays > 0 && optimize_size)
|
||||
{
|
||||
warning (0, "-fprefetch-loop-arrays is not supported with -Os");
|
||||
flag_prefetch_loop_arrays = 0;
|
||||
|
|
|
@ -600,7 +600,7 @@ tree_ssa_loop_prefetch (void)
|
|||
static bool
|
||||
gate_tree_ssa_loop_prefetch (void)
|
||||
{
|
||||
return flag_prefetch_loop_arrays != 0;
|
||||
return flag_prefetch_loop_arrays > 0;
|
||||
}
|
||||
|
||||
struct gimple_opt_pass pass_loop_prefetch =
|
||||
|
|
Loading…
Add table
Reference in a new issue