Enable prefetching at -O3 for AMD CPUs.
2010-06-25 Changpeng Fang <changpeng.fang@amd.com> * common.opt (fprefetch-loop-arrays): Re-define -fprefetch-loop-arrays as a tri-state option with the initial value of -1. * tree-ssa-loop.c (gate_tree_ssa_loop_prefetch): Invoke prefetch pass only when flag_prefetch_loop_arrays > 0. * toplev.c (process_options): Note that, with tri-states, flag_prefetch_loop_arrays > 0 means prefetching is enabled. * config/i386/i386.c (override_options): Enable prefetching at -O3 for the set of CPUs on which software prefetching is helpful. (software_prefetching_beneficial_p): New. Return TRUE if software prefetching is beneficial for the given CPU. From-SVN: r161391
This commit is contained in:
parent
c14420e173
commit
1fbb509aac
5 changed files with 46 additions and 5 deletions
|
@ -1,3 +1,17 @@
|
||||||
|
2010-06-25 Changpeng Fang <changpeng.fang@amd.com>
|
||||||
|
|
||||||
|
* common.opt (fprefetch-loop-arrays): Re-define
|
||||||
|
-fprefetch-loop-arrays as a tri-state option with the initial
|
||||||
|
value of -1.
|
||||||
|
* tree-ssa-loop.c (gate_tree_ssa_loop_prefetch): Invoke prefetch
|
||||||
|
pass only when flag_prefetch_loop_arrays > 0.
|
||||||
|
* toplev.c (process_options): Note that, with tri-states,
|
||||||
|
flag_prefetch_loop_arrays>0 means prefetching is enabled.
|
||||||
|
* config/i386/i386.c (override_options): Enable prefetching at -O3
|
||||||
|
for the set of CPUs on which software prefetching is helpful.
|
||||||
|
(software_prefetching_beneficial_p): New. Return TRUE if software
|
||||||
|
prefetching is beneficial for the given CPU.
|
||||||
|
|
||||||
2010-06-25 H.J. Lu <hongjiu.lu@intel.com>
|
2010-06-25 H.J. Lu <hongjiu.lu@intel.com>
|
||||||
|
|
||||||
PR rtl-optimization/44326
|
PR rtl-optimization/44326
|
||||||
|
|
|
@ -949,7 +949,7 @@ Common Report Var(flag_predictive_commoning) Optimization
|
||||||
Run predictive commoning optimization.
|
Run predictive commoning optimization.
|
||||||
|
|
||||||
fprefetch-loop-arrays
|
fprefetch-loop-arrays
|
||||||
Common Report Var(flag_prefetch_loop_arrays) Optimization
|
Common Report Var(flag_prefetch_loop_arrays) Init(-1) Optimization
|
||||||
Generate prefetch instructions, if available, for arrays in loops
|
Generate prefetch instructions, if available, for arrays in loops
|
||||||
|
|
||||||
fprofile
|
fprofile
|
||||||
|
|
|
@ -2691,6 +2691,26 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune,
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Return TRUE if software prefetching is beneficial for the
|
||||||
|
given CPU. */
|
||||||
|
|
||||||
|
static bool
|
||||||
|
software_prefetching_beneficial_p (void)
|
||||||
|
{
|
||||||
|
switch (ix86_tune)
|
||||||
|
{
|
||||||
|
case PROCESSOR_GEODE:
|
||||||
|
case PROCESSOR_K6:
|
||||||
|
case PROCESSOR_ATHLON:
|
||||||
|
case PROCESSOR_K8:
|
||||||
|
case PROCESSOR_AMDFAM10:
|
||||||
|
return true;
|
||||||
|
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Function that is callable from the debugger to print the current
|
/* Function that is callable from the debugger to print the current
|
||||||
options. */
|
options. */
|
||||||
void
|
void
|
||||||
|
@ -3535,6 +3555,13 @@ override_options (bool main_args_p)
|
||||||
if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
|
if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
|
||||||
set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
|
set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
|
||||||
|
|
||||||
|
/* Enable software prefetching at -O3 for CPUs on which prefetching is helpful. */
|
||||||
|
if (flag_prefetch_loop_arrays < 0
|
||||||
|
&& HAVE_prefetch
|
||||||
|
&& optimize >= 3
|
||||||
|
&& software_prefetching_beneficial_p ())
|
||||||
|
flag_prefetch_loop_arrays = 1;
|
||||||
|
|
||||||
/* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
|
/* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
|
||||||
can be optimized to ap = __builtin_next_arg (0). */
|
can be optimized to ap = __builtin_next_arg (0). */
|
||||||
if (!TARGET_64BIT)
|
if (!TARGET_64BIT)
|
||||||
|
|
|
@ -2013,13 +2013,13 @@ process_options (void)
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef HAVE_prefetch
|
#ifndef HAVE_prefetch
|
||||||
if (flag_prefetch_loop_arrays)
|
if (flag_prefetch_loop_arrays > 0)
|
||||||
{
|
{
|
||||||
warning (0, "-fprefetch-loop-arrays not supported for this target");
|
warning (0, "-fprefetch-loop-arrays not supported for this target");
|
||||||
flag_prefetch_loop_arrays = 0;
|
flag_prefetch_loop_arrays = 0;
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
if (flag_prefetch_loop_arrays && !HAVE_prefetch)
|
if (flag_prefetch_loop_arrays > 0 && !HAVE_prefetch)
|
||||||
{
|
{
|
||||||
warning (0, "-fprefetch-loop-arrays not supported for this target (try -march switches)");
|
warning (0, "-fprefetch-loop-arrays not supported for this target (try -march switches)");
|
||||||
flag_prefetch_loop_arrays = 0;
|
flag_prefetch_loop_arrays = 0;
|
||||||
|
@ -2028,7 +2028,7 @@ process_options (void)
|
||||||
|
|
||||||
/* This combination of options isn't handled for i386 targets and doesn't
|
/* This combination of options isn't handled for i386 targets and doesn't
|
||||||
make much sense anyway, so don't allow it. */
|
make much sense anyway, so don't allow it. */
|
||||||
if (flag_prefetch_loop_arrays && optimize_size)
|
if (flag_prefetch_loop_arrays > 0 && optimize_size)
|
||||||
{
|
{
|
||||||
warning (0, "-fprefetch-loop-arrays is not supported with -Os");
|
warning (0, "-fprefetch-loop-arrays is not supported with -Os");
|
||||||
flag_prefetch_loop_arrays = 0;
|
flag_prefetch_loop_arrays = 0;
|
||||||
|
|
|
@ -600,7 +600,7 @@ tree_ssa_loop_prefetch (void)
|
||||||
static bool
|
static bool
|
||||||
gate_tree_ssa_loop_prefetch (void)
|
gate_tree_ssa_loop_prefetch (void)
|
||||||
{
|
{
|
||||||
return flag_prefetch_loop_arrays != 0;
|
return flag_prefetch_loop_arrays > 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct gimple_opt_pass pass_loop_prefetch =
|
struct gimple_opt_pass pass_loop_prefetch =
|
||||||
|
|
Loading…
Add table
Reference in a new issue