Enable prefetching at -O3 for AMD CPUs.

2010-06-25  Changpeng Fang  <changpeng.fang@amd.com>

	* common.opt (fprefetch-loop-arrays): Re-define
	-fprefetch-loop-arrays as a tri-state option with the initial
	value of -1.
	* tree-ssa-loop.c (gate_tree_ssa_loop_prefetch): Invoke prefetch
	pass only when flag_prefetch_loop_arrays > 0.
	* toplev.c (process_options): Note that, with tri-states,
	flag_prefetch_loop_arrays>0 means prefetching is enabled.
	* config/i386/i386.c (override_options): Enable prefetching at -O3
	for a set of CPUs for which sw prefetching is helpful.
	(software_prefetching_beneficial_p): New.  Return TRUE if software
	prefetching is beneficial for the given CPU.

From-SVN: r161391
This commit is contained in:
Changpeng Fang 2010-06-25 18:25:00 +00:00 committed by Sebastian Pop
parent c14420e173
commit 1fbb509aac
5 changed files with 46 additions and 5 deletions

View file

@ -1,3 +1,17 @@
2010-06-25 Changpeng Fang <changpeng.fang@amd.com>
* common.opt (fprefetch-loop-arrays): Re-define
-fprefetch-loop-arrays as a tri-state option with the initial
value of -1.
* tree-ssa-loop.c (gate_tree_ssa_loop_prefetch): Invoke prefetch
pass only when flag_prefetch_loop_arrays > 0.
* toplev.c (process_options): Note that, with tri-states,
flag_prefetch_loop_arrays>0 means prefetching is enabled.
* config/i386/i386.c (override_options): Enable prefetching at -O3
for a set of CPUs for which sw prefetching is helpful.
(software_prefetching_beneficial_p): New. Return TRUE if software
prefetching is beneficial for the given CPU.
2010-06-25 H.J. Lu <hongjiu.lu@intel.com>
PR rtl-optimization/44326

View file

@ -949,7 +949,7 @@ Common Report Var(flag_predictive_commoning) Optimization
Run predictive commoning optimization.
fprefetch-loop-arrays
Common Report Var(flag_prefetch_loop_arrays) Optimization
Common Report Var(flag_prefetch_loop_arrays) Init(-1) Optimization
Generate prefetch instructions, if available, for arrays in loops
fprofile

View file

@ -2691,6 +2691,26 @@ ix86_target_string (int isa, int flags, const char *arch, const char *tune,
return ret;
}
/* Tell whether the processor currently selected by ix86_tune is one
   of the models listed below, i.e. one for which software prefetching
   is considered beneficial.  Returns true for those models and false
   for every other tuning target.  */
static bool
software_prefetching_beneficial_p (void)
{
  return (ix86_tune == PROCESSOR_GEODE
          || ix86_tune == PROCESSOR_K6
          || ix86_tune == PROCESSOR_ATHLON
          || ix86_tune == PROCESSOR_K8
          || ix86_tune == PROCESSOR_AMDFAM10);
}
/* Function that is callable from the debugger to print the current
options. */
void
@ -3535,6 +3555,13 @@ override_options (bool main_args_p)
if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
/* Enable sw prefetching at -O3 for CPUs for which prefetching is helpful. */
if (flag_prefetch_loop_arrays < 0
&& HAVE_prefetch
&& optimize >= 3
&& software_prefetching_beneficial_p ())
flag_prefetch_loop_arrays = 1;
/* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
can be optimized to ap = __builtin_next_arg (0). */
if (!TARGET_64BIT)

View file

@ -2013,13 +2013,13 @@ process_options (void)
}
#ifndef HAVE_prefetch
if (flag_prefetch_loop_arrays)
if (flag_prefetch_loop_arrays > 0)
{
warning (0, "-fprefetch-loop-arrays not supported for this target");
flag_prefetch_loop_arrays = 0;
}
#else
if (flag_prefetch_loop_arrays && !HAVE_prefetch)
if (flag_prefetch_loop_arrays > 0 && !HAVE_prefetch)
{
warning (0, "-fprefetch-loop-arrays not supported for this target (try -march switches)");
flag_prefetch_loop_arrays = 0;
@ -2028,7 +2028,7 @@ process_options (void)
/* This combination of options isn't handled for i386 targets and doesn't
make much sense anyway, so don't allow it. */
if (flag_prefetch_loop_arrays && optimize_size)
if (flag_prefetch_loop_arrays > 0 && optimize_size)
{
warning (0, "-fprefetch-loop-arrays is not supported with -Os");
flag_prefetch_loop_arrays = 0;

View file

@ -600,7 +600,7 @@ tree_ssa_loop_prefetch (void)
/* Gate for the loop prefetch pass: returns whether the pass should run,
   based on flag_prefetch_loop_arrays.

   NOTE(review): this hunk is shown without diff markers, so both sides
   of the change appear below.  The first return looks like the line
   being replaced and the second like its replacement; confirm against
   the original patch.  */
static bool
gate_tree_ssa_loop_prefetch (void)
{
/* Accepts any non-zero value, which would include an initial value of -1.  */
return flag_prefetch_loop_arrays != 0;
/* Accepts only values greater than zero, so -1 does not enable the pass.  */
return flag_prefetch_loop_arrays > 0;
}
struct gimple_opt_pass pass_loop_prefetch =