re PR tree-optimization/46728 (GCC does not generate fmadd for pow (x, 0.75)+y on powerpc)

2011-05-24  Bill Schmidt  <wschmidt@linux.vnet.ibm.com>
	
	PR tree-optimization/46728
	* tree-ssa-math-opts.c (powi_table): New.
	(powi_lookup_cost): New.
	(powi_cost): New.
	(powi_as_mults_1): New.
	(powi_as_mults): New.
	(gimple_expand_builtin_powi): New.
	(execute_cse_sincos): Add switch case for BUILT_IN_POWI.
	(gate_cse_sincos): Remove sincos/cexp restriction.

From-SVN: r174129
This commit is contained in:
Bill Schmidt 2011-05-24 18:02:22 +00:00 committed by William Schmidt
parent caee412b75
commit 78be79d534
2 changed files with 269 additions and 6 deletions

View file

@ -1,3 +1,15 @@
2011-05-24 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
PR tree-optimization/46728
* tree-ssa-math-opts.c (powi_table): New.
(powi_lookup_cost): New.
(powi_cost): New.
(powi_as_mults_1): New.
(powi_as_mults): New.
(gimple_expand_builtin_powi): New.
(execute_cse_sincos): Add switch case for BUILT_IN_POWI.
(gate_cse_sincos): Remove sincos/cexp restriction.
2011-05-24 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
PR target/3746

View file

@ -795,8 +795,238 @@ execute_cse_sincos_1 (tree name)
return cfg_changed;
}
/* To evaluate powi(x,n), the floating point value x raised to the
constant integer exponent n, we use a hybrid algorithm that
combines the "window method" with look-up tables. For an
introduction to exponentiation algorithms and "addition chains",
see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
"Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
/* Provide a default value for POWI_MAX_MULTS, the maximum number of
multiplications to inline before calling the system library's pow
function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
so this default never requires calling pow, powf or powl. A
definition supplied before this point takes precedence. */
#ifndef POWI_MAX_MULTS
#define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
#endif
/* The size of the "optimal power tree" lookup table. All
exponents less than this value are simply looked up in the
powi_table below. This threshold is also used to size the
per-exponent caches: the array of "already counted" flags in
powi_cost and the array of already computed results in
powi_as_mults. */
#define POWI_TABLE_SIZE 256
/* The size, in bits, of the window used in the "window method"
exponentiation algorithm. This is equivalent to a radix of
(1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
#define POWI_WINDOW_SIZE 3
/* The following table is an efficient representation of an
"optimal power tree". For each value, i, the corresponding
value, j, in the table states that an optimal evaluation
sequence for calculating pow(x,i) can be found by evaluating
pow(x,j)*pow(x,i-j). An optimal power tree for the first
100 integers is given in Knuth's "Seminumerical algorithms".
Entry 1 is never decomposed: powi_cost and powi_as_mults both
seed cache[1] before any lookup takes place. */
static const unsigned char powi_table[POWI_TABLE_SIZE] =
{
0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
};
/* Count the multiplications still needed to form powi(x,N), where N
   is less than POWI_TABLE_SIZE, by following the decomposition
   recorded in powi_table.  CACHE[i] is true once exponent i has been
   accounted for; every exponent visited here gets marked, so a later
   request for it is free.  This is a subroutine of powi_cost.  */
static int
powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
{
  unsigned HOST_WIDE_INT split;
  int mults;

  /* An exponent that is already available costs nothing extra.  */
  if (cache[n])
    return 0;

  cache[n] = true;
  split = powi_table[n];

  /* One multiply joins the two sub-powers x**(n-split) and x**split;
     the rest is whatever those sub-powers still cost.  */
  mults = 1;
  mults += powi_lookup_cost (n - split, cache);
  mults += powi_lookup_cost (split, cache);
  return mults;
}
/* Return the number of multiplications required to calculate
   powi(x,n) for an arbitrary x, given the exponent N.  The sign of N
   is ignored: the final reciprocal for a negative exponent is not
   counted.  Returns 0 for N == 0.

   Exponents of POWI_TABLE_SIZE or more are reduced with the window
   method until they fit the table; the remainder is costed through
   powi_lookup_cost.  This function needs to be kept in sync with
   powi_as_mults below.  */
static int
powi_cost (HOST_WIDE_INT n)
{
  bool cache[POWI_TABLE_SIZE];
  unsigned HOST_WIDE_INT digit;
  unsigned HOST_WIDE_INT val;
  int result;

  if (n == 0)
    return 0;

  /* Ignore the reciprocal when calculating the cost.  The negation is
     done in the unsigned type so that the most negative value of
     HOST_WIDE_INT yields its magnitude instead of overflowing.  */
  val = (n < 0) ? -(unsigned HOST_WIDE_INT) n : (unsigned HOST_WIDE_INT) n;

  /* Initialize the exponent cache.  x**1 is the input itself.  */
  memset (cache, 0, sizeof (cache));
  cache[1] = true;

  result = 0;

  while (val >= POWI_TABLE_SIZE)
    {
      if (val & 1)
        {
          /* Odd: peel off the low POWI_WINDOW_SIZE bits.  That costs
             the digit itself, one multiply to combine it, and
             POWI_WINDOW_SIZE squarings for the shifted-out bits.  */
          digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
          result += powi_lookup_cost (digit, cache)
                    + POWI_WINDOW_SIZE + 1;
          val >>= POWI_WINDOW_SIZE;
        }
      else
        {
          /* Even: a single squaring halves the exponent.  */
          val >>= 1;
          result++;
        }
    }

  return result + powi_lookup_cost (val, cache);
}
/* Recursive subroutine of powi_as_mults.  This function takes the
   array, CACHE, of already calculated exponents and an exponent N and
   returns a tree that corresponds to CACHE[1]**N.

   The multiplications are inserted before the statement at GSI and
   are given location LOC.  Each new result is an SSA name created
   from TARGET.  Results for exponents below POWI_TABLE_SIZE are
   recorded in CACHE so they are emitted only once.  TYPE is only
   passed along to the recursive calls.  N is expected to be
   positive; powi_as_mults handles zero and negative exponents.  */
static tree
powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
                 HOST_WIDE_INT n, tree *cache, tree target)
{
  tree op0, op1, ssa_target;
  unsigned HOST_WIDE_INT digit;
  gimple mult_stmt;

  if (n < POWI_TABLE_SIZE && cache[n])
    return cache[n];

  ssa_target = make_ssa_name (target, NULL);

  if (n < POWI_TABLE_SIZE)
    {
      /* Small exponent: split it as the power tree dictates.  */
      cache[n] = ssa_target;
      op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache, target);
      op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache, target);
    }
  else if (n & 1)
    {
      /* Large odd exponent: split off the low window digit.  */
      digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
      op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache, target);
      op1 = powi_as_mults_1 (gsi, loc, type, digit, cache, target);
    }
  else
    {
      /* Large even exponent: square the half power.  */
      op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache, target);
      op1 = op0;
    }

  mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target, op0, op1);
  /* Give the generated multiply the location of the call it replaces.  */
  gimple_set_location (mult_stmt, loc);
  gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);

  return ssa_target;
}
/* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
   The statements are inserted before the statement at GSI and are
   given location LOC.  Returns the constant 1 of ARG0's type when N
   is zero, otherwise the SSA name holding the result.  A negative N
   produces the positive power followed by a reciprocal.

   N must not be the most negative HOST_WIDE_INT, since its magnitude
   is taken below; gimple_expand_builtin_powi filters that value out
   before calling here.  This function needs to be kept in sync with
   powi_cost above.  */
static tree
powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
               tree arg0, HOST_WIDE_INT n)
{
  tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0), target;
  gimple div_stmt;

  if (n == 0)
    return build_real (type, dconst1);

  /* x**1 is the argument itself; everything else is built on demand.  */
  memset (cache, 0, sizeof (cache));
  cache[1] = arg0;

  target = create_tmp_var (type, "powmult");
  add_referenced_var (target);

  result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache, target);

  if (n >= 0)
    return result;

  /* If the original exponent was negative, reciprocate the result.  */
  target = make_ssa_name (target, NULL);
  div_stmt = gimple_build_assign_with_ops (RDIV_EXPR, target,
                                           build_real (type, dconst1),
                                           result);
  /* Give the generated division the location of the call it replaces.  */
  gimple_set_location (div_stmt, loc);
  gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);

  return target;
}
/* ARG0 and N are the two arguments to a powi builtin in GSI with
   location info LOC.  If the arguments are appropriate, create an
   equivalent sequence of statements prior to GSI using an optimal
   number of multiplications, and return an expression holding the
   result.  Otherwise return NULL_TREE and emit nothing.

   Exponents -1, 1 and 2 are always expanded.  Other exponents are
   expanded only when the function is optimized for speed and
   powi_cost (N) does not exceed POWI_MAX_MULTS.  */
static tree
gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
                            tree arg0, HOST_WIDE_INT n)
{
  /* Bit pattern of the most negative HOST_WIDE_INT.  Its magnitude is
     not representable, so it cannot be handed to powi_as_mults.  */
  const unsigned HOST_WIDE_INT most_negative
    = (unsigned HOST_WIDE_INT) 1 << (HOST_BITS_PER_WIDE_INT - 1);

  /* Avoid largest negative number.  The comparison is made in the
     unsigned type so that the test itself does not overflow.  Zero is
     rejected as well, as it always was by the earlier "n != -n" form
     of this test.  */
  if (n == 0 || (unsigned HOST_WIDE_INT) n == most_negative)
    return NULL_TREE;

  if ((n >= -1 && n <= 2)
      || (optimize_function_for_speed_p (cfun)
          && powi_cost (n) <= POWI_MAX_MULTS))
    return powi_as_mults (gsi, loc, arg0, n);

  return NULL_TREE;
}
/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
on the SSA_NAME argument of each of them. */
on the SSA_NAME argument of each of them. Also expand powi(x,n) into
an optimal number of multiplies, when n is a constant. */
static unsigned int
execute_cse_sincos (void)
@ -821,7 +1051,9 @@ execute_cse_sincos (void)
&& (fndecl = gimple_call_fndecl (stmt))
&& DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
{
tree arg;
tree arg, arg0, arg1, result;
HOST_WIDE_INT n;
location_t loc;
switch (DECL_FUNCTION_CODE (fndecl))
{
@ -833,6 +1065,26 @@ execute_cse_sincos (void)
cfg_changed |= execute_cse_sincos_1 (arg);
break;
CASE_FLT_FN (BUILT_IN_POWI):
arg0 = gimple_call_arg (stmt, 0);
arg1 = gimple_call_arg (stmt, 1);
if (!host_integerp (arg1, 0))
break;
n = TREE_INT_CST_LOW (arg1);
loc = gimple_location (stmt);
result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
if (result)
{
tree lhs = gimple_get_lhs (stmt);
gimple new_stmt = gimple_build_assign (lhs, result);
gimple_set_location (new_stmt, loc);
unlink_stmt_vdef (stmt);
gsi_replace (&gsi, new_stmt, true);
}
break;
default:;
}
}
@ -849,10 +1101,9 @@ execute_cse_sincos (void)
static bool
gate_cse_sincos (void)
{
/* Make sure we have either sincos or cexp. */
return (TARGET_HAS_SINCOS
|| TARGET_C99_FUNCTIONS)
&& optimize;
/* We no longer require either sincos or cexp, since powi expansion
piggybacks on this pass. */
return optimize;
}
struct gimple_opt_pass pass_cse_sincos =