re PR tree-optimization/46728 (GCC does not generate fmadd for pow (x, 0.75)+y on powerpc)
2011-05-24 Bill Schmidt <wschmidt@linux.vnet.ibm.com> PR tree-optimization/46728 * tree-ssa-math-opts.c (powi_table): New. (powi_lookup_cost): New. (powi_cost): New. (powi_as_mults_1): New. (powi_as_mults): New. (gimple_expand_builtin_powi): New. (execute_cse_sincos): Add switch case for BUILT_IN_POWI. (gate_cse_sincos): Remove sincos/cexp restriction. From-SVN: r174129
This commit is contained in:
parent
caee412b75
commit
78be79d534
2 changed files with 269 additions and 6 deletions
|
@ -1,3 +1,15 @@
|
|||
2011-05-24 Bill Schmidt <wschmidt@linux.vnet.ibm.com>
|
||||
|
||||
PR tree-optimization/46728
|
||||
* tree-ssa-math-opts.c (powi_table): New.
|
||||
(powi_lookup_cost): New.
|
||||
(powi_cost): New.
|
||||
(powi_as_mults_1): New.
|
||||
(powi_as_mults): New.
|
||||
(gimple_expand_builtin_powi): New.
|
||||
(execute_cse_sincos): Add switch case for BUILT_IN_POWI.
|
||||
(gate_cse_sincos): Remove sincos/cexp restriction.
|
||||
|
||||
2011-05-24 Rainer Orth <ro@CeBiTec.Uni-Bielefeld.DE>
|
||||
|
||||
PR target/3746
|
||||
|
|
|
@ -795,8 +795,238 @@ execute_cse_sincos_1 (tree name)
|
|||
return cfg_changed;
|
||||
}
|
||||
|
||||
/* To evaluate powi(x,n), the floating point value x raised to the
|
||||
constant integer exponent n, we use a hybrid algorithm that
|
||||
combines the "window method" with look-up tables. For an
|
||||
introduction to exponentiation algorithms and "addition chains",
|
||||
see section 4.6.3, "Evaluation of Powers" of Donald E. Knuth,
|
||||
"Seminumerical Algorithms", Vol. 2, "The Art of Computer Programming",
|
||||
3rd Edition, 1998, and Daniel M. Gordon, "A Survey of Fast Exponentiation
|
||||
Methods", Journal of Algorithms, Vol. 27, pp. 129-146, 1998. */
|
||||
|
||||
/* Provide a default value for POWI_MAX_MULTS, the maximum number of
|
||||
multiplications to inline before calling the system library's pow
|
||||
function. powi(x,n) requires at worst 2*bits(n)-2 multiplications,
|
||||
so this default never requires calling pow, powf or powl. */
|
||||
|
||||
#ifndef POWI_MAX_MULTS
|
||||
#define POWI_MAX_MULTS (2*HOST_BITS_PER_WIDE_INT-2)
|
||||
#endif
|
||||
|
||||
/* The size of the "optimal power tree" lookup table. All
|
||||
exponents less than this value are simply looked up in the
|
||||
powi_table below. This threshold is also used to size the
|
||||
cache of pseudo registers that hold intermediate results. */
|
||||
#define POWI_TABLE_SIZE 256
|
||||
|
||||
/* The size, in bits of the window, used in the "window method"
|
||||
exponentiation algorithm. This is equivalent to a radix of
|
||||
(1<<POWI_WINDOW_SIZE) in the corresponding "m-ary method". */
|
||||
#define POWI_WINDOW_SIZE 3
|
||||
|
||||
/* The following table is an efficient representation of an
|
||||
"optimal power tree". For each value, i, the corresponding
|
||||
value, j, in the table states than an optimal evaluation
|
||||
sequence for calculating pow(x,i) can be found by evaluating
|
||||
pow(x,j)*pow(x,i-j). An optimal power tree for the first
|
||||
100 integers is given in Knuth's "Seminumerical algorithms". */
|
||||
|
||||
static const unsigned char powi_table[POWI_TABLE_SIZE] =
|
||||
{
|
||||
0, 1, 1, 2, 2, 3, 3, 4, /* 0 - 7 */
|
||||
4, 6, 5, 6, 6, 10, 7, 9, /* 8 - 15 */
|
||||
8, 16, 9, 16, 10, 12, 11, 13, /* 16 - 23 */
|
||||
12, 17, 13, 18, 14, 24, 15, 26, /* 24 - 31 */
|
||||
16, 17, 17, 19, 18, 33, 19, 26, /* 32 - 39 */
|
||||
20, 25, 21, 40, 22, 27, 23, 44, /* 40 - 47 */
|
||||
24, 32, 25, 34, 26, 29, 27, 44, /* 48 - 55 */
|
||||
28, 31, 29, 34, 30, 60, 31, 36, /* 56 - 63 */
|
||||
32, 64, 33, 34, 34, 46, 35, 37, /* 64 - 71 */
|
||||
36, 65, 37, 50, 38, 48, 39, 69, /* 72 - 79 */
|
||||
40, 49, 41, 43, 42, 51, 43, 58, /* 80 - 87 */
|
||||
44, 64, 45, 47, 46, 59, 47, 76, /* 88 - 95 */
|
||||
48, 65, 49, 66, 50, 67, 51, 66, /* 96 - 103 */
|
||||
52, 70, 53, 74, 54, 104, 55, 74, /* 104 - 111 */
|
||||
56, 64, 57, 69, 58, 78, 59, 68, /* 112 - 119 */
|
||||
60, 61, 61, 80, 62, 75, 63, 68, /* 120 - 127 */
|
||||
64, 65, 65, 128, 66, 129, 67, 90, /* 128 - 135 */
|
||||
68, 73, 69, 131, 70, 94, 71, 88, /* 136 - 143 */
|
||||
72, 128, 73, 98, 74, 132, 75, 121, /* 144 - 151 */
|
||||
76, 102, 77, 124, 78, 132, 79, 106, /* 152 - 159 */
|
||||
80, 97, 81, 160, 82, 99, 83, 134, /* 160 - 167 */
|
||||
84, 86, 85, 95, 86, 160, 87, 100, /* 168 - 175 */
|
||||
88, 113, 89, 98, 90, 107, 91, 122, /* 176 - 183 */
|
||||
92, 111, 93, 102, 94, 126, 95, 150, /* 184 - 191 */
|
||||
96, 128, 97, 130, 98, 133, 99, 195, /* 192 - 199 */
|
||||
100, 128, 101, 123, 102, 164, 103, 138, /* 200 - 207 */
|
||||
104, 145, 105, 146, 106, 109, 107, 149, /* 208 - 215 */
|
||||
108, 200, 109, 146, 110, 170, 111, 157, /* 216 - 223 */
|
||||
112, 128, 113, 130, 114, 182, 115, 132, /* 224 - 231 */
|
||||
116, 200, 117, 132, 118, 158, 119, 206, /* 232 - 239 */
|
||||
120, 240, 121, 162, 122, 147, 123, 152, /* 240 - 247 */
|
||||
124, 166, 125, 214, 126, 138, 127, 153, /* 248 - 255 */
|
||||
};
|
||||
|
||||
|
||||
/* Return the number of multiplications required to calculate
|
||||
powi(x,n) where n is less than POWI_TABLE_SIZE. This is a
|
||||
subroutine of powi_cost. CACHE is an array indicating
|
||||
which exponents have already been calculated. */
|
||||
|
||||
static int
|
||||
powi_lookup_cost (unsigned HOST_WIDE_INT n, bool *cache)
|
||||
{
|
||||
/* If we've already calculated this exponent, then this evaluation
|
||||
doesn't require any additional multiplications. */
|
||||
if (cache[n])
|
||||
return 0;
|
||||
|
||||
cache[n] = true;
|
||||
return powi_lookup_cost (n - powi_table[n], cache)
|
||||
+ powi_lookup_cost (powi_table[n], cache) + 1;
|
||||
}
|
||||
|
||||
/* Return the number of multiplications required to calculate
|
||||
powi(x,n) for an arbitrary x, given the exponent N. This
|
||||
function needs to be kept in sync with powi_as_mults below. */
|
||||
|
||||
static int
|
||||
powi_cost (HOST_WIDE_INT n)
|
||||
{
|
||||
bool cache[POWI_TABLE_SIZE];
|
||||
unsigned HOST_WIDE_INT digit;
|
||||
unsigned HOST_WIDE_INT val;
|
||||
int result;
|
||||
|
||||
if (n == 0)
|
||||
return 0;
|
||||
|
||||
/* Ignore the reciprocal when calculating the cost. */
|
||||
val = (n < 0) ? -n : n;
|
||||
|
||||
/* Initialize the exponent cache. */
|
||||
memset (cache, 0, POWI_TABLE_SIZE * sizeof (bool));
|
||||
cache[1] = true;
|
||||
|
||||
result = 0;
|
||||
|
||||
while (val >= POWI_TABLE_SIZE)
|
||||
{
|
||||
if (val & 1)
|
||||
{
|
||||
digit = val & ((1 << POWI_WINDOW_SIZE) - 1);
|
||||
result += powi_lookup_cost (digit, cache)
|
||||
+ POWI_WINDOW_SIZE + 1;
|
||||
val >>= POWI_WINDOW_SIZE;
|
||||
}
|
||||
else
|
||||
{
|
||||
val >>= 1;
|
||||
result++;
|
||||
}
|
||||
}
|
||||
|
||||
return result + powi_lookup_cost (val, cache);
|
||||
}
|
||||
|
||||
/* Recursive subroutine of powi_as_mults. This function takes the
|
||||
array, CACHE, of already calculated exponents and an exponent N and
|
||||
returns a tree that corresponds to CACHE[1]**N, with type TYPE. */
|
||||
|
||||
static tree
|
||||
powi_as_mults_1 (gimple_stmt_iterator *gsi, location_t loc, tree type,
|
||||
HOST_WIDE_INT n, tree *cache, tree target)
|
||||
{
|
||||
tree op0, op1, ssa_target;
|
||||
unsigned HOST_WIDE_INT digit;
|
||||
gimple mult_stmt;
|
||||
|
||||
if (n < POWI_TABLE_SIZE && cache[n])
|
||||
return cache[n];
|
||||
|
||||
ssa_target = make_ssa_name (target, NULL);
|
||||
|
||||
if (n < POWI_TABLE_SIZE)
|
||||
{
|
||||
cache[n] = ssa_target;
|
||||
op0 = powi_as_mults_1 (gsi, loc, type, n - powi_table[n], cache, target);
|
||||
op1 = powi_as_mults_1 (gsi, loc, type, powi_table[n], cache, target);
|
||||
}
|
||||
else if (n & 1)
|
||||
{
|
||||
digit = n & ((1 << POWI_WINDOW_SIZE) - 1);
|
||||
op0 = powi_as_mults_1 (gsi, loc, type, n - digit, cache, target);
|
||||
op1 = powi_as_mults_1 (gsi, loc, type, digit, cache, target);
|
||||
}
|
||||
else
|
||||
{
|
||||
op0 = powi_as_mults_1 (gsi, loc, type, n >> 1, cache, target);
|
||||
op1 = op0;
|
||||
}
|
||||
|
||||
mult_stmt = gimple_build_assign_with_ops (MULT_EXPR, ssa_target, op0, op1);
|
||||
gsi_insert_before (gsi, mult_stmt, GSI_SAME_STMT);
|
||||
|
||||
return ssa_target;
|
||||
}
|
||||
|
||||
/* Convert ARG0**N to a tree of multiplications of ARG0 with itself.
|
||||
This function needs to be kept in sync with powi_cost above. */
|
||||
|
||||
static tree
|
||||
powi_as_mults (gimple_stmt_iterator *gsi, location_t loc,
|
||||
tree arg0, HOST_WIDE_INT n)
|
||||
{
|
||||
tree cache[POWI_TABLE_SIZE], result, type = TREE_TYPE (arg0), target;
|
||||
gimple div_stmt;
|
||||
|
||||
if (n == 0)
|
||||
return build_real (type, dconst1);
|
||||
|
||||
memset (cache, 0, sizeof (cache));
|
||||
cache[1] = arg0;
|
||||
|
||||
target = create_tmp_var (type, "powmult");
|
||||
add_referenced_var (target);
|
||||
|
||||
result = powi_as_mults_1 (gsi, loc, type, (n < 0) ? -n : n, cache, target);
|
||||
|
||||
if (n >= 0)
|
||||
return result;
|
||||
|
||||
/* If the original exponent was negative, reciprocate the result. */
|
||||
target = make_ssa_name (target, NULL);
|
||||
div_stmt = gimple_build_assign_with_ops (RDIV_EXPR, target,
|
||||
build_real (type, dconst1),
|
||||
result);
|
||||
gsi_insert_before (gsi, div_stmt, GSI_SAME_STMT);
|
||||
|
||||
return target;
|
||||
}
|
||||
|
||||
/* ARG0 and N are the two arguments to a powi builtin in GSI with
|
||||
location info LOC. If the arguments are appropriate, create an
|
||||
equivalent sequence of statements prior to GSI using an optimal
|
||||
number of multiplications, and return an expession holding the
|
||||
result. */
|
||||
|
||||
static tree
|
||||
gimple_expand_builtin_powi (gimple_stmt_iterator *gsi, location_t loc,
|
||||
tree arg0, HOST_WIDE_INT n)
|
||||
{
|
||||
/* Avoid largest negative number. */
|
||||
if (n != -n
|
||||
&& ((n >= -1 && n <= 2)
|
||||
|| (optimize_function_for_speed_p (cfun)
|
||||
&& powi_cost (n) <= POWI_MAX_MULTS)))
|
||||
return powi_as_mults (gsi, loc, arg0, n);
|
||||
|
||||
return NULL_TREE;
|
||||
}
|
||||
|
||||
/* Go through all calls to sin, cos and cexpi and call execute_cse_sincos_1
|
||||
on the SSA_NAME argument of each of them. */
|
||||
on the SSA_NAME argument of each of them. Also expand powi(x,n) into
|
||||
an optimal number of multiplies, when n is a constant. */
|
||||
|
||||
static unsigned int
|
||||
execute_cse_sincos (void)
|
||||
|
@ -821,7 +1051,9 @@ execute_cse_sincos (void)
|
|||
&& (fndecl = gimple_call_fndecl (stmt))
|
||||
&& DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
|
||||
{
|
||||
tree arg;
|
||||
tree arg, arg0, arg1, result;
|
||||
HOST_WIDE_INT n;
|
||||
location_t loc;
|
||||
|
||||
switch (DECL_FUNCTION_CODE (fndecl))
|
||||
{
|
||||
|
@ -833,6 +1065,26 @@ execute_cse_sincos (void)
|
|||
cfg_changed |= execute_cse_sincos_1 (arg);
|
||||
break;
|
||||
|
||||
CASE_FLT_FN (BUILT_IN_POWI):
|
||||
arg0 = gimple_call_arg (stmt, 0);
|
||||
arg1 = gimple_call_arg (stmt, 1);
|
||||
if (!host_integerp (arg1, 0))
|
||||
break;
|
||||
|
||||
n = TREE_INT_CST_LOW (arg1);
|
||||
loc = gimple_location (stmt);
|
||||
result = gimple_expand_builtin_powi (&gsi, loc, arg0, n);
|
||||
|
||||
if (result)
|
||||
{
|
||||
tree lhs = gimple_get_lhs (stmt);
|
||||
gimple new_stmt = gimple_build_assign (lhs, result);
|
||||
gimple_set_location (new_stmt, loc);
|
||||
unlink_stmt_vdef (stmt);
|
||||
gsi_replace (&gsi, new_stmt, true);
|
||||
}
|
||||
break;
|
||||
|
||||
default:;
|
||||
}
|
||||
}
|
||||
|
@ -849,10 +1101,9 @@ execute_cse_sincos (void)
|
|||
static bool
|
||||
gate_cse_sincos (void)
|
||||
{
|
||||
/* Make sure we have either sincos or cexp. */
|
||||
return (TARGET_HAS_SINCOS
|
||||
|| TARGET_C99_FUNCTIONS)
|
||||
&& optimize;
|
||||
/* We no longer require either sincos or cexp, since powi expansion
|
||||
piggybacks on this pass. */
|
||||
return optimize;
|
||||
}
|
||||
|
||||
struct gimple_opt_pass pass_cse_sincos =
|
||||
|
|
Loading…
Add table
Reference in a new issue