Avoid compile time hog on vect_peel_nonlinear_iv_init for nonlinear induction vec_step_op_mul when iteration count is too big.

There's a loop in vect_peel_nonlinear_iv_init that computes init_expr *
pow (step_expr, skip_niters). When skip_niters is too big, compile time
blows up. To avoid that, optimize init_expr * pow (step_expr, skip_niters)
to init_expr << (exact_log2 (step_expr) * skip_niters) when step_expr is
a power of 2; otherwise give up vectorization when skip_niters >=
TYPE_PRECISION (TREE_TYPE (init_expr)).

Also give up vectorization when niters_skip is negative, which is used
for fully masked loops.

gcc/ChangeLog:

	PR tree-optimization/111820
	PR tree-optimization/111833
	* tree-vect-loop-manip.cc (vect_can_peel_nonlinear_iv_p): Give
	up vectorization for nonlinear iv vect_step_op_mul when
	step_expr is not a power of 2 and niters is greater than or
	equal to TYPE_PRECISION (TREE_TYPE (step_expr)). Also don't
	vectorize for negative niters_skip, which is used by fully
	masked loops.
	(vect_can_advance_ivs_p): Pass whole phi_info to
	vect_can_peel_nonlinear_iv_p.
	* tree-vect-loop.cc (vect_peel_nonlinear_iv_init): Optimize
	init_expr * pow (step_expr, skipn) to init_expr
	<< (log2 (step_expr) * skipn) when step_expr is a power of 2.

gcc/testsuite/ChangeLog:

	* gcc.target/i386/pr111820-1.c: New test.
	* gcc.target/i386/pr111820-2.c: New test.
	* gcc.target/i386/pr111820-3.c: New test.
	* gcc.target/i386/pr103144-mul-1.c: Adjust testcase.
	* gcc.target/i386/pr103144-mul-2.c: Adjust testcase.
This commit is contained in:
liuhongt 2023-10-18 10:08:24 +08:00
parent 93a65e0adb
commit dbde384bd5
7 changed files with 90 additions and 15 deletions

View file

@ -11,7 +11,7 @@ foo_mul (int* a, int b)
for (int i = 0; i != N; i++)
{
a[i] = b;
b *= 3;
b *= 4;
}
}
@ -23,7 +23,7 @@ foo_mul_const (int* a)
for (int i = 0; i != N; i++)
{
a[i] = b;
b *= 3;
b *= 4;
}
}
@ -34,7 +34,7 @@ foo_mul_peel (int* a, int b)
for (int i = 0; i != 39; i++)
{
a[i] = b;
b *= 3;
b *= 4;
}
}
@ -46,6 +46,6 @@ foo_mul_peel_const (int* a)
for (int i = 0; i != 39; i++)
{
a[i] = b;
b *= 3;
b *= 4;
}
}

View file

@ -16,12 +16,12 @@ avx2_test (void)
__builtin_memset (epi32_exp, 0, N * sizeof (int));
int b = 8;
v8si init = __extension__(v8si) { b, b * 3, b * 9, b * 27, b * 81, b * 243, b * 729, b * 2187 };
v8si init = __extension__(v8si) { b, b * 4, b * 16, b * 64, b * 256, b * 1024, b * 4096, b * 16384 };
for (int i = 0; i != N / 8; i++)
{
memcpy (epi32_exp + i * 8, &init, 32);
init *= 6561;
init *= 65536;
}
foo_mul (epi32_dst, b);
@ -32,11 +32,11 @@ avx2_test (void)
if (__builtin_memcmp (epi32_dst, epi32_exp, 39 * 4) != 0)
__builtin_abort ();
init = __extension__(v8si) { 1, 3, 9, 27, 81, 243, 729, 2187 };
init = __extension__(v8si) { 1, 4, 16, 64, 256, 1024, 4096, 16384 };
for (int i = 0; i != N / 8; i++)
{
memcpy (epi32_exp + i * 8, &init, 32);
init *= 6561;
init *= 65536;
}
foo_mul_const (epi32_dst);

View file

@ -0,0 +1,16 @@
/* { dg-do compile } */
/* { dg-options "-O3 -mavx2 -fno-tree-vrp -Wno-aggressive-loop-optimizations -fdump-tree-vect-details" } */
/* { dg-final { scan-tree-dump "Avoid compile time hog on vect_peel_nonlinear_iv_init for nonlinear induction vec_step_op_mul when iteration count is too big" "vect" } } */
int r;
int r_0;
/* Regression test body: a multiplicative (nonlinear) induction on `r`
   inside a loop whose counter starts at 0 and is pre-decremented.
   The first condition check sees n == -1, so the loop can only end
   once n comes back around to 0, i.e. the iteration count is very
   large.  The step (3) is not a power of two; the dg-final directive
   at the top of this file expects the vectorizer dump to contain the
   "Avoid compile time hog ..." message for this loop.  */
void f1 (void)
{
int n = 0;
while (-- n)
{
/* Accumulate the current value of r before it is scaled.  */
r_0 += r;
/* Nonlinear induction: r is multiplied by a non-power-of-two step.  */
r *= 3;
}
}

View file

@ -0,0 +1,16 @@
/* { dg-do compile } */
/* { dg-options "-O3 -mavx2 -fno-tree-vrp -fdump-tree-vect-details -Wno-aggressive-loop-optimizations" } */
/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
int r;
int r_0;
/* Regression test body: same loop shape as the non-power-of-two
   variant (counter starts at 0 and is pre-decremented, so the
   iteration count is very large), but the multiplicative step is 2.
   The dg-final directive at the top of this file expects
   "LOOP VECTORIZED" in the vect dump for this loop.  */
void f (void)
{
int n = 0;
while (-- n)
{
/* Accumulate the current value of r before it is scaled.  */
r_0 += r ;
/* Nonlinear induction with a power-of-two step.  */
r *= 2;
}
}

View file

@ -0,0 +1,16 @@
/* { dg-do compile } */
/* { dg-options "-O3 -mavx2 -fno-tree-vrp -fdump-tree-vect-details -Wno-aggressive-loop-optimizations" } */
/* { dg-final { scan-tree-dump "LOOP VECTORIZED" "vect" } } */
int r;
int r_0;
/* Regression test body: multiplicative induction with a
   non-power-of-two step (3), but with a small constant trip count:
   n starts at 14 and is pre-decremented, so the body runs for
   n = 13 down to 1 (13 iterations).  The dg-final directive at the
   top of this file expects "LOOP VECTORIZED" in the vect dump.
   NOTE(review): presumably 13 is below the TYPE_PRECISION threshold
   the vectorizer checks for this induction kind -- confirm.  */
void f (void)
{
int n = 14;
while (-- n)
{
/* Accumulate the current value of r before it is scaled.  */
r_0 += r ;
/* Nonlinear induction: r is multiplied by a non-power-of-two step.  */
r *= 3;
}
}

View file

@ -1904,8 +1904,10 @@ iv_phi_p (stmt_vec_info stmt_info)
/* Return true if vectorizer can peel for nonlinear iv. */
static bool
vect_can_peel_nonlinear_iv_p (loop_vec_info loop_vinfo,
enum vect_induction_op_type induction_type)
stmt_vec_info stmt_info)
{
enum vect_induction_op_type induction_type
= STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (stmt_info);
tree niters_skip;
/* Init_expr will be updated by vect_update_ivs_after_vectorizer,
if niters or vf is unknown:
@ -1926,11 +1928,31 @@ vect_can_peel_nonlinear_iv_p (loop_vec_info loop_vinfo,
return false;
}
/* Avoid compile time hog on vect_peel_nonlinear_iv_init. */
if (induction_type == vect_step_op_mul)
{
tree step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info);
tree type = TREE_TYPE (step_expr);
if (wi::exact_log2 (wi::to_wide (step_expr)) == -1
&& LOOP_VINFO_INT_NITERS(loop_vinfo) >= TYPE_PRECISION (type))
{
if (dump_enabled_p ())
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"Avoid compile time hog on"
" vect_peel_nonlinear_iv_init"
" for nonlinear induction vec_step_op_mul"
" when iteration count is too big.\n");
return false;
}
}
/* Also doesn't support peeling for neg when niter is variable.
??? generate something like niter_expr & 1 ? init_expr : -init_expr? */
niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
if ((niters_skip != NULL_TREE
&& TREE_CODE (niters_skip) != INTEGER_CST)
&& (TREE_CODE (niters_skip) != INTEGER_CST
|| (HOST_WIDE_INT) TREE_INT_CST_LOW (niters_skip) < 0))
|| (!vect_use_loop_mask_for_alignment_p (loop_vinfo)
&& LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0))
{
@ -1991,7 +2013,7 @@ vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
induction_type = STMT_VINFO_LOOP_PHI_EVOLUTION_TYPE (phi_info);
if (induction_type != vect_step_op_add)
{
if (!vect_can_peel_nonlinear_iv_p (loop_vinfo, induction_type))
if (!vect_can_peel_nonlinear_iv_p (loop_vinfo, phi_info))
return false;
continue;

View file

@ -9222,12 +9222,17 @@ vect_peel_nonlinear_iv_init (gimple_seq* stmts, tree init_expr,
{
tree utype = unsigned_type_for (type);
init_expr = gimple_convert (stmts, utype, init_expr);
unsigned skipn = TREE_INT_CST_LOW (skip_niters);
wide_int skipn = wi::to_wide (skip_niters);
wide_int begin = wi::to_wide (step_expr);
for (unsigned i = 0; i != skipn - 1; i++)
begin = wi::mul (begin, wi::to_wide (step_expr));
auto_mpz base, exp, mod, res;
wi::to_mpz (begin, base, TYPE_SIGN (type));
wi::to_mpz (skipn, exp, UNSIGNED);
mpz_ui_pow_ui (mod, 2, TYPE_PRECISION (type));
mpz_powm (res, base, exp, mod);
begin = wi::from_mpz (type, res, TYPE_SIGN (type));
tree mult_expr = wide_int_to_tree (utype, begin);
init_expr = gimple_build (stmts, MULT_EXPR, utype, init_expr, mult_expr);
init_expr = gimple_build (stmts, MULT_EXPR, utype,
init_expr, mult_expr);
init_expr = gimple_convert (stmts, type, init_expr);
}
break;