openmp: Add support for strict modifier on grainsize/num_tasks clauses
With strict: modifier on these clauses, the standard is explicit about how many iterations (and which) each generated task of taskloop directive should contain. For num_tasks it actually matches what we were already implementing, but for grainsize it does not (and even violates the old rule - without strict it requires that the number of iterations (unspecified which exactly) handled by each generated task is >= grainsize argument and < 2 * grainsize argument, with strict: it requires that each generated task handles exactly == grainsize argument iterations, except for the generated task handling the last iteration which can handles <= grainsize iterations). The following patch implements it for C and C++. 2021-08-23 Jakub Jelinek <jakub@redhat.com> gcc/ * tree.h (OMP_CLAUSE_GRAINSIZE_STRICT): Define. (OMP_CLAUSE_NUM_TASKS_STRICT): Define. * tree-pretty-print.c (dump_omp_clause) <case OMP_CLAUSE_GRAINSIZE, case OMP_CLAUSE_NUM_TASKS>: Print strict: modifier. * omp-expand.c (expand_task_call): Use GOMP_TASK_FLAG_STRICT in iflags if either grainsize or num_tasks clause has the strict modifier. gcc/c/ * c-parser.c (c_parser_omp_clause_num_tasks, c_parser_omp_clause_grainsize): Parse the optional strict: modifier. gcc/cp/ * parser.c (cp_parser_omp_clause_num_tasks, cp_parser_omp_clause_grainsize): Parse the optional strict: modifier. include/ * gomp-constants.h (GOMP_TASK_FLAG_STRICT): Define. libgomp/ * taskloop.c (GOMP_taskloop): Handle GOMP_TASK_FLAG_STRICT. * testsuite/libgomp.c-c++-common/taskloop-4.c (main): Fix up comment. * testsuite/libgomp.c-c++-common/taskloop-5.c: New test.
This commit is contained in:
parent
6f1a3668f5
commit
3bc75533d1
9 changed files with 245 additions and 10 deletions
|
@ -13786,7 +13786,10 @@ c_parser_omp_clause_num_threads (c_parser *parser, tree list)
|
|||
}
|
||||
|
||||
/* OpenMP 4.5:
|
||||
num_tasks ( expression ) */
|
||||
num_tasks ( expression )
|
||||
|
||||
OpenMP 5.1:
|
||||
num_tasks ( strict : expression ) */
|
||||
|
||||
static tree
|
||||
c_parser_omp_clause_num_tasks (c_parser *parser, tree list)
|
||||
|
@ -13795,6 +13798,17 @@ c_parser_omp_clause_num_tasks (c_parser *parser, tree list)
|
|||
matching_parens parens;
|
||||
if (parens.require_open (parser))
|
||||
{
|
||||
bool strict = false;
|
||||
if (c_parser_next_token_is (parser, CPP_NAME)
|
||||
&& c_parser_peek_2nd_token (parser)->type == CPP_COLON
|
||||
&& strcmp (IDENTIFIER_POINTER (c_parser_peek_token (parser)->value),
|
||||
"strict") == 0)
|
||||
{
|
||||
strict = true;
|
||||
c_parser_consume_token (parser);
|
||||
c_parser_consume_token (parser);
|
||||
}
|
||||
|
||||
location_t expr_loc = c_parser_peek_token (parser)->location;
|
||||
c_expr expr = c_parser_expr_no_commas (parser, NULL);
|
||||
expr = convert_lvalue_to_rvalue (expr_loc, expr, false, true);
|
||||
|
@ -13824,6 +13838,7 @@ c_parser_omp_clause_num_tasks (c_parser *parser, tree list)
|
|||
|
||||
c = build_omp_clause (num_tasks_loc, OMP_CLAUSE_NUM_TASKS);
|
||||
OMP_CLAUSE_NUM_TASKS_EXPR (c) = t;
|
||||
OMP_CLAUSE_NUM_TASKS_STRICT (c) = strict;
|
||||
OMP_CLAUSE_CHAIN (c) = list;
|
||||
list = c;
|
||||
}
|
||||
|
@ -13832,7 +13847,10 @@ c_parser_omp_clause_num_tasks (c_parser *parser, tree list)
|
|||
}
|
||||
|
||||
/* OpenMP 4.5:
|
||||
grainsize ( expression ) */
|
||||
grainsize ( expression )
|
||||
|
||||
OpenMP 5.1:
|
||||
grainsize ( strict : expression ) */
|
||||
|
||||
static tree
|
||||
c_parser_omp_clause_grainsize (c_parser *parser, tree list)
|
||||
|
@ -13841,6 +13859,17 @@ c_parser_omp_clause_grainsize (c_parser *parser, tree list)
|
|||
matching_parens parens;
|
||||
if (parens.require_open (parser))
|
||||
{
|
||||
bool strict = false;
|
||||
if (c_parser_next_token_is (parser, CPP_NAME)
|
||||
&& c_parser_peek_2nd_token (parser)->type == CPP_COLON
|
||||
&& strcmp (IDENTIFIER_POINTER (c_parser_peek_token (parser)->value),
|
||||
"strict") == 0)
|
||||
{
|
||||
strict = true;
|
||||
c_parser_consume_token (parser);
|
||||
c_parser_consume_token (parser);
|
||||
}
|
||||
|
||||
location_t expr_loc = c_parser_peek_token (parser)->location;
|
||||
c_expr expr = c_parser_expr_no_commas (parser, NULL);
|
||||
expr = convert_lvalue_to_rvalue (expr_loc, expr, false, true);
|
||||
|
@ -13870,6 +13899,7 @@ c_parser_omp_clause_grainsize (c_parser *parser, tree list)
|
|||
|
||||
c = build_omp_clause (grainsize_loc, OMP_CLAUSE_GRAINSIZE);
|
||||
OMP_CLAUSE_GRAINSIZE_EXPR (c) = t;
|
||||
OMP_CLAUSE_GRAINSIZE_STRICT (c) = strict;
|
||||
OMP_CLAUSE_CHAIN (c) = list;
|
||||
list = c;
|
||||
}
|
||||
|
|
|
@ -37237,7 +37237,10 @@ cp_parser_omp_clause_num_threads (cp_parser *parser, tree list,
|
|||
}
|
||||
|
||||
/* OpenMP 4.5:
|
||||
num_tasks ( expression ) */
|
||||
num_tasks ( expression )
|
||||
|
||||
OpenMP 5.1:
|
||||
num_tasks ( strict : expression ) */
|
||||
|
||||
static tree
|
||||
cp_parser_omp_clause_num_tasks (cp_parser *parser, tree list,
|
||||
|
@ -37249,6 +37252,19 @@ cp_parser_omp_clause_num_tasks (cp_parser *parser, tree list,
|
|||
if (!parens.require_open (parser))
|
||||
return list;
|
||||
|
||||
bool strict = false;
|
||||
if (cp_lexer_next_token_is (parser->lexer, CPP_NAME)
|
||||
&& cp_lexer_nth_token_is (parser->lexer, 2, CPP_COLON))
|
||||
{
|
||||
tree id = cp_lexer_peek_token (parser->lexer)->u.value;
|
||||
if (!strcmp (IDENTIFIER_POINTER (id), "strict"))
|
||||
{
|
||||
strict = true;
|
||||
cp_lexer_consume_token (parser->lexer);
|
||||
cp_lexer_consume_token (parser->lexer);
|
||||
}
|
||||
}
|
||||
|
||||
t = cp_parser_assignment_expression (parser);
|
||||
|
||||
if (t == error_mark_node
|
||||
|
@ -37262,13 +37278,17 @@ cp_parser_omp_clause_num_tasks (cp_parser *parser, tree list,
|
|||
|
||||
c = build_omp_clause (location, OMP_CLAUSE_NUM_TASKS);
|
||||
OMP_CLAUSE_NUM_TASKS_EXPR (c) = t;
|
||||
OMP_CLAUSE_NUM_TASKS_STRICT (c) = strict;
|
||||
OMP_CLAUSE_CHAIN (c) = list;
|
||||
|
||||
return c;
|
||||
}
|
||||
|
||||
/* OpenMP 4.5:
|
||||
grainsize ( expression ) */
|
||||
grainsize ( expression )
|
||||
|
||||
OpenMP 5.1:
|
||||
grainsize ( strict : expression ) */
|
||||
|
||||
static tree
|
||||
cp_parser_omp_clause_grainsize (cp_parser *parser, tree list,
|
||||
|
@ -37280,6 +37300,19 @@ cp_parser_omp_clause_grainsize (cp_parser *parser, tree list,
|
|||
if (!parens.require_open (parser))
|
||||
return list;
|
||||
|
||||
bool strict = false;
|
||||
if (cp_lexer_next_token_is (parser->lexer, CPP_NAME)
|
||||
&& cp_lexer_nth_token_is (parser->lexer, 2, CPP_COLON))
|
||||
{
|
||||
tree id = cp_lexer_peek_token (parser->lexer)->u.value;
|
||||
if (!strcmp (IDENTIFIER_POINTER (id), "strict"))
|
||||
{
|
||||
strict = true;
|
||||
cp_lexer_consume_token (parser->lexer);
|
||||
cp_lexer_consume_token (parser->lexer);
|
||||
}
|
||||
}
|
||||
|
||||
t = cp_parser_assignment_expression (parser);
|
||||
|
||||
if (t == error_mark_node
|
||||
|
@ -37293,6 +37326,7 @@ cp_parser_omp_clause_grainsize (cp_parser *parser, tree list,
|
|||
|
||||
c = build_omp_clause (location, OMP_CLAUSE_GRAINSIZE);
|
||||
OMP_CLAUSE_GRAINSIZE_EXPR (c) = t;
|
||||
OMP_CLAUSE_GRAINSIZE_STRICT (c) = strict;
|
||||
OMP_CLAUSE_CHAIN (c) = list;
|
||||
|
||||
return c;
|
||||
|
|
|
@ -791,13 +791,19 @@ expand_task_call (struct omp_region *region, basic_block bb,
|
|||
tree tclauses = gimple_omp_for_clauses (g);
|
||||
num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_NUM_TASKS);
|
||||
if (num_tasks)
|
||||
num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
|
||||
{
|
||||
if (OMP_CLAUSE_NUM_TASKS_STRICT (num_tasks))
|
||||
iflags |= GOMP_TASK_FLAG_STRICT;
|
||||
num_tasks = OMP_CLAUSE_NUM_TASKS_EXPR (num_tasks);
|
||||
}
|
||||
else
|
||||
{
|
||||
num_tasks = omp_find_clause (tclauses, OMP_CLAUSE_GRAINSIZE);
|
||||
if (num_tasks)
|
||||
{
|
||||
iflags |= GOMP_TASK_FLAG_GRAINSIZE;
|
||||
if (OMP_CLAUSE_GRAINSIZE_STRICT (num_tasks))
|
||||
iflags |= GOMP_TASK_FLAG_STRICT;
|
||||
num_tasks = OMP_CLAUSE_GRAINSIZE_EXPR (num_tasks);
|
||||
}
|
||||
else
|
||||
|
|
|
@ -1066,6 +1066,8 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags)
|
|||
|
||||
case OMP_CLAUSE_GRAINSIZE:
|
||||
pp_string (pp, "grainsize(");
|
||||
if (OMP_CLAUSE_GRAINSIZE_STRICT (clause))
|
||||
pp_string (pp, "strict:");
|
||||
dump_generic_node (pp, OMP_CLAUSE_GRAINSIZE_EXPR (clause),
|
||||
spc, flags, false);
|
||||
pp_right_paren (pp);
|
||||
|
@ -1073,6 +1075,8 @@ dump_omp_clause (pretty_printer *pp, tree clause, int spc, dump_flags_t flags)
|
|||
|
||||
case OMP_CLAUSE_NUM_TASKS:
|
||||
pp_string (pp, "num_tasks(");
|
||||
if (OMP_CLAUSE_NUM_TASKS_STRICT (clause))
|
||||
pp_string (pp, "strict:");
|
||||
dump_generic_node (pp, OMP_CLAUSE_NUM_TASKS_EXPR (clause),
|
||||
spc, flags, false);
|
||||
pp_right_paren (pp);
|
||||
|
|
|
@ -1612,6 +1612,11 @@ class auto_suppress_location_wrappers
|
|||
#define OMP_CLAUSE_PRIORITY_EXPR(NODE) \
|
||||
OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_PRIORITY),0)
|
||||
|
||||
#define OMP_CLAUSE_GRAINSIZE_STRICT(NODE) \
|
||||
TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_GRAINSIZE))
|
||||
#define OMP_CLAUSE_NUM_TASKS_STRICT(NODE) \
|
||||
TREE_PRIVATE (OMP_CLAUSE_SUBCODE_CHECK (NODE, OMP_CLAUSE_NUM_TASKS))
|
||||
|
||||
/* OpenACC clause expressions */
|
||||
#define OMP_CLAUSE_EXPR(NODE, CLAUSE) \
|
||||
OMP_CLAUSE_OPERAND (OMP_CLAUSE_SUBCODE_CHECK (NODE, CLAUSE), 0)
|
||||
|
|
|
@ -222,6 +222,7 @@ enum gomp_map_kind
|
|||
#define GOMP_TASK_FLAG_NOGROUP (1 << 11)
|
||||
#define GOMP_TASK_FLAG_REDUCTION (1 << 12)
|
||||
#define GOMP_TASK_FLAG_DETACH (1 << 13)
|
||||
#define GOMP_TASK_FLAG_STRICT (1 << 14)
|
||||
|
||||
/* GOMP_target{_ext,update_ext,enter_exit_data} flags argument. */
|
||||
#define GOMP_TARGET_FLAG_NOWAIT (1 << 0)
|
||||
|
|
|
@ -97,6 +97,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
|
|||
#endif
|
||||
|
||||
TYPE task_step = step;
|
||||
TYPE nfirst_task_step = step;
|
||||
unsigned long nfirst = n;
|
||||
if (flags & GOMP_TASK_FLAG_GRAINSIZE)
|
||||
{
|
||||
|
@ -109,7 +110,22 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
|
|||
if (num_tasks != ndiv)
|
||||
num_tasks = ~0UL;
|
||||
#endif
|
||||
if (num_tasks <= 1)
|
||||
if ((flags & GOMP_TASK_FLAG_STRICT)
|
||||
&& num_tasks != ~0ULL)
|
||||
{
|
||||
UTYPE mod = n % grainsize;
|
||||
task_step = (TYPE) grainsize * step;
|
||||
if (mod)
|
||||
{
|
||||
num_tasks++;
|
||||
nfirst_task_step = (TYPE) mod * step;
|
||||
if (num_tasks == 1)
|
||||
task_step = nfirst_task_step;
|
||||
else
|
||||
nfirst = num_tasks - 2;
|
||||
}
|
||||
}
|
||||
else if (num_tasks <= 1)
|
||||
{
|
||||
num_tasks = 1;
|
||||
task_step = end - start;
|
||||
|
@ -124,6 +140,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
|
|||
task_step = (TYPE) grainsize * step;
|
||||
if (mul != n)
|
||||
{
|
||||
nfirst_task_step = task_step;
|
||||
task_step += step;
|
||||
nfirst = n - mul - 1;
|
||||
}
|
||||
|
@ -135,6 +152,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
|
|||
task_step = (TYPE) div * step;
|
||||
if (mod)
|
||||
{
|
||||
nfirst_task_step = task_step;
|
||||
task_step += step;
|
||||
nfirst = mod - 1;
|
||||
}
|
||||
|
@ -153,6 +171,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
|
|||
task_step = (TYPE) div * step;
|
||||
if (mod)
|
||||
{
|
||||
nfirst_task_step = task_step;
|
||||
task_step += step;
|
||||
nfirst = mod - 1;
|
||||
}
|
||||
|
@ -225,7 +244,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
|
|||
start += task_step;
|
||||
((TYPE *)arg)[1] = start;
|
||||
if (i == nfirst)
|
||||
task_step -= step;
|
||||
task_step = nfirst_task_step;
|
||||
fn (arg);
|
||||
arg += arg_size;
|
||||
if (!priority_queue_empty_p (&task[i].children_queue,
|
||||
|
@ -258,7 +277,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
|
|||
start += task_step;
|
||||
((TYPE *)data)[1] = start;
|
||||
if (i == nfirst)
|
||||
task_step -= step;
|
||||
task_step = nfirst_task_step;
|
||||
fn (data);
|
||||
if (!priority_queue_empty_p (&task.children_queue,
|
||||
MEMMODEL_RELAXED))
|
||||
|
@ -303,7 +322,7 @@ GOMP_taskloop (void (*fn) (void *), void *data, void (*cpyfn) (void *, void *),
|
|||
start += task_step;
|
||||
((TYPE *)arg)[1] = start;
|
||||
if (i == nfirst)
|
||||
task_step -= step;
|
||||
task_step = nfirst_task_step;
|
||||
thr->task = parent;
|
||||
task->kind = GOMP_TASK_WAITING;
|
||||
task->fn = fn;
|
||||
|
|
|
@ -85,7 +85,8 @@ main ()
|
|||
if (test (7, 21, 2, 15, grainsize, &ntasks, &min_iters, &max_iters) != 7
|
||||
|| ntasks != 1 || min_iters != 7 || max_iters != 7)
|
||||
__builtin_abort ();
|
||||
/* If num_tasks is present, # of task loop iters is min (# of loop iters, num_tasks). */
|
||||
/* If num_tasks is present, # of tasks is min (# of loop iters, num_tasks)
|
||||
and each task has at least one iteration. */
|
||||
if (test (-51, 2500, 48, 9, num_tasks, &ntasks, &min_iters, &max_iters) != 54
|
||||
|| ntasks != 9)
|
||||
__builtin_abort ();
|
||||
|
|
135
libgomp/testsuite/libgomp.c-c++-common/taskloop-5.c
Normal file
135
libgomp/testsuite/libgomp.c-c++-common/taskloop-5.c
Normal file
|
@ -0,0 +1,135 @@
|
|||
/* { dg-do run } */
|
||||
/* { dg-options "-O2" } */
|
||||
|
||||
int u[64], v, w[64];
|
||||
|
||||
__attribute__((noinline, noclone)) int
|
||||
test (int a, int b, int c, int d, void (*fn) (int, int, int, int),
|
||||
int *num_tasks, int *min_iters, int *max_iters, int *sep)
|
||||
{
|
||||
int i, j, t = 0;
|
||||
__builtin_memset (u, 0, sizeof u);
|
||||
v = 0;
|
||||
fn (a, b, c, d);
|
||||
*min_iters = 0;
|
||||
*max_iters = 0;
|
||||
*num_tasks = v;
|
||||
*sep = v;
|
||||
if (v)
|
||||
{
|
||||
*min_iters = u[0];
|
||||
*max_iters = u[0];
|
||||
t = u[0];
|
||||
for (i = 1; i < v; i++)
|
||||
{
|
||||
if (*min_iters > u[i])
|
||||
*min_iters = u[i];
|
||||
if (*max_iters < u[i])
|
||||
*max_iters = u[i];
|
||||
t += u[i];
|
||||
}
|
||||
if (*min_iters != *max_iters)
|
||||
{
|
||||
for (i = 0; i < v - 1; i++)
|
||||
{
|
||||
int min_idx = i;
|
||||
for (j = i + 1; j < v; j++)
|
||||
if (w[min_idx] > w[j])
|
||||
min_idx = j;
|
||||
if (min_idx != i)
|
||||
{
|
||||
int tem = u[i];
|
||||
u[i] = u[min_idx];
|
||||
u[min_idx] = tem;
|
||||
tem = w[i];
|
||||
w[i] = w[min_idx];
|
||||
w[min_idx] = tem;
|
||||
}
|
||||
}
|
||||
if (u[0] != *max_iters)
|
||||
__builtin_abort ();
|
||||
for (i = 1; i < v; i++)
|
||||
if (u[i] != u[i - 1])
|
||||
{
|
||||
if (*sep != v || u[i] != *min_iters)
|
||||
__builtin_abort ();
|
||||
*sep = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
return t;
|
||||
}
|
||||
|
||||
void
|
||||
grainsize (int a, int b, int c, int d)
|
||||
{
|
||||
int i, j = 0, k = 0;
|
||||
#pragma omp taskloop firstprivate (j, k) grainsize(strict:d)
|
||||
for (i = a; i < b; i += c)
|
||||
{
|
||||
if (j == 0)
|
||||
{
|
||||
#pragma omp atomic capture
|
||||
k = v++;
|
||||
if (k >= 64)
|
||||
__builtin_abort ();
|
||||
w[k] = i;
|
||||
}
|
||||
u[k] = ++j;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
num_tasks (int a, int b, int c, int d)
|
||||
{
|
||||
int i, j = 0, k = 0;
|
||||
#pragma omp taskloop firstprivate (j, k) num_tasks(strict:d)
|
||||
for (i = a; i < b; i += c)
|
||||
{
|
||||
if (j == 0)
|
||||
{
|
||||
#pragma omp atomic capture
|
||||
k = v++;
|
||||
if (k >= 64)
|
||||
__builtin_abort ();
|
||||
w[k] = i;
|
||||
}
|
||||
u[k] = ++j;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
main ()
|
||||
{
|
||||
#pragma omp parallel
|
||||
#pragma omp single
|
||||
{
|
||||
int min_iters, max_iters, ntasks, sep;
|
||||
/* If grainsize is present and has strict modifier, # of task loop iters is == grainsize,
|
||||
except that it can be smaller on the last task. */
|
||||
if (test (0, 79, 1, 17, grainsize, &ntasks, &min_iters, &max_iters, &sep) != 79
|
||||
|| ntasks != 5 || min_iters != 11 || max_iters != 17 || sep != 4)
|
||||
__builtin_abort ();
|
||||
if (test (-49, 2541, 7, 28, grainsize, &ntasks, &min_iters, &max_iters, &sep) != 370
|
||||
|| ntasks != 14 || min_iters != 6 || max_iters != 28 || sep != 13)
|
||||
__builtin_abort ();
|
||||
if (test (7, 21, 2, 15, grainsize, &ntasks, &min_iters, &max_iters, &sep) != 7
|
||||
|| ntasks != 1 || min_iters != 7 || max_iters != 7 || sep != 1)
|
||||
__builtin_abort ();
|
||||
/* If num_tasks is present, # of tasks is min (# of loop iters, num_tasks)
|
||||
and each task has at least one iteration. If strict modifier is present,
|
||||
first set of tasks has ceil (# of loop iters / num_tasks) iterations,
|
||||
followed by possibly empty set of tasks with floor (# of loop iters / num_tasks)
|
||||
iterations. */
|
||||
if (test (-51, 2500, 48, 9, num_tasks, &ntasks, &min_iters, &max_iters, &sep) != 54
|
||||
|| ntasks != 9 || min_iters != 6 || max_iters != 6 || sep != 9)
|
||||
__builtin_abort ();
|
||||
if (test (0, 57, 1, 9, num_tasks, &ntasks, &min_iters, &max_iters, &sep) != 57
|
||||
|| ntasks != 9 || min_iters != 6 || max_iters != 7 || sep != 3)
|
||||
__builtin_abort ();
|
||||
if (test (0, 25, 2, 17, num_tasks, &ntasks, &min_iters, &max_iters, &sep) != 13
|
||||
|| ntasks != 13 || min_iters != 1 || max_iters != 1 || sep != 13)
|
||||
__builtin_abort ();
|
||||
}
|
||||
return 0;
|
||||
}
|
Loading…
Add table
Reference in a new issue