111
|
1 /* General types and functions that are useful for processing of OpenMP,
|
|
2 OpenACC and similar directives at various stages of compilation.
|
|
3
|
145
|
4 Copyright (C) 2005-2020 Free Software Foundation, Inc.
|
111
|
5
|
|
6 This file is part of GCC.
|
|
7
|
|
8 GCC is free software; you can redistribute it and/or modify it under
|
|
9 the terms of the GNU General Public License as published by the Free
|
|
10 Software Foundation; either version 3, or (at your option) any later
|
|
11 version.
|
|
12
|
|
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
16 for more details.
|
|
17
|
|
18 You should have received a copy of the GNU General Public License
|
|
19 along with GCC; see the file COPYING3. If not see
|
|
20 <http://www.gnu.org/licenses/>. */
|
|
21
|
|
22 /* Find an OMP clause of type KIND within CLAUSES. */
|
|
23
|
|
24 #include "config.h"
|
|
25 #include "system.h"
|
|
26 #include "coretypes.h"
|
|
27 #include "backend.h"
|
|
28 #include "target.h"
|
|
29 #include "tree.h"
|
|
30 #include "gimple.h"
|
|
31 #include "ssa.h"
|
|
32 #include "diagnostic-core.h"
|
|
33 #include "fold-const.h"
|
|
34 #include "langhooks.h"
|
|
35 #include "omp-general.h"
|
|
36 #include "stringpool.h"
|
|
37 #include "attribs.h"
|
145
|
38 #include "gimplify.h"
|
|
39 #include "cgraph.h"
|
|
40 #include "alloc-pool.h"
|
|
41 #include "symbol-summary.h"
|
|
42 #include "hsa-common.h"
|
|
43 #include "tree-pass.h"
|
|
44 #include "omp-device-properties.h"
|
|
45
|
|
46 enum omp_requires omp_requires_mask;
|
111
|
47
|
|
48 tree
|
|
49 omp_find_clause (tree clauses, enum omp_clause_code kind)
|
|
50 {
|
|
51 for (; clauses ; clauses = OMP_CLAUSE_CHAIN (clauses))
|
|
52 if (OMP_CLAUSE_CODE (clauses) == kind)
|
|
53 return clauses;
|
|
54
|
|
55 return NULL_TREE;
|
|
56 }
|
|
57
|
145
|
/* True if OpenMP should regard this DECL as being a scalar which has Fortran's
   allocatable or pointer attribute.  */
bool
omp_is_allocatable_or_ptr (tree decl)
{
  /* Only the language front end can answer this; for C/C++ the default
     hook simply returns false.  */
  return lang_hooks.decls.omp_is_allocatable_or_ptr (decl);
}
|
|
65
|
|
/* Check whether this DECL belongs to a Fortran optional argument.
   With 'for_present_check' set to false, decls which are optional parameters
   themselves are returned as tree - or a NULL_TREE otherwise.  Those decls are
   always pointers.  With 'for_present_check' set to true, the decl for checking
   whether an argument is present is returned; for arguments with value
   attribute this is the hidden argument and of BOOLEAN_TYPE.  If the decl is
   unrelated to optional arguments, NULL_TREE is returned.  */

tree
omp_check_optional_argument (tree decl, bool for_present_check)
{
  /* Delegated to the front end; non-Fortran front ends return NULL_TREE.  */
  return lang_hooks.decls.omp_check_optional_argument (decl, for_present_check);
}
|
|
79
|
111
|
/* Return true if DECL is a reference type.  More precisely, true when the
   front end wants DECL privatized by reference (see the
   omp_privatize_by_reference langhook).  */

bool
omp_is_reference (tree decl)
{
  return lang_hooks.decls.omp_privatize_by_reference (decl);
}
|
|
87
|
145
|
/* Adjust *COND_CODE and *N2 so that the former is either LT_EXPR or GT_EXPR,
   given that V is the loop index variable and STEP is loop step.  */

void
omp_adjust_for_condition (location_t loc, enum tree_code *cond_code, tree *n2,
			  tree v, tree step)
{
  switch (*cond_code)
    {
    case LT_EXPR:
    case GT_EXPR:
      /* Already in canonical form.  */
      break;

    case NE_EXPR:
      /* != is only valid when the step is known to advance the iterator by
	 exactly one unit per iteration, so it is equivalent to < (step +1)
	 or > (step -1).  */
      gcc_assert (TREE_CODE (step) == INTEGER_CST);
      if (TREE_CODE (TREE_TYPE (v)) == INTEGER_TYPE)
	{
	  if (integer_onep (step))
	    *cond_code = LT_EXPR;
	  else
	    {
	      gcc_assert (integer_minus_onep (step));
	      *cond_code = GT_EXPR;
	    }
	}
      else
	{
	  /* Pointer iterator: "one unit" is the size of the pointed-to
	     element, positive or negative.  */
	  tree unit = TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (v)));
	  gcc_assert (TREE_CODE (unit) == INTEGER_CST);
	  if (tree_int_cst_equal (unit, step))
	    *cond_code = LT_EXPR;
	  else
	    {
	      gcc_assert (wi::neg (wi::to_widest (unit))
			  == wi::to_widest (step));
	      *cond_code = GT_EXPR;
	    }
	}

      break;

    case LE_EXPR:
      /* v <= n2 canonicalizes to v < n2 + 1.  */
      if (POINTER_TYPE_P (TREE_TYPE (*n2)))
	*n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, 1);
      else
	*n2 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (*n2), *n2,
			       build_int_cst (TREE_TYPE (*n2), 1));
      *cond_code = LT_EXPR;
      break;
    case GE_EXPR:
      /* v >= n2 canonicalizes to v > n2 - 1.  */
      if (POINTER_TYPE_P (TREE_TYPE (*n2)))
	*n2 = fold_build_pointer_plus_hwi_loc (loc, *n2, -1);
      else
	*n2 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (*n2), *n2,
			       build_int_cst (TREE_TYPE (*n2), 1));
      *cond_code = GT_EXPR;
      break;
    default:
      gcc_unreachable ();
    }
}
|
|
149
|
|
150 /* Return the looping step from INCR, extracted from the step of a gimple omp
|
|
151 for statement. */
|
|
152
|
|
153 tree
|
|
154 omp_get_for_step_from_incr (location_t loc, tree incr)
|
|
155 {
|
|
156 tree step;
|
|
157 switch (TREE_CODE (incr))
|
|
158 {
|
|
159 case PLUS_EXPR:
|
|
160 step = TREE_OPERAND (incr, 1);
|
|
161 break;
|
|
162 case POINTER_PLUS_EXPR:
|
|
163 step = fold_convert (ssizetype, TREE_OPERAND (incr, 1));
|
|
164 break;
|
|
165 case MINUS_EXPR:
|
|
166 step = TREE_OPERAND (incr, 1);
|
|
167 step = fold_build1_loc (loc, NEGATE_EXPR, TREE_TYPE (step), step);
|
|
168 break;
|
|
169 default:
|
|
170 gcc_unreachable ();
|
|
171 }
|
|
172 return step;
|
|
173 }
|
|
174
|
|
/* Extract the header elements of parallel loop FOR_STMT and store
   them into *FD.  LOOPS, if non-NULL, receives the per-dimension loop
   descriptors for collapsed/tiled/ordered loops.  */

void
omp_extract_for_data (gomp_for *for_stmt, struct omp_for_data *fd,
		      struct omp_for_data_loop *loops)
{
  tree t, var, *collapse_iter, *collapse_count;
  tree count = NULL_TREE, iter_type = long_integer_type_node;
  struct omp_for_data_loop *loop;
  int i;
  struct omp_for_data_loop dummy_loop;
  location_t loc = gimple_location (for_stmt);
  bool simd = gimple_omp_for_kind (for_stmt) == GF_OMP_FOR_KIND_SIMD;
  bool distribute = gimple_omp_for_kind (for_stmt)
		    == GF_OMP_FOR_KIND_DISTRIBUTE;
  bool taskloop = gimple_omp_for_kind (for_stmt)
		  == GF_OMP_FOR_KIND_TASKLOOP;
  tree iterv, countv;

  /* Initialize *FD with conservative defaults; the clause walk below
     overrides them.  */
  fd->for_stmt = for_stmt;
  fd->pre = NULL;
  fd->have_nowait = distribute || simd;
  fd->have_ordered = false;
  fd->have_reductemp = false;
  fd->have_pointer_condtemp = false;
  fd->have_scantemp = false;
  fd->have_nonctrl_scantemp = false;
  fd->lastprivate_conditional = 0;
  fd->tiling = NULL_TREE;
  fd->collapse = 1;
  fd->ordered = 0;
  fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
  fd->sched_modifiers = 0;
  fd->chunk_size = NULL_TREE;
  fd->simd_schedule = false;
  collapse_iter = NULL;
  collapse_count = NULL;

  /* Scan the clause list and record everything relevant to loop
     expansion.  */
  for (t = gimple_omp_for_clauses (for_stmt); t ; t = OMP_CLAUSE_CHAIN (t))
    switch (OMP_CLAUSE_CODE (t))
      {
      case OMP_CLAUSE_NOWAIT:
	fd->have_nowait = true;
	break;
      case OMP_CLAUSE_ORDERED:
	fd->have_ordered = true;
	if (OMP_CLAUSE_ORDERED_EXPR (t))
	  fd->ordered = tree_to_shwi (OMP_CLAUSE_ORDERED_EXPR (t));
	break;
      case OMP_CLAUSE_SCHEDULE:
	gcc_assert (!distribute && !taskloop);
	/* The kind encodes modifier bits above OMP_CLAUSE_SCHEDULE_MASK;
	   split them apart.  */
	fd->sched_kind
	  = (enum omp_clause_schedule_kind)
	    (OMP_CLAUSE_SCHEDULE_KIND (t) & OMP_CLAUSE_SCHEDULE_MASK);
	fd->sched_modifiers = (OMP_CLAUSE_SCHEDULE_KIND (t)
			       & ~OMP_CLAUSE_SCHEDULE_MASK);
	fd->chunk_size = OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t);
	fd->simd_schedule = OMP_CLAUSE_SCHEDULE_SIMD (t);
	break;
      case OMP_CLAUSE_DIST_SCHEDULE:
	gcc_assert (distribute);
	fd->chunk_size = OMP_CLAUSE_DIST_SCHEDULE_CHUNK_EXPR (t);
	break;
      case OMP_CLAUSE_COLLAPSE:
	fd->collapse = tree_to_shwi (OMP_CLAUSE_COLLAPSE_EXPR (t));
	if (fd->collapse > 1)
	  {
	    collapse_iter = &OMP_CLAUSE_COLLAPSE_ITERVAR (t);
	    collapse_count = &OMP_CLAUSE_COLLAPSE_COUNT (t);
	  }
	break;
      case OMP_CLAUSE_TILE:
	fd->tiling = OMP_CLAUSE_TILE_LIST (t);
	fd->collapse = list_length (fd->tiling);
	gcc_assert (fd->collapse);
	collapse_iter = &OMP_CLAUSE_TILE_ITERVAR (t);
	collapse_count = &OMP_CLAUSE_TILE_COUNT (t);
	break;
      case OMP_CLAUSE__REDUCTEMP_:
	fd->have_reductemp = true;
	break;
      case OMP_CLAUSE_LASTPRIVATE:
	if (OMP_CLAUSE_LASTPRIVATE_CONDITIONAL (t))
	  fd->lastprivate_conditional++;
	break;
      case OMP_CLAUSE__CONDTEMP_:
	if (POINTER_TYPE_P (TREE_TYPE (OMP_CLAUSE_DECL (t))))
	  fd->have_pointer_condtemp = true;
	break;
      case OMP_CLAUSE__SCANTEMP_:
	fd->have_scantemp = true;
	if (!OMP_CLAUSE__SCANTEMP__ALLOC (t)
	    && !OMP_CLAUSE__SCANTEMP__CONTROL (t))
	  fd->have_nonctrl_scantemp = true;
	break;
      default:
	break;
      }

  /* Decide where the per-dimension loop descriptors live.  */
  if (fd->collapse > 1 || fd->tiling)
    fd->loops = loops;
  else
    fd->loops = &fd->loop;

  if (fd->ordered && fd->collapse == 1 && loops != NULL)
    {
      fd->loops = loops;
      iterv = NULL_TREE;
      countv = NULL_TREE;
      collapse_iter = &iterv;
      collapse_count = &countv;
    }

  /* FIXME: for now map schedule(auto) to schedule(static).
     There should be analysis to determine whether all iterations
     are approximately the same amount of work (then schedule(static)
     is best) or if it varies (then schedule(dynamic,N) is better).  */
  if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_AUTO)
    {
      fd->sched_kind = OMP_CLAUSE_SCHEDULE_STATIC;
      gcc_assert (fd->chunk_size == NULL);
    }
  gcc_assert ((fd->collapse == 1 && !fd->tiling) || collapse_iter != NULL);
  if (taskloop)
    fd->sched_kind = OMP_CLAUSE_SCHEDULE_RUNTIME;
  if (fd->sched_kind == OMP_CLAUSE_SCHEDULE_RUNTIME)
    gcc_assert (fd->chunk_size == NULL);
  else if (fd->chunk_size == NULL)
    {
      /* We only need to compute a default chunk size for ordered
	 static loops and dynamic loops.  */
      if (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC
	  || fd->have_ordered)
	fd->chunk_size = (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC)
			 ? integer_zero_node : integer_one_node;
    }

  /* Walk each loop dimension: validate it, canonicalize its condition,
     pick an iterator type wide enough for all dimensions, and try to
     compute a compile-time iteration count.  */
  int cnt = fd->ordered ? fd->ordered : fd->collapse;
  for (i = 0; i < cnt; i++)
    {
      if (i == 0
	  && fd->collapse == 1
	  && !fd->tiling
	  && (fd->ordered == 0 || loops == NULL))
	loop = &fd->loop;
      else if (loops != NULL)
	loop = loops + i;
      else
	loop = &dummy_loop;

      loop->v = gimple_omp_for_index (for_stmt, i);
      gcc_assert (SSA_VAR_P (loop->v));
      gcc_assert (TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
		  || TREE_CODE (TREE_TYPE (loop->v)) == POINTER_TYPE);
      var = TREE_CODE (loop->v) == SSA_NAME ? SSA_NAME_VAR (loop->v) : loop->v;
      loop->n1 = gimple_omp_for_initial (for_stmt, i);

      loop->cond_code = gimple_omp_for_cond (for_stmt, i);
      loop->n2 = gimple_omp_for_final (for_stmt, i);
      /* OpenACC loops never use != conditions.  */
      gcc_assert (loop->cond_code != NE_EXPR
		  || (gimple_omp_for_kind (for_stmt)
		      != GF_OMP_FOR_KIND_OACC_LOOP));

      t = gimple_omp_for_incr (for_stmt, i);
      gcc_assert (TREE_OPERAND (t, 0) == var);
      loop->step = omp_get_for_step_from_incr (loc, t);

      omp_adjust_for_condition (loc, &loop->cond_code, &loop->n2, loop->v,
				loop->step);

      if (simd
	  || (fd->sched_kind == OMP_CLAUSE_SCHEDULE_STATIC
	      && !fd->have_ordered))
	{
	  if (fd->collapse == 1 && !fd->tiling)
	    iter_type = TREE_TYPE (loop->v);
	  else if (i == 0
		   || TYPE_PRECISION (iter_type)
		      < TYPE_PRECISION (TREE_TYPE (loop->v)))
	    iter_type
	      = build_nonstandard_integer_type
		  (TYPE_PRECISION (TREE_TYPE (loop->v)), 1);
	}
      else if (iter_type != long_long_unsigned_type_node)
	{
	  if (POINTER_TYPE_P (TREE_TYPE (loop->v)))
	    iter_type = long_long_unsigned_type_node;
	  else if (TYPE_UNSIGNED (TREE_TYPE (loop->v))
		   && TYPE_PRECISION (TREE_TYPE (loop->v))
		      >= TYPE_PRECISION (iter_type))
	    {
	      tree n;

	      /* Widen iter_type if the loop bound can exceed its range.  */
	      if (loop->cond_code == LT_EXPR)
		n = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (loop->v),
				     loop->n2, loop->step);
	      else
		n = loop->n1;
	      if (TREE_CODE (n) != INTEGER_CST
		  || tree_int_cst_lt (TYPE_MAX_VALUE (iter_type), n))
		iter_type = long_long_unsigned_type_node;
	    }
	  else if (TYPE_PRECISION (TREE_TYPE (loop->v))
		   > TYPE_PRECISION (iter_type))
	    {
	      tree n1, n2;

	      if (loop->cond_code == LT_EXPR)
		{
		  n1 = loop->n1;
		  n2 = fold_build2_loc (loc, PLUS_EXPR, TREE_TYPE (loop->v),
					loop->n2, loop->step);
		}
	      else
		{
		  n1 = fold_build2_loc (loc, MINUS_EXPR, TREE_TYPE (loop->v),
					loop->n2, loop->step);
		  n2 = loop->n1;
		}
	      if (TREE_CODE (n1) != INTEGER_CST
		  || TREE_CODE (n2) != INTEGER_CST
		  || !tree_int_cst_lt (TYPE_MIN_VALUE (iter_type), n1)
		  || !tree_int_cst_lt (n2, TYPE_MAX_VALUE (iter_type)))
		iter_type = long_long_unsigned_type_node;
	    }
	}

      if (i >= fd->collapse)
	continue;

      /* Try to fold the total iteration count of the collapsed nest into
	 a compile-time constant.  */
      if (collapse_count && *collapse_count == NULL)
	{
	  t = fold_binary (loop->cond_code, boolean_type_node,
			   fold_convert (TREE_TYPE (loop->v), loop->n1),
			   fold_convert (TREE_TYPE (loop->v), loop->n2));
	  if (t && integer_zerop (t))
	    count = build_zero_cst (long_long_unsigned_type_node);
	  else if ((i == 0 || count != NULL_TREE)
		   && TREE_CODE (TREE_TYPE (loop->v)) == INTEGER_TYPE
		   && TREE_CONSTANT (loop->n1)
		   && TREE_CONSTANT (loop->n2)
		   && TREE_CODE (loop->step) == INTEGER_CST)
	    {
	      tree itype = TREE_TYPE (loop->v);

	      if (POINTER_TYPE_P (itype))
		itype = signed_type_for (itype);
	      /* count = (n2 - n1 + step +/- 1) / step, rounding toward
		 zero in the right direction for the loop sense.  */
	      t = build_int_cst (itype, (loop->cond_code == LT_EXPR ? -1 : 1));
	      t = fold_build2_loc (loc, PLUS_EXPR, itype,
				   fold_convert_loc (loc, itype, loop->step),
				   t);
	      t = fold_build2_loc (loc, PLUS_EXPR, itype, t,
				   fold_convert_loc (loc, itype, loop->n2));
	      t = fold_build2_loc (loc, MINUS_EXPR, itype, t,
				   fold_convert_loc (loc, itype, loop->n1));
	      if (TYPE_UNSIGNED (itype) && loop->cond_code == GT_EXPR)
		{
		  tree step = fold_convert_loc (loc, itype, loop->step);
		  t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype,
				       fold_build1_loc (loc, NEGATE_EXPR,
							itype, t),
				       fold_build1_loc (loc, NEGATE_EXPR,
							itype, step));
		}
	      else
		t = fold_build2_loc (loc, TRUNC_DIV_EXPR, itype, t,
				     fold_convert_loc (loc, itype,
						       loop->step));
	      t = fold_convert_loc (loc, long_long_unsigned_type_node, t);
	      if (count != NULL_TREE)
		count = fold_build2_loc (loc, MULT_EXPR,
					 long_long_unsigned_type_node,
					 count, t);
	      else
		count = t;
	      if (TREE_CODE (count) != INTEGER_CST)
		count = NULL_TREE;
	    }
	  else if (count && !integer_zerop (count))
	    count = NULL_TREE;
	}
    }

  /* Choose the final iterator type and materialize the combined
     iterator/count variables for collapsed loops.  */
  if (count
      && !simd
      && (fd->sched_kind != OMP_CLAUSE_SCHEDULE_STATIC
	  || fd->have_ordered))
    {
      if (!tree_int_cst_lt (count, TYPE_MAX_VALUE (long_integer_type_node)))
	iter_type = long_long_unsigned_type_node;
      else
	iter_type = long_integer_type_node;
    }
  else if (collapse_iter && *collapse_iter != NULL)
    iter_type = TREE_TYPE (*collapse_iter);
  fd->iter_type = iter_type;
  if (collapse_iter && *collapse_iter == NULL)
    *collapse_iter = create_tmp_var (iter_type, ".iter");
  if (collapse_count && *collapse_count == NULL)
    {
      if (count)
	*collapse_count = fold_convert_loc (loc, iter_type, count);
      else
	*collapse_count = create_tmp_var (iter_type, ".count");
    }

  if (fd->collapse > 1 || fd->tiling || (fd->ordered && loops))
    {
      /* The combined loop iterates from 0 to the total count with step 1.  */
      fd->loop.v = *collapse_iter;
      fd->loop.n1 = build_int_cst (TREE_TYPE (fd->loop.v), 0);
      fd->loop.n2 = *collapse_count;
      fd->loop.step = build_int_cst (TREE_TYPE (fd->loop.v), 1);
      fd->loop.cond_code = LT_EXPR;
    }
  else if (loops)
    loops[0] = fd->loop;
}
|
|
493
|
|
494 /* Build a call to GOMP_barrier. */
|
|
495
|
|
496 gimple *
|
|
497 omp_build_barrier (tree lhs)
|
|
498 {
|
|
499 tree fndecl = builtin_decl_explicit (lhs ? BUILT_IN_GOMP_BARRIER_CANCEL
|
|
500 : BUILT_IN_GOMP_BARRIER);
|
|
501 gcall *g = gimple_build_call (fndecl, 0);
|
|
502 if (lhs)
|
|
503 gimple_call_set_lhs (g, lhs);
|
|
504 return g;
|
|
505 }
|
|
506
|
|
507 /* Return maximum possible vectorization factor for the target. */
|
|
508
|
131
|
509 poly_uint64
|
111
|
510 omp_max_vf (void)
|
|
511 {
|
|
512 if (!optimize
|
|
513 || optimize_debug
|
|
514 || !flag_tree_loop_optimize
|
|
515 || (!flag_tree_loop_vectorize
|
|
516 && global_options_set.x_flag_tree_loop_vectorize))
|
|
517 return 1;
|
|
518
|
145
|
519 auto_vector_modes modes;
|
|
520 targetm.vectorize.autovectorize_vector_modes (&modes, true);
|
|
521 if (!modes.is_empty ())
|
111
|
522 {
|
131
|
523 poly_uint64 vf = 0;
|
145
|
524 for (unsigned int i = 0; i < modes.length (); ++i)
|
|
525 /* The returned modes use the smallest element size (and thus
|
|
526 the largest nunits) for the vectorization approach that they
|
|
527 represent. */
|
|
528 vf = ordered_max (vf, GET_MODE_NUNITS (modes[i]));
|
131
|
529 return vf;
|
111
|
530 }
|
131
|
531
|
|
532 machine_mode vqimode = targetm.vectorize.preferred_simd_mode (QImode);
|
|
533 if (GET_MODE_CLASS (vqimode) == MODE_VECTOR_INT)
|
|
534 return GET_MODE_NUNITS (vqimode);
|
|
535
|
|
536 return 1;
|
111
|
537 }
|
|
538
|
|
539 /* Return maximum SIMT width if offloading may target SIMT hardware. */
|
|
540
|
|
541 int
|
|
542 omp_max_simt_vf (void)
|
|
543 {
|
|
544 if (!optimize)
|
|
545 return 0;
|
|
546 if (ENABLE_OFFLOADING)
|
|
547 for (const char *c = getenv ("OFFLOAD_TARGET_NAMES"); c;)
|
|
548 {
|
|
549 if (!strncmp (c, "nvptx", strlen ("nvptx")))
|
|
550 return 32;
|
145
|
551 else if ((c = strchr (c, ':')))
|
111
|
552 c++;
|
|
553 }
|
|
554 return 0;
|
|
555 }
|
|
556
|
145
|
557 /* Store the construct selectors as tree codes from last to first,
|
|
558 return their number. */
|
|
559
|
|
560 int
|
|
561 omp_constructor_traits_to_codes (tree ctx, enum tree_code *constructs)
|
|
562 {
|
|
563 int nconstructs = list_length (ctx);
|
|
564 int i = nconstructs - 1;
|
|
565 for (tree t2 = ctx; t2; t2 = TREE_CHAIN (t2), i--)
|
|
566 {
|
|
567 const char *sel = IDENTIFIER_POINTER (TREE_PURPOSE (t2));
|
|
568 if (!strcmp (sel, "target"))
|
|
569 constructs[i] = OMP_TARGET;
|
|
570 else if (!strcmp (sel, "teams"))
|
|
571 constructs[i] = OMP_TEAMS;
|
|
572 else if (!strcmp (sel, "parallel"))
|
|
573 constructs[i] = OMP_PARALLEL;
|
|
574 else if (!strcmp (sel, "for") || !strcmp (sel, "do"))
|
|
575 constructs[i] = OMP_FOR;
|
|
576 else if (!strcmp (sel, "simd"))
|
|
577 constructs[i] = OMP_SIMD;
|
|
578 else
|
|
579 gcc_unreachable ();
|
|
580 }
|
|
581 gcc_assert (i == -1);
|
|
582 return nconstructs;
|
|
583 }
|
|
584
|
|
/* Return true if PROP is possibly present in one of the offloading target's
   OpenMP contexts.  The format of PROPS string is always offloading target's
   name terminated by '\0', followed by properties for that offloading
   target separated by '\0' and terminated by another '\0'.  The strings
   are created from omp-device-properties installed files of all configured
   offloading targets.  */

static bool
omp_offload_device_kind_arch_isa (const char *props, const char *prop)
{
  const char *names = getenv ("OFFLOAD_TARGET_NAMES");
  if (names == NULL || *names == '\0')
    return false;
  while (*props != '\0')
    {
      size_t name_len = strlen (props);

      /* Is this offload target configured?  NAMES is a ':'-separated
	 list, possibly with '='-prefixed options after each name.  */
      bool target_configured = false;
      const char *c = names;
      while (c != NULL)
	{
	  if (strncmp (props, c, name_len) == 0
	      && (c[name_len] == '\0'
		  || c[name_len] == ':'
		  || c[name_len] == '='))
	    {
	      target_configured = true;
	      break;
	    }
	  c = strchr (c, ':');
	  if (c != NULL)
	    c++;
	}

      /* Walk this target's NUL-separated property list.  */
      props += name_len + 1;
      while (*props != '\0')
	{
	  if (target_configured && strcmp (props, prop) == 0)
	    return true;
	  props += strlen (props) + 1;
	}
      props++;
    }
  return false;
}
|
|
626
|
|
/* Return true if the current code location is or might be offloaded.
   Return true in declare target functions, or when nested in a target
   region or when unsure, return false otherwise.  */

static bool
omp_maybe_offloaded (void)
{
  if (!hsa_gen_requested_p ())
    {
      /* Without HSA, offloading is only possible when configured and
	 when offload targets were requested at link time.  */
      if (!ENABLE_OFFLOADING)
	return false;
      const char *names = getenv ("OFFLOAD_TARGET_NAMES");
      if (names == NULL || *names == '\0')
	return false;
    }
  if (symtab->state == PARSING)
    /* Maybe.  */
    return true;
  /* Declare target functions are always offloading candidates.  */
  if (current_function_decl
      && lookup_attribute ("omp declare target",
			   DECL_ATTRIBUTES (current_function_decl)))
    return true;
  /* Before gimplification, check whether we are lexically nested in a
     target construct.  */
  if (cfun && (cfun->curr_properties & PROP_gimple_any) == 0)
    {
      enum tree_code construct = OMP_TARGET;
      if (omp_construct_selector_matches (&construct, 1, NULL))
	return true;
    }
  return false;
}
|
|
657
|
|
658 /* Return a name from PROP, a property in selectors accepting
|
|
659 name lists. */
|
|
660
|
|
661 static const char *
|
|
662 omp_context_name_list_prop (tree prop)
|
|
663 {
|
|
664 if (TREE_PURPOSE (prop))
|
|
665 return IDENTIFIER_POINTER (TREE_PURPOSE (prop));
|
|
666 else
|
|
667 {
|
|
668 const char *ret = TREE_STRING_POINTER (TREE_VALUE (prop));
|
|
669 if ((size_t) TREE_STRING_LENGTH (TREE_VALUE (prop)) == strlen (ret) + 1)
|
|
670 return ret;
|
|
671 return NULL;
|
|
672 }
|
|
673 }
|
|
674
|
|
/* Return 1 if context selector matches the current OpenMP context, 0
   if it does not and -1 if it is unknown and need to be determined later.
   Some properties can be checked right away during parsing (this routine),
   others need to wait until the whole TU is parsed, others need to wait until
   IPA, others until vectorization.  */

int
omp_context_selector_matches (tree ctx)
{
  int ret = 1;
  /* CTX is a list of trait sets; TREE_PURPOSE is the set name
     (construct/device/implementation/user, discriminated by its first
     character) and TREE_VALUE the selectors within the set.  */
  for (tree t1 = ctx; t1; t1 = TREE_CHAIN (t1))
    {
      char set = IDENTIFIER_POINTER (TREE_PURPOSE (t1))[0];
      if (set == 'c')
	{
	  /* For now, ignore the construct set.  While something can be
	     determined already during parsing, we don't know until end of TU
	     whether additional constructs aren't added through declare variant
	     unless "omp declare variant variant" attribute exists already
	     (so in most of the cases), and we'd need to maintain set of
	     surrounding OpenMP constructs, which is better handled during
	     gimplification.  */
	  if (symtab->state == PARSING
	      || (cfun->curr_properties & PROP_gimple_any) != 0)
	    {
	      ret = -1;
	      continue;
	    }

	  enum tree_code constructs[5];
	  int nconstructs
	    = omp_constructor_traits_to_codes (TREE_VALUE (t1), constructs);
	  int r = omp_construct_selector_matches (constructs, nconstructs,
						  NULL);
	  if (r == 0)
	    return 0;
	  if (r == -1)
	    ret = -1;
	  continue;
	}
      for (tree t2 = TREE_VALUE (t1); t2; t2 = TREE_CHAIN (t2))
	{
	  const char *sel = IDENTIFIER_POINTER (TREE_PURPOSE (t2));
	  /* Dispatch on the first character of the selector name, then
	     verify the full name and the set it belongs to.  */
	  switch (*sel)
	    {
	    case 'v':
	      if (set == 'i' && !strcmp (sel, "vendor"))
		for (tree t3 = TREE_VALUE (t2); t3; t3 = TREE_CHAIN (t3))
		  {
		    const char *prop = omp_context_name_list_prop (t3);
		    if (prop == NULL)
		      return 0;
		    /* " score" entries are scoring hints; only "gnu"
		       matches as an actual vendor.  */
		    if ((!strcmp (prop, " score") && TREE_PURPOSE (t3))
			|| !strcmp (prop, "gnu"))
		      continue;
		    return 0;
		  }
	      break;
	    case 'e':
	      if (set == 'i' && !strcmp (sel, "extension"))
		/* We don't support any extensions right now.  */
		return 0;
	      break;
	    case 'a':
	      if (set == 'i' && !strcmp (sel, "atomic_default_mem_order"))
		{
		  enum omp_memory_order omo
		    = ((enum omp_memory_order)
		       (omp_requires_mask
			& OMP_REQUIRES_ATOMIC_DEFAULT_MEM_ORDER));
		  if (omo == OMP_MEMORY_ORDER_UNSPECIFIED)
		    {
		      /* We don't know yet, until end of TU.  */
		      if (symtab->state == PARSING)
			{
			  ret = -1;
			  break;
			}
		      else
			omo = OMP_MEMORY_ORDER_RELAXED;
		    }
		  tree t3 = TREE_VALUE (t2);
		  const char *prop = IDENTIFIER_POINTER (TREE_PURPOSE (t3));
		  if (!strcmp (prop, " score"))
		    {
		      t3 = TREE_CHAIN (t3);
		      prop = IDENTIFIER_POINTER (TREE_PURPOSE (t3));
		    }
		  if (!strcmp (prop, "relaxed")
		      && omo != OMP_MEMORY_ORDER_RELAXED)
		    return 0;
		  else if (!strcmp (prop, "seq_cst")
			   && omo != OMP_MEMORY_ORDER_SEQ_CST)
		    return 0;
		  else if (!strcmp (prop, "acq_rel")
			   && omo != OMP_MEMORY_ORDER_ACQ_REL)
		    return 0;
		}
	      if (set == 'd' && !strcmp (sel, "arch"))
		for (tree t3 = TREE_VALUE (t2); t3; t3 = TREE_CHAIN (t3))
		  {
		    const char *arch = omp_context_name_list_prop (t3);
		    if (arch == NULL)
		      return 0;
		    int r = 0;
		    if (targetm.omp.device_kind_arch_isa != NULL)
		      r = targetm.omp.device_kind_arch_isa (omp_device_arch,
							    arch);
		    if (r == 0 || (r == -1 && symtab->state != PARSING))
		      {
			/* If we are or might be in a target region or
			   declare target function, need to take into account
			   also offloading values.  */
			if (!omp_maybe_offloaded ())
			  return 0;
			if (strcmp (arch, "hsa") == 0
			    && hsa_gen_requested_p ())
			  {
			    ret = -1;
			    continue;
			  }
			if (ENABLE_OFFLOADING)
			  {
			    const char *arches = omp_offload_device_arch;
			    if (omp_offload_device_kind_arch_isa (arches,
								  arch))
			      {
				ret = -1;
				continue;
			      }
			  }
			return 0;
		      }
		    else if (r == -1)
		      ret = -1;
		    /* If arch matches on the host, it still might not match
		       in the offloading region.  */
		    else if (omp_maybe_offloaded ())
		      ret = -1;
		  }
	      break;
	    case 'u':
	      if (set == 'i' && !strcmp (sel, "unified_address"))
		{
		  if ((omp_requires_mask & OMP_REQUIRES_UNIFIED_ADDRESS) == 0)
		    {
		      /* The requires directive may still appear later in
			 the TU while parsing.  */
		      if (symtab->state == PARSING)
			ret = -1;
		      else
			return 0;
		    }
		  break;
		}
	      if (set == 'i' && !strcmp (sel, "unified_shared_memory"))
		{
		  if ((omp_requires_mask
		       & OMP_REQUIRES_UNIFIED_SHARED_MEMORY) == 0)
		    {
		      if (symtab->state == PARSING)
			ret = -1;
		      else
			return 0;
		    }
		  break;
		}
	      break;
	    case 'd':
	      if (set == 'i' && !strcmp (sel, "dynamic_allocators"))
		{
		  if ((omp_requires_mask
		       & OMP_REQUIRES_DYNAMIC_ALLOCATORS) == 0)
		    {
		      if (symtab->state == PARSING)
			ret = -1;
		      else
			return 0;
		    }
		  break;
		}
	      break;
	    case 'r':
	      if (set == 'i' && !strcmp (sel, "reverse_offload"))
		{
		  if ((omp_requires_mask & OMP_REQUIRES_REVERSE_OFFLOAD) == 0)
		    {
		      if (symtab->state == PARSING)
			ret = -1;
		      else
			return 0;
		    }
		  break;
		}
	      break;
	    case 'k':
	      if (set == 'd' && !strcmp (sel, "kind"))
		for (tree t3 = TREE_VALUE (t2); t3; t3 = TREE_CHAIN (t3))
		  {
		    const char *prop = omp_context_name_list_prop (t3);
		    if (prop == NULL)
		      return 0;
		    if (!strcmp (prop, "any"))
		      continue;
		    if (!strcmp (prop, "host"))
		      {
			if (omp_maybe_offloaded ())
			  ret = -1;
			continue;
		      }
		    if (!strcmp (prop, "nohost"))
		      {
			if (omp_maybe_offloaded ())
			  ret = -1;
			else
			  return 0;
			continue;
		      }
		    int r = 0;
		    if (targetm.omp.device_kind_arch_isa != NULL)
		      r = targetm.omp.device_kind_arch_isa (omp_device_kind,
							    prop);
		    else
		      /* Without the target hook, the host device is
			 assumed to be of kind "cpu".  */
		      r = strcmp (prop, "cpu") == 0;
		    if (r == 0 || (r == -1 && symtab->state != PARSING))
		      {
			/* If we are or might be in a target region or
			   declare target function, need to take into account
			   also offloading values.  */
			if (!omp_maybe_offloaded ())
			  return 0;
			if (strcmp (prop, "gpu") == 0
			    && hsa_gen_requested_p ())
			  {
			    ret = -1;
			    continue;
			  }
			if (ENABLE_OFFLOADING)
			  {
			    const char *kinds = omp_offload_device_kind;
			    if (omp_offload_device_kind_arch_isa (kinds, prop))
			      {
				ret = -1;
				continue;
			      }
			  }
			return 0;
		      }
		    else if (r == -1)
		      ret = -1;
		    /* If kind matches on the host, it still might not match
		       in the offloading region.  */
		    else if (omp_maybe_offloaded ())
		      ret = -1;
		  }
	      break;
	    case 'i':
	      if (set == 'd' && !strcmp (sel, "isa"))
		for (tree t3 = TREE_VALUE (t2); t3; t3 = TREE_CHAIN (t3))
		  {
		    const char *isa = omp_context_name_list_prop (t3);
		    if (isa == NULL)
		      return 0;
		    int r = 0;
		    if (targetm.omp.device_kind_arch_isa != NULL)
		      r = targetm.omp.device_kind_arch_isa (omp_device_isa,
							    isa);
		    if (r == 0 || (r == -1 && symtab->state != PARSING))
		      {
			/* If isa is valid on the target, but not in the
			   current function and current function has
			   #pragma omp declare simd on it, some simd clones
			   might have the isa added later on.  */
			if (r == -1
			    && targetm.simd_clone.compute_vecsize_and_simdlen)
			  {
			    tree attrs
			      = DECL_ATTRIBUTES (current_function_decl);
			    if (lookup_attribute ("omp declare simd", attrs))
			      {
				ret = -1;
				continue;
			      }
			  }
			/* If we are or might be in a target region or
			   declare target function, need to take into account
			   also offloading values.  */
			if (!omp_maybe_offloaded ())
			  return 0;
			if (ENABLE_OFFLOADING)
			  {
			    const char *isas = omp_offload_device_isa;
			    if (omp_offload_device_kind_arch_isa (isas, isa))
			      {
				ret = -1;
				continue;
			      }
			  }
			return 0;
		      }
		    else if (r == -1)
		      ret = -1;
		    /* If isa matches on the host, it still might not match
		       in the offloading region.  */
		    else if (omp_maybe_offloaded ())
		      ret = -1;
		  }
	      break;
	    case 'c':
	      if (set == 'u' && !strcmp (sel, "condition"))
		for (tree t3 = TREE_VALUE (t2); t3; t3 = TREE_CHAIN (t3))
		  if (TREE_PURPOSE (t3) == NULL_TREE)
		    {
		      if (integer_zerop (TREE_VALUE (t3)))
			return 0;
		      if (integer_nonzerop (TREE_VALUE (t3)))
			break;
		      /* Non-constant condition: resolvable only later.  */
		      ret = -1;
		    }
	      break;
	    default:
	      break;
	    }
	}
    }
  return ret;
}
|
|
1000
|
|
1001 /* Compare construct={simd} CLAUSES1 with CLAUSES2, return 0/-1/1/2 as
|
|
1002 in omp_context_selector_set_compare. */
|
|
1003
|
|
static int
omp_construct_simd_compare (tree clauses1, tree clauses2)
{
  /* An absent clause list is a subset of any non-empty one.  */
  if (clauses1 == NULL_TREE)
    return clauses2 == NULL_TREE ? 0 : -1;
  if (clauses2 == NULL_TREE)
    return 1;

  int r = 0;
  /* Normalized summary of one simd clause list: the branch flags, the
     simdlen expression, and the per-argument data-sharing and aligned
     clauses indexed by argument number.  */
  struct declare_variant_simd_data {
    bool inbranch, notinbranch;
    tree simdlen;
    auto_vec<tree,16> data_sharing;
    auto_vec<tree,16> aligned;
    declare_variant_simd_data ()
      : inbranch(false), notinbranch(false), simdlen(NULL_TREE) {}
  } data[2];
  unsigned int i;
  /* Bucket each clause of both lists into data[0] (CLAUSES1) and
     data[1] (CLAUSES2).  */
  for (i = 0; i < 2; i++)
    for (tree c = i ? clauses2 : clauses1; c; c = OMP_CLAUSE_CHAIN (c))
      {
	vec<tree> *v;
	switch (OMP_CLAUSE_CODE (c))
	  {
	  case OMP_CLAUSE_INBRANCH:
	    data[i].inbranch = true;
	    continue;
	  case OMP_CLAUSE_NOTINBRANCH:
	    data[i].notinbranch = true;
	    continue;
	  case OMP_CLAUSE_SIMDLEN:
	    data[i].simdlen = OMP_CLAUSE_SIMDLEN_EXPR (c);
	    continue;
	  case OMP_CLAUSE_UNIFORM:
	  case OMP_CLAUSE_LINEAR:
	    v = &data[i].data_sharing;
	    break;
	  case OMP_CLAUSE_ALIGNED:
	    v = &data[i].aligned;
	    break;
	  default:
	    gcc_unreachable ();
	  }
	/* For the per-argument clauses OMP_CLAUSE_DECL holds the
	   argument number; grow the vector as needed and record the
	   clause at that index.  */
	unsigned HOST_WIDE_INT argno = tree_to_uhwi (OMP_CLAUSE_DECL (c));
	if (argno >= v->length ())
	  v->safe_grow_cleared (argno + 1);
	(*v)[argno] = c;
      }
  /* Here, r is used as a bitmask, 2 is set if CLAUSES1 has something
     CLAUSES2 doesn't, 1 is set if CLAUSES2 has something CLAUSES1
     doesn't.  Thus, r == 3 implies return value 2, r == 1 implies
     -1, r == 2 implies 1 and r == 0 implies 0.  */
  if (data[0].inbranch != data[1].inbranch)
    r |= data[0].inbranch ? 2 : 1;
  if (data[0].notinbranch != data[1].notinbranch)
    r |= data[0].notinbranch ? 2 : 1;
  if (!simple_cst_equal (data[0].simdlen, data[1].simdlen))
    {
      /* Two different explicit simdlen expressions are incomparable.  */
      if (data[0].simdlen && data[1].simdlen)
	return 2;
      r |= data[0].simdlen ? 2 : 1;
    }
  if (data[0].data_sharing.length () < data[1].data_sharing.length ()
      || data[0].aligned.length () < data[1].aligned.length ())
    r |= 1;
  tree c1, c2;
  /* Compare per-argument data-sharing clauses pairwise; any mismatch in
     clause kind or linear properties makes the lists incomparable.  */
  FOR_EACH_VEC_ELT (data[0].data_sharing, i, c1)
    {
      c2 = (i < data[1].data_sharing.length ()
	    ? data[1].data_sharing[i] : NULL_TREE);
      if ((c1 == NULL_TREE) != (c2 == NULL_TREE))
	{
	  r |= c1 != NULL_TREE ? 2 : 1;
	  continue;
	}
      if (c1 == NULL_TREE)
	continue;
      if (OMP_CLAUSE_CODE (c1) != OMP_CLAUSE_CODE (c2))
	return 2;
      if (OMP_CLAUSE_CODE (c1) != OMP_CLAUSE_LINEAR)
	continue;
      if (OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (c1)
	  != OMP_CLAUSE_LINEAR_VARIABLE_STRIDE (c2))
	return 2;
      if (OMP_CLAUSE_LINEAR_KIND (c1) != OMP_CLAUSE_LINEAR_KIND (c2))
	return 2;
      if (!simple_cst_equal (OMP_CLAUSE_LINEAR_STEP (c1),
			     OMP_CLAUSE_LINEAR_STEP (c2)))
	return 2;
    }
  /* Likewise for aligned clauses; different alignments are
     incomparable.  */
  FOR_EACH_VEC_ELT (data[0].aligned, i, c1)
    {
      c2 = i < data[1].aligned.length () ? data[1].aligned[i] : NULL_TREE;
      if ((c1 == NULL_TREE) != (c2 == NULL_TREE))
	{
	  r |= c1 != NULL_TREE ? 2 : 1;
	  continue;
	}
      if (c1 == NULL_TREE)
	continue;
      if (!simple_cst_equal (OMP_CLAUSE_ALIGNED_ALIGNMENT (c1),
			     OMP_CLAUSE_ALIGNED_ALIGNMENT (c2)))
	return 2;
    }
  /* Translate the accumulated bitmask into the documented
     0/-1/1/2 return convention.  */
  switch (r)
    {
    case 0: return 0;
    case 1: return -1;
    case 2: return 1;
    case 3: return 2;
    default: gcc_unreachable ();
    }
}
|
|
1117
|
|
1118 /* Compare properties of selectors SEL from SET other than construct.
|
|
1119 Return 0/-1/1/2 as in omp_context_selector_set_compare.
|
|
1120 Unlike set names or selector names, properties can have duplicates. */
|
|
1121
|
|
static int
omp_context_selector_props_compare (const char *set, const char *sel,
				    tree ctx1, tree ctx2)
{
  int ret = 0;
  /* Two passes: pass 0 looks for CTX1 properties missing from CTX2,
     pass 1 for CTX2 properties missing from CTX1; the subset relation
     is accumulated into RET (1 = CTX2 subset, -1 = CTX1 subset).  */
  for (int pass = 0; pass < 2; pass++)
    for (tree t1 = pass ? ctx2 : ctx1; t1; t1 = TREE_CHAIN (t1))
      {
	tree t2;
	for (t2 = pass ? ctx1 : ctx2; t2; t2 = TREE_CHAIN (t2))
	  if (TREE_PURPOSE (t1) == TREE_PURPOSE (t2))
	    {
	      if (TREE_PURPOSE (t1) == NULL_TREE)
		{
		  /* user={condition(...)} properties compare by truth
		     value, not by expression identity.  */
		  if (set[0] == 'u' && strcmp (sel, "condition") == 0)
		    {
		      if (integer_zerop (TREE_VALUE (t1))
			  != integer_zerop (TREE_VALUE (t2)))
			return 2;
		      break;
		    }
		  if (simple_cst_equal (TREE_VALUE (t1), TREE_VALUE (t2)))
		    break;
		}
	      else if (strcmp (IDENTIFIER_POINTER (TREE_PURPOSE (t1)),
			       " score") == 0)
		{
		  /* Differing score(...) values make the property
		     lists incomparable.  */
		  if (!simple_cst_equal (TREE_VALUE (t1), TREE_VALUE (t2)))
		    return 2;
		  break;
		}
	      else
		break;
	    }
	  else if (TREE_PURPOSE (t1)
		   && TREE_PURPOSE (t2) == NULL_TREE
		   && TREE_CODE (TREE_VALUE (t2)) == STRING_CST)
	    {
	      /* Name-list properties may be spelled either as an
		 identifier or as a string constant; compare textually,
		 excluding the internal " score" marker.  */
	      const char *p1 = omp_context_name_list_prop (t1);
	      const char *p2 = omp_context_name_list_prop (t2);
	      if (p2
		  && strcmp (p1, p2) == 0
		  && strcmp (p1, " score"))
		break;
	    }
	  else if (TREE_PURPOSE (t1) == NULL_TREE
		   && TREE_PURPOSE (t2)
		   && TREE_CODE (TREE_VALUE (t1)) == STRING_CST)
	    {
	      /* Mirror case: T1 is the string form, T2 the identifier
		 form.  */
	      const char *p1 = omp_context_name_list_prop (t1);
	      const char *p2 = omp_context_name_list_prop (t2);
	      if (p1
		  && strcmp (p1, p2) == 0
		  && strcmp (p1, " score"))
		break;
	    }
	if (t2 == NULL_TREE)
	  {
	    /* T1 has no counterpart on the other side: one direction is
	       a strict superset.  If both directions turn out strict,
	       the properties are incomparable.  */
	    int r = pass ? -1 : 1;
	    if (ret && ret != r)
	      return 2;
	    else if (pass)
	      return r;
	    else
	      {
		ret = r;
		break;
	      }
	  }
      }
  return ret;
}
|
|
1194
|
|
1195 /* Compare single context selector sets CTX1 and CTX2 with SET name.
|
|
1196 Return 0 if CTX1 is equal to CTX2,
|
|
1197 -1 if CTX1 is a strict subset of CTX2,
|
|
1198 1 if CTX2 is a strict subset of CTX1, or
|
|
1199 2 if neither context is a subset of another one. */
|
|
1200
|
|
int
omp_context_selector_set_compare (const char *set, tree ctx1, tree ctx2)
{
  bool swapped = false;
  int ret = 0;
  int len1 = list_length (ctx1);
  int len2 = list_length (ctx2);
  int cnt = 0;
  /* Canonicalize so CTX1 is at least as long as CTX2; negate the final
     answer if we swapped.  */
  if (len1 < len2)
    {
      swapped = true;
      std::swap (ctx1, ctx2);
      std::swap (len1, len2);
    }
  if (set[0] == 'c')
    {
      tree t1;
      tree t2 = ctx2;
      tree simd = get_identifier ("simd");
      /* Handle construct set specially.  In this case the order
	 of the selector matters too.  */
      for (t1 = ctx1; t1; t1 = TREE_CHAIN (t1))
	if (TREE_PURPOSE (t1) == TREE_PURPOSE (t2))
	  {
	    int r = 0;
	    /* Only the simd selector carries clauses needing a
	       dedicated comparison.  */
	    if (TREE_PURPOSE (t1) == simd)
	      r = omp_construct_simd_compare (TREE_VALUE (t1),
					      TREE_VALUE (t2));
	    if (r == 2 || (ret && r && (ret < 0) != (r < 0)))
	      return 2;
	    if (ret == 0)
	      ret = r;
	    t2 = TREE_CHAIN (t2);
	    if (t2 == NULL_TREE)
	      {
		t1 = TREE_CHAIN (t1);
		break;
	      }
	  }
	else if (ret < 0)
	  return 2;
	else
	  ret = 1;
      /* CTX2's selectors must form a subsequence of CTX1's.  */
      if (t2 != NULL_TREE)
	return 2;
      if (t1 != NULL_TREE)
	{
	  if (ret < 0)
	    return 2;
	  ret = 1;
	}
      if (ret == 0)
	return 0;
      return swapped ? -ret : ret;
    }
  /* For non-construct sets order is irrelevant: match selectors by name
     and compare their properties.  */
  for (tree t1 = ctx1; t1; t1 = TREE_CHAIN (t1))
    {
      tree t2;
      for (t2 = ctx2; t2; t2 = TREE_CHAIN (t2))
	if (TREE_PURPOSE (t1) == TREE_PURPOSE (t2))
	  {
	    const char *sel = IDENTIFIER_POINTER (TREE_PURPOSE (t1));
	    int r = omp_context_selector_props_compare (set, sel,
							TREE_VALUE (t1),
							TREE_VALUE (t2));
	    if (r == 2 || (ret && r && (ret < 0) != (r < 0)))
	      return 2;
	    if (ret == 0)
	      ret = r;
	    cnt++;
	    break;
	  }
      if (t2 == NULL_TREE)
	{
	  /* T1 has no counterpart, so CTX2 can only be a strict subset
	     of CTX1; a previous -1 verdict is now contradictory.  */
	  if (ret == -1)
	    return 2;
	  ret = 1;
	}
    }
  /* Some CTX2 selector was never matched.  */
  if (cnt < len2)
    return 2;
  if (ret == 0)
    return 0;
  return swapped ? -ret : ret;
}
|
|
1286
|
|
1287 /* Compare whole context selector specification CTX1 and CTX2.
|
|
1288 Return 0 if CTX1 is equal to CTX2,
|
|
1289 -1 if CTX1 is a strict subset of CTX2,
|
|
1290 1 if CTX2 is a strict subset of CTX1, or
|
|
1291 2 if neither context is a subset of another one. */
|
|
1292
|
|
static int
omp_context_selector_compare (tree ctx1, tree ctx2)
{
  bool swapped = false;
  int ret = 0;
  int len1 = list_length (ctx1);
  int len2 = list_length (ctx2);
  int cnt = 0;
  /* Canonicalize so CTX1 is at least as long as CTX2; negate the final
     answer if we swapped.  */
  if (len1 < len2)
    {
      swapped = true;
      std::swap (ctx1, ctx2);
      std::swap (len1, len2);
    }
  /* Match trait-selector-sets by name and delegate the per-set
     comparison; verdicts from different sets must agree in sign or the
     whole selectors are incomparable.  */
  for (tree t1 = ctx1; t1; t1 = TREE_CHAIN (t1))
    {
      tree t2;
      for (t2 = ctx2; t2; t2 = TREE_CHAIN (t2))
	if (TREE_PURPOSE (t1) == TREE_PURPOSE (t2))
	  {
	    const char *set = IDENTIFIER_POINTER (TREE_PURPOSE (t1));
	    int r = omp_context_selector_set_compare (set, TREE_VALUE (t1),
						      TREE_VALUE (t2));
	    if (r == 2 || (ret && r && (ret < 0) != (r < 0)))
	      return 2;
	    if (ret == 0)
	      ret = r;
	    cnt++;
	    break;
	  }
      if (t2 == NULL_TREE)
	{
	  /* T1's set is absent from CTX2, so CTX2 can only be a strict
	     subset of CTX1.  */
	  if (ret == -1)
	    return 2;
	  ret = 1;
	}
    }
  /* Some CTX2 set was never matched.  */
  if (cnt < len2)
    return 2;
  if (ret == 0)
    return 0;
  return swapped ? -ret : ret;
}
|
|
1336
|
|
1337 /* From context selector CTX, return trait-selector with name SEL in
|
|
1338 trait-selector-set with name SET if any, or NULL_TREE if not found.
|
|
1339 If SEL is NULL, return the list of trait-selectors in SET. */
|
|
1340
|
|
1341 tree
|
|
1342 omp_get_context_selector (tree ctx, const char *set, const char *sel)
|
|
1343 {
|
|
1344 tree setid = get_identifier (set);
|
|
1345 tree selid = sel ? get_identifier (sel) : NULL_TREE;
|
|
1346 for (tree t1 = ctx; t1; t1 = TREE_CHAIN (t1))
|
|
1347 if (TREE_PURPOSE (t1) == setid)
|
|
1348 {
|
|
1349 if (sel == NULL)
|
|
1350 return TREE_VALUE (t1);
|
|
1351 for (tree t2 = TREE_VALUE (t1); t2; t2 = TREE_CHAIN (t2))
|
|
1352 if (TREE_PURPOSE (t2) == selid)
|
|
1353 return t2;
|
|
1354 }
|
|
1355 return NULL_TREE;
|
|
1356 }
|
|
1357
|
|
1358 /* Compute *SCORE for context selector CTX. Return true if the score
|
|
1359 would be different depending on whether it is a declare simd clone or
|
|
1360 not. DECLARE_SIMD should be true for the case when it would be
|
|
1361 a declare simd clone. */
|
|
1362
|
|
static bool
omp_context_compute_score (tree ctx, widest_int *score, bool declare_simd)
{
  tree construct = omp_get_context_selector (ctx, "construct", NULL);
  bool has_kind = omp_get_context_selector (ctx, "device", "kind");
  bool has_arch = omp_get_context_selector (ctx, "device", "arch");
  bool has_isa = omp_get_context_selector (ctx, "device", "isa");
  bool ret = false;
  *score = 1;
  /* Sum all explicit score(...) properties outside of the construct
     selector set.  */
  for (tree t1 = ctx; t1; t1 = TREE_CHAIN (t1))
    if (TREE_VALUE (t1) != construct)
      for (tree t2 = TREE_VALUE (t1); t2; t2 = TREE_CHAIN (t2))
	if (tree t3 = TREE_VALUE (t2))
	  if (TREE_PURPOSE (t3)
	      && strcmp (IDENTIFIER_POINTER (TREE_PURPOSE (t3)), " score") == 0
	      && TREE_CODE (TREE_VALUE (t3)) == INTEGER_CST)
	    *score += wi::to_widest (TREE_VALUE (t3));
  if (construct || has_kind || has_arch || has_isa)
    {
      int scores[12];
      enum tree_code constructs[5];
      int nconstructs = 0;
      if (construct)
	nconstructs = omp_constructor_traits_to_codes (construct, constructs);
      /* Return value 2 means the score differs between the declare simd
	 clone and non-clone cases; propagate that to the caller.  */
      if (omp_construct_selector_matches (constructs, nconstructs, scores)
	  == 2)
	ret = true;
      /* The scores array holds both variants back to back; B indexes
	 the requested one.  */
      int b = declare_simd ? nconstructs + 1 : 0;
      if (scores[b + nconstructs] + 4U < score->get_precision ())
	{
	  /* Each matching selector contributes a distinct power of two
	     determined by its position, so deeper/later matches always
	     dominate.  */
	  for (int n = 0; n < nconstructs; ++n)
	    {
	      if (scores[b + n] < 0)
		{
		  /* Construct selector doesn't match at all.  */
		  *score = -1;
		  return ret;
		}
	      *score += wi::shifted_mask <widest_int> (scores[b + n], 1, false);
	    }
	  if (has_kind)
	    *score += wi::shifted_mask <widest_int> (scores[b + nconstructs],
						     1, false);
	  if (has_arch)
	    *score += wi::shifted_mask <widest_int> (scores[b + nconstructs] + 1,
						     1, false);
	  if (has_isa)
	    *score += wi::shifted_mask <widest_int> (scores[b + nconstructs] + 2,
						     1, false);
	}
      else /* FIXME: Implement this.  */
	gcc_unreachable ();
    }
  return ret;
}
|
|
1417
|
|
1418 /* Try to resolve declare variant, return the variant decl if it should
|
|
1419 be used instead of base, or base otherwise. */
|
|
1420
|
|
tree
omp_resolve_declare_variant (tree base)
{
  tree variant1 = NULL_TREE, variant2 = NULL_TREE;
  auto_vec <tree, 16> variants;
  auto_vec <bool, 16> defer;
  bool any_deferred = false;
  /* Collect all "omp declare variant base" attributes whose context
     selector matches (result 1) or might match later (result -1).  */
  for (tree attr = DECL_ATTRIBUTES (base); attr; attr = TREE_CHAIN (attr))
    {
      attr = lookup_attribute ("omp declare variant base", attr);
      if (attr == NULL_TREE)
	break;
      if (TREE_CODE (TREE_PURPOSE (TREE_VALUE (attr))) != FUNCTION_DECL)
	continue;
      switch (omp_context_selector_matches (TREE_VALUE (TREE_VALUE (attr))))
	{
	case 0:
	  /* No match, ignore.  */
	  break;
	case -1:
	  /* Needs to be deferred.  */
	  any_deferred = true;
	  variants.safe_push (attr);
	  defer.safe_push (true);
	  break;
	default:
	  variants.safe_push (attr);
	  defer.safe_push (false);
	  break;
	}
    }
  if (variants.length () == 0)
    return base;

  if (any_deferred)
    {
      /* Some selectors can't be decided yet.  Compute both scores (the
	 declare-simd-clone score and the plain score) for each
	 candidate; we can only resolve now if the same non-deferred
	 candidate strictly wins under both.  */
      widest_int max_score1 = 0;
      widest_int max_score2 = 0;
      bool first = true;
      unsigned int i;
      tree attr1, attr2;
      FOR_EACH_VEC_ELT (variants, i, attr1)
	{
	  widest_int score1;
	  widest_int score2;
	  bool need_two;
	  tree ctx = TREE_VALUE (TREE_VALUE (attr1));
	  need_two = omp_context_compute_score (ctx, &score1, false);
	  if (need_two)
	    omp_context_compute_score (ctx, &score2, true);
	  else
	    score2 = score1;
	  if (first)
	    {
	      first = false;
	      max_score1 = score1;
	      max_score2 = score2;
	      if (!defer[i])
		{
		  variant1 = attr1;
		  variant2 = attr1;
		}
	    }
	  else
	    {
	      /* A tie clears the winner; a strictly better score makes
		 this candidate the winner unless it is deferred.  */
	      if (max_score1 == score1)
		variant1 = NULL_TREE;
	      else if (score1 > max_score1)
		{
		  max_score1 = score1;
		  variant1 = defer[i] ? NULL_TREE : attr1;
		}
	      if (max_score2 == score2)
		variant2 = NULL_TREE;
	      else if (score2 > max_score2)
		{
		  max_score2 = score2;
		  variant2 = defer[i] ? NULL_TREE : attr1;
		}
	    }
	}

      /* If there is a clear winner variant with the score which is not
	 deferred, verify it is not a strict subset of any other context
	 selector and if it is not, it is the best alternative no matter
	 whether the others do or don't match.  */
      if (variant1 && variant1 == variant2)
	{
	  tree ctx1 = TREE_VALUE (TREE_VALUE (variant1));
	  FOR_EACH_VEC_ELT (variants, i, attr2)
	    {
	      if (attr2 == variant1)
		continue;
	      tree ctx2 = TREE_VALUE (TREE_VALUE (attr2));
	      int r = omp_context_selector_compare (ctx1, ctx2);
	      if (r == -1)
		{
		  /* The winner is a strict subset of ctx2, can't
		     decide now.  */
		  variant1 = NULL_TREE;
		  break;
		}
	    }
	  if (variant1)
	    return TREE_PURPOSE (TREE_VALUE (variant1));
	}

      return base;
    }

  if (variants.length () == 1)
    return TREE_PURPOSE (TREE_VALUE (variants[0]));

  /* A context selector that is a strict subset of another context selector
     has a score of zero.  Null out such entries so they can't win.  */
  tree attr1, attr2;
  unsigned int i, j;
  FOR_EACH_VEC_ELT (variants, i, attr1)
    if (attr1)
      {
	tree ctx1 = TREE_VALUE (TREE_VALUE (attr1));
	FOR_EACH_VEC_ELT_FROM (variants, j, attr2, i + 1)
	  if (attr2)
	    {
	      tree ctx2 = TREE_VALUE (TREE_VALUE (attr2));
	      int r = omp_context_selector_compare (ctx1, ctx2);
	      if (r == -1)
		{
		  /* ctx1 is a strict subset of ctx2, remove
		     attr1 from the vector.  */
		  variants[i] = NULL_TREE;
		  break;
		}
	      else if (r == 1)
		/* ctx2 is a strict subset of ctx1, remove attr2
		   from the vector.  */
		variants[j] = NULL_TREE;
	    }
      }
  /* Among the survivors pick the highest score under both scoring
     modes; the first survivor's scores are computed lazily once a
     second survivor appears.  */
  widest_int max_score1 = 0;
  widest_int max_score2 = 0;
  bool first = true;
  FOR_EACH_VEC_ELT (variants, i, attr1)
    if (attr1)
      {
	if (variant1)
	  {
	    widest_int score1;
	    widest_int score2;
	    bool need_two;
	    tree ctx;
	    if (first)
	      {
		first = false;
		ctx = TREE_VALUE (TREE_VALUE (variant1));
		need_two = omp_context_compute_score (ctx, &max_score1, false);
		if (need_two)
		  omp_context_compute_score (ctx, &max_score2, true);
		else
		  max_score2 = max_score1;
	      }
	    ctx = TREE_VALUE (TREE_VALUE (attr1));
	    need_two = omp_context_compute_score (ctx, &score1, false);
	    if (need_two)
	      omp_context_compute_score (ctx, &score2, true);
	    else
	      score2 = score1;
	    if (score1 > max_score1)
	      {
		max_score1 = score1;
		variant1 = attr1;
	      }
	    if (score2 > max_score2)
	      {
		max_score2 = score2;
		variant2 = attr1;
	      }
	  }
	else
	  {
	    variant1 = attr1;
	    variant2 = attr1;
	  }
      }
  /* If there is a disagreement on which variant has the highest score
     depending on whether it will be in a declare simd clone or not,
     punt for now and defer until after IPA where we will know that.  */
  return ((variant1 && variant1 == variant2)
	  ? TREE_PURPOSE (TREE_VALUE (variant1)) : base);
}
|
|
1611
|
|
1612
|
111
|
1613 /* Encode an oacc launch argument. This matches the GOMP_LAUNCH_PACK
|
|
1614 macro on gomp-constants.h. We do not check for overflow. */
|
|
1615
|
|
1616 tree
|
|
1617 oacc_launch_pack (unsigned code, tree device, unsigned op)
|
|
1618 {
|
|
1619 tree res;
|
|
1620
|
|
1621 res = build_int_cst (unsigned_type_node, GOMP_LAUNCH_PACK (code, 0, op));
|
|
1622 if (device)
|
|
1623 {
|
|
1624 device = fold_build2 (LSHIFT_EXPR, unsigned_type_node,
|
|
1625 device, build_int_cst (unsigned_type_node,
|
|
1626 GOMP_LAUNCH_DEVICE_SHIFT));
|
|
1627 res = fold_build2 (BIT_IOR_EXPR, unsigned_type_node, res, device);
|
|
1628 }
|
|
1629 return res;
|
|
1630 }
|
|
1631
|
|
1632 /* FIXME: What is the following comment for? */
|
|
1633 /* Look for compute grid dimension clauses and convert to an attribute
|
|
1634 attached to FN. This permits the target-side code to (a) massage
|
|
1635 the dimensions, (b) emit that data and (c) optimize. Non-constant
|
|
1636 dimensions are pushed onto ARGS.
|
|
1637
|
|
1638 The attribute value is a TREE_LIST. A set of dimensions is
|
|
1639 represented as a list of INTEGER_CST. Those that are runtime
|
|
1640 exprs are represented as an INTEGER_CST of zero.
|
|
1641
|
|
1642 TODO: Normally the attribute will just contain a single such list. If
|
|
1643 however it contains a list of lists, this will represent the use of
|
|
1644 device_type. Each member of the outer list is an assoc list of
|
|
1645 dimensions, keyed by the device type. The first entry will be the
|
|
1646 default. Well, that's the plan. */
|
|
1647
|
|
1648 /* Replace any existing oacc fn attribute with updated dimensions. */
|
|
1649
|
145
|
1650 /* Variant working on a list of attributes. */
|
|
1651
|
|
1652 tree
|
|
1653 oacc_replace_fn_attrib_attr (tree attribs, tree dims)
|
111
|
1654 {
|
|
1655 tree ident = get_identifier (OACC_FN_ATTRIB);
|
|
1656
|
|
1657 /* If we happen to be present as the first attrib, drop it. */
|
|
1658 if (attribs && TREE_PURPOSE (attribs) == ident)
|
|
1659 attribs = TREE_CHAIN (attribs);
|
145
|
1660 return tree_cons (ident, dims, attribs);
|
|
1661 }
|
|
1662
|
|
1663 /* Variant working on a function decl. */
|
|
1664
|
|
1665 void
|
|
1666 oacc_replace_fn_attrib (tree fn, tree dims)
|
|
1667 {
|
|
1668 DECL_ATTRIBUTES (fn)
|
|
1669 = oacc_replace_fn_attrib_attr (DECL_ATTRIBUTES (fn), dims);
|
111
|
1670 }
|
|
1671
|
|
1672 /* Scan CLAUSES for launch dimensions and attach them to the oacc
|
|
1673 function attribute. Push any that are non-constant onto the ARGS
|
|
1674 list, along with an appropriate GOMP_LAUNCH_DIM tag. */
|
|
1675
|
|
void
oacc_set_fn_attrib (tree fn, tree clauses, vec<tree> *args)
{
  /* Must match GOMP_DIM ordering.  */
  static const omp_clause_code ids[]
    = { OMP_CLAUSE_NUM_GANGS, OMP_CLAUSE_NUM_WORKERS,
	OMP_CLAUSE_VECTOR_LENGTH };
  unsigned ix;
  tree dims[GOMP_DIM_MAX];

  tree attr = NULL_TREE;
  /* Bitmask of axes whose dimension is not a compile-time constant.  */
  unsigned non_const = 0;

  /* Walk the axes from innermost to outermost so that consing onto
     ATTR leaves the resulting list in GOMP_DIM order.  */
  for (ix = GOMP_DIM_MAX; ix--;)
    {
      tree clause = omp_find_clause (clauses, ids[ix]);
      tree dim = NULL_TREE;

      if (clause)
	dim = OMP_CLAUSE_EXPR (clause, ids[ix]);
      dims[ix] = dim;
      if (dim && TREE_CODE (dim) != INTEGER_CST)
	{
	  /* Non-constant dimensions are recorded as zero in the
	     attribute and passed via the launch ARGS instead.  */
	  dim = integer_zero_node;
	  non_const |= GOMP_DIM_MASK (ix);
	}
      attr = tree_cons (NULL_TREE, dim, attr);
    }

  oacc_replace_fn_attrib (fn, attr);

  if (non_const)
    {
      /* Push a dynamic argument set.  */
      args->safe_push (oacc_launch_pack (GOMP_LAUNCH_DIM,
					 NULL_TREE, non_const));
      for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
	if (non_const & GOMP_DIM_MASK (ix))
	  args->safe_push (dims[ix]);
    }
}
|
|
1717
|
145
|
1718 /* Verify OpenACC routine clauses.
|
|
1719
|
|
1720 Returns 0 if FNDECL should be marked with an OpenACC 'routine' directive, 1
|
|
1721 if it has already been marked in compatible way, and -1 if incompatible.
|
|
1722 Upon returning, the chain of clauses will contain exactly one clause
|
|
1723 specifying the level of parallelism. */
|
|
1724
|
|
int
oacc_verify_routine_clauses (tree fndecl, tree *clauses, location_t loc,
			     const char *routine_str)
{
  /* C_LEVEL is the (single) clause specifying the level of parallelism;
     C_P trails the iteration so duplicates/conflicts can be unlinked.  */
  tree c_level = NULL_TREE;
  tree c_p = NULL_TREE;
  for (tree c = *clauses; c; c_p = c, c = OMP_CLAUSE_CHAIN (c))
    switch (OMP_CLAUSE_CODE (c))
      {
      case OMP_CLAUSE_GANG:
      case OMP_CLAUSE_WORKER:
      case OMP_CLAUSE_VECTOR:
      case OMP_CLAUSE_SEQ:
	if (c_level == NULL_TREE)
	  c_level = c;
	else if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_CODE (c_level))
	  {
	    /* This has already been diagnosed in the front ends.  */
	    /* Drop the duplicate clause.  */
	    gcc_checking_assert (c_p != NULL_TREE);
	    OMP_CLAUSE_CHAIN (c_p) = OMP_CLAUSE_CHAIN (c);
	    c = c_p;
	  }
	else
	  {
	    error_at (OMP_CLAUSE_LOCATION (c),
		      "%qs specifies a conflicting level of parallelism",
		      omp_clause_code_name[OMP_CLAUSE_CODE (c)]);
	    inform (OMP_CLAUSE_LOCATION (c_level),
		    "... to the previous %qs clause here",
		    omp_clause_code_name[OMP_CLAUSE_CODE (c_level)]);
	    /* Drop the conflicting clause.  */
	    gcc_checking_assert (c_p != NULL_TREE);
	    OMP_CLAUSE_CHAIN (c_p) = OMP_CLAUSE_CHAIN (c);
	    c = c_p;
	  }
	break;
      default:
	gcc_unreachable ();
      }
  if (c_level == NULL_TREE)
    {
      /* Default to an implicit 'seq' clause.  */
      c_level = build_omp_clause (loc, OMP_CLAUSE_SEQ);
      OMP_CLAUSE_CHAIN (c_level) = *clauses;
      *clauses = c_level;
    }
  /* In *clauses, we now have exactly one clause specifying the level of
     parallelism.  */

  tree attr
    = lookup_attribute ("omp declare target", DECL_ATTRIBUTES (fndecl));
  if (attr != NULL_TREE)
    {
      /* If a "#pragma acc routine" has already been applied, just verify
	 this one for compatibility.  */
      /* Collect previous directive's clauses.  */
      tree c_level_p = NULL_TREE;
      for (tree c = TREE_VALUE (attr); c; c = OMP_CLAUSE_CHAIN (c))
	switch (OMP_CLAUSE_CODE (c))
	  {
	  case OMP_CLAUSE_GANG:
	  case OMP_CLAUSE_WORKER:
	  case OMP_CLAUSE_VECTOR:
	  case OMP_CLAUSE_SEQ:
	    gcc_checking_assert (c_level_p == NULL_TREE);
	    c_level_p = c;
	    break;
	  default:
	    gcc_unreachable ();
	  }
      gcc_checking_assert (c_level_p != NULL_TREE);
      /* ..., and compare to current directive's, which we've already collected
	 above.  */
      tree c_diag;
      tree c_diag_p;
      /* Matching level of parallelism?  */
      if (OMP_CLAUSE_CODE (c_level) != OMP_CLAUSE_CODE (c_level_p))
	{
	  c_diag = c_level;
	  c_diag_p = c_level_p;
	  goto incompatible;
	}
      /* Compatible.  */
      return 1;

    incompatible:
      if (c_diag != NULL_TREE)
	error_at (OMP_CLAUSE_LOCATION (c_diag),
		  "incompatible %qs clause when applying"
		  " %<%s%> to %qD, which has already been"
		  " marked with an OpenACC 'routine' directive",
		  omp_clause_code_name[OMP_CLAUSE_CODE (c_diag)],
		  routine_str, fndecl);
      else if (c_diag_p != NULL_TREE)
	error_at (loc,
		  "missing %qs clause when applying"
		  " %<%s%> to %qD, which has already been"
		  " marked with an OpenACC 'routine' directive",
		  omp_clause_code_name[OMP_CLAUSE_CODE (c_diag_p)],
		  routine_str, fndecl);
      else
	gcc_unreachable ();
      if (c_diag_p != NULL_TREE)
	inform (OMP_CLAUSE_LOCATION (c_diag_p),
		"... with %qs clause here",
		omp_clause_code_name[OMP_CLAUSE_CODE (c_diag_p)]);
      else
	{
	  /* In the front ends, we don't preserve location information for the
	     OpenACC routine directive itself.  However, that of c_level_p
	     should be close.  */
	  location_t loc_routine = OMP_CLAUSE_LOCATION (c_level_p);
	  inform (loc_routine, "... without %qs clause near to here",
		  omp_clause_code_name[OMP_CLAUSE_CODE (c_diag)]);
	}
      /* Incompatible.  */
      return -1;
    }

  return 0;
}
|
|
1847
|
|
1848 /* Process the OpenACC 'routine' directive clauses to generate an attribute
|
|
1849 for the level of parallelism. All dimensions have a size of zero
|
111
|
1850 (dynamic). TREE_PURPOSE is set to indicate whether that dimension
|
|
1851 can have a loop partitioned on it. non-zero indicates
|
|
1852 yes, zero indicates no. By construction once a non-zero has been
|
|
1853 reached, further inner dimensions must also be non-zero. We set
|
|
1854 TREE_VALUE to zero for the dimensions that may be partitioned and
|
|
1855 1 for the other ones -- if a loop is (erroneously) spawned at
|
|
1856 an outer level, we don't want to try and partition it. */
|
|
1857
|
|
1858 tree
|
|
1859 oacc_build_routine_dims (tree clauses)
|
|
1860 {
|
|
1861 /* Must match GOMP_DIM ordering. */
|
|
1862 static const omp_clause_code ids[]
|
|
1863 = {OMP_CLAUSE_GANG, OMP_CLAUSE_WORKER, OMP_CLAUSE_VECTOR, OMP_CLAUSE_SEQ};
|
|
1864 int ix;
|
|
1865 int level = -1;
|
|
1866
|
|
1867 for (; clauses; clauses = OMP_CLAUSE_CHAIN (clauses))
|
|
1868 for (ix = GOMP_DIM_MAX + 1; ix--;)
|
|
1869 if (OMP_CLAUSE_CODE (clauses) == ids[ix])
|
|
1870 {
|
|
1871 level = ix;
|
|
1872 break;
|
|
1873 }
|
145
|
1874 gcc_checking_assert (level >= 0);
|
111
|
1875
|
|
1876 tree dims = NULL_TREE;
|
|
1877
|
|
1878 for (ix = GOMP_DIM_MAX; ix--;)
|
|
1879 dims = tree_cons (build_int_cst (boolean_type_node, ix >= level),
|
|
1880 build_int_cst (integer_type_node, ix < level), dims);
|
|
1881
|
|
1882 return dims;
|
|
1883 }
|
|
1884
|
|
1885 /* Retrieve the oacc function attrib and return it. Non-oacc
|
|
1886 functions will return NULL. */
|
|
1887
|
|
1888 tree
|
|
1889 oacc_get_fn_attrib (tree fn)
|
|
1890 {
|
|
1891 return lookup_attribute (OACC_FN_ATTRIB, DECL_ATTRIBUTES (fn));
|
|
1892 }
|
|
1893
|
131
|
1894 /* Return true if FN is an OpenMP or OpenACC offloading function. */
|
|
1895
|
|
1896 bool
|
|
1897 offloading_function_p (tree fn)
|
|
1898 {
|
|
1899 tree attrs = DECL_ATTRIBUTES (fn);
|
|
1900 return (lookup_attribute ("omp declare target", attrs)
|
|
1901 || lookup_attribute ("omp target entrypoint", attrs));
|
|
1902 }
|
|
1903
|
111
|
1904 /* Extract an oacc execution dimension from FN. FN must be an
|
|
1905 offloaded function or routine that has already had its execution
|
|
1906 dimensions lowered to the target-specific values. */
|
|
1907
|
|
1908 int
|
|
1909 oacc_get_fn_dim_size (tree fn, int axis)
|
|
1910 {
|
|
1911 tree attrs = oacc_get_fn_attrib (fn);
|
|
1912
|
|
1913 gcc_assert (axis < GOMP_DIM_MAX);
|
|
1914
|
|
1915 tree dims = TREE_VALUE (attrs);
|
|
1916 while (axis--)
|
|
1917 dims = TREE_CHAIN (dims);
|
|
1918
|
|
1919 int size = TREE_INT_CST_LOW (TREE_VALUE (dims));
|
|
1920
|
|
1921 return size;
|
|
1922 }
|
|
1923
|
|
1924 /* Extract the dimension axis from an IFN_GOACC_DIM_POS or
|
|
1925 IFN_GOACC_DIM_SIZE call. */
|
|
1926
|
|
1927 int
|
|
1928 oacc_get_ifn_dim_arg (const gimple *stmt)
|
|
1929 {
|
|
1930 gcc_checking_assert (gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_SIZE
|
|
1931 || gimple_call_internal_fn (stmt) == IFN_GOACC_DIM_POS);
|
|
1932 tree arg = gimple_call_arg (stmt, 0);
|
|
1933 HOST_WIDE_INT axis = TREE_INT_CST_LOW (arg);
|
|
1934
|
|
1935 gcc_checking_assert (axis >= 0 && axis < GOMP_DIM_MAX);
|
|
1936 return (int) axis;
|
|
1937 }
|