111
|
1 /* Lowering and expansion of OpenMP directives for HSA GPU agents.
|
|
2
|
145
|
3 Copyright (C) 2013-2020 Free Software Foundation, Inc.
|
111
|
4
|
|
5 This file is part of GCC.
|
|
6
|
|
7 GCC is free software; you can redistribute it and/or modify it under
|
|
8 the terms of the GNU General Public License as published by the Free
|
|
9 Software Foundation; either version 3, or (at your option) any later
|
|
10 version.
|
|
11
|
|
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
15 for more details.
|
|
16
|
|
17 You should have received a copy of the GNU General Public License
|
|
18 along with GCC; see the file COPYING3. If not see
|
|
19 <http://www.gnu.org/licenses/>. */
|
|
20
|
|
21 #include "config.h"
|
|
22 #include "system.h"
|
|
23 #include "coretypes.h"
|
|
24 #include "backend.h"
|
|
25 #include "tree.h"
|
|
26 #include "gimple.h"
|
|
27 #include "tree-pass.h"
|
|
28 #include "ssa.h"
|
|
29 #include "cgraph.h"
|
|
30 #include "pretty-print.h"
|
|
31 #include "fold-const.h"
|
|
32 #include "gimplify.h"
|
|
33 #include "gimple-iterator.h"
|
|
34 #include "gimple-walk.h"
|
|
35 #include "tree-inline.h"
|
|
36 #include "langhooks.h"
|
|
37 #include "omp-general.h"
|
|
38 #include "omp-low.h"
|
|
39 #include "omp-grid.h"
|
|
40 #include "gimple-pretty-print.h"
|
|
41
|
|
/* Return the lastprivate predicate for a given gridified loop described by
   FD, i.e. a boolean expression that is true only for the work-item executing
   the final iteration in every collapsed dimension.  */

tree
omp_grid_lastprivate_predicate (struct omp_for_data *fd)
{
  /* When dealing with a gridified loop, we need to check up to three collapsed
     iteration variables but they are not actually captured in this fd.
     Fortunately, we can easily rely on HSA builtins to get this
     information. */

  tree id, size;
  if (gimple_omp_for_kind (fd->for_stmt) == GF_OMP_FOR_KIND_GRID_LOOP
      && gimple_omp_for_grid_intra_group (fd->for_stmt))
    {
      /* Intra-group loop: compare against work-item id and size within the
         current work-group.  */
      id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMID);
      size = builtin_decl_explicit (BUILT_IN_HSA_CURRENTWORKGROUPSIZE);
    }
  else
    {
      /* Otherwise use absolute work-item ids and the whole grid size.  */
      id = builtin_decl_explicit (BUILT_IN_HSA_WORKITEMABSID);
      size = builtin_decl_explicit (BUILT_IN_HSA_GRIDSIZE);
    }
  /* Conjunction over all collapsed dimensions of ID (dim) + 1 == SIZE (dim),
     i.e. true only in the last iteration of each dimension.  */
  tree cond = NULL;
  for (int dim = 0; dim < fd->collapse; dim++)
    {
      tree dim_tree = build_int_cstu (unsigned_type_node, dim);
      tree u1 = build_int_cstu (unsigned_type_node, 1);
      tree c2
        = build2 (EQ_EXPR, boolean_type_node,
                  build2 (PLUS_EXPR, unsigned_type_node,
                          build_call_expr (id, 1, dim_tree), u1),
                  build_call_expr (size, 1, dim_tree));
      if (cond)
        cond = build2 (TRUTH_AND_EXPR, boolean_type_node, cond, c2);
      else
        cond = c2;
    }
  return cond;
}
|
|
82
|
|
/* Structure describing the basic properties of the loop we are analyzing
   whether it can be gridified and when it is gridified.  */

class grid_prop
{
public:
  /* True when we are doing tiling gridification, i.e. when there is a distinct
     distribute loop over groups and a loop construct over work-items.  False
     when distribute and parallel for loops form a combined construct.  */
  bool tiling;
  /* Location of the target construct for optimization information
     messages.  */
  dump_user_location_t target_loc;
  /* The collapse clause of the involved loops.  Collapse value of all of them
     must be the same for gridification to take place.  */
  size_t collapse;
  /* Group sizes, if requested by the user or NULL if not requested.  */
  tree group_sizes[3];
};
|
|
102
|
|
103 #define GRID_MISSED_MSG_PREFIX "Will not turn target construct into a " \
|
|
104 "gridified HSA kernel because "
|
|
105
|
|
106 /* Return true if STMT is an assignment of a register-type into a local
|
|
107 VAR_DECL. If GRID is non-NULL, the assignment additionally must not be to
|
|
108 any of the trees specifying group sizes there. */
|
|
109
|
|
110 static bool
|
|
111 grid_safe_assignment_p (gimple *stmt, grid_prop *grid)
|
|
112 {
|
|
113 gassign *assign = dyn_cast <gassign *> (stmt);
|
|
114 if (!assign)
|
|
115 return false;
|
|
116 if (gimple_clobber_p (assign))
|
|
117 return true;
|
|
118 tree lhs = gimple_assign_lhs (assign);
|
|
119 if (!VAR_P (lhs)
|
|
120 || !is_gimple_reg_type (TREE_TYPE (lhs))
|
|
121 || is_global_var (lhs))
|
|
122 return false;
|
|
123 if (grid)
|
|
124 for (unsigned i = 0; i < grid->collapse; i++)
|
|
125 if (lhs == grid->group_sizes[i])
|
|
126 return false;
|
|
127 return true;
|
|
128 }
|
|
129
|
|
130 /* Return true if all statements in SEQ are assignments to local register-type
|
|
131 variables that do not hold group size information. */
|
|
132
|
|
133 static bool
|
|
134 grid_seq_only_contains_local_assignments (gimple_seq seq, grid_prop *grid)
|
|
135 {
|
|
136 if (!seq)
|
|
137 return true;
|
|
138
|
|
139 gimple_stmt_iterator gsi;
|
|
140 for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
141 if (!grid_safe_assignment_p (gsi_stmt (gsi), grid))
|
|
142 return false;
|
|
143 return true;
|
|
144 }
|
|
145
|
|
/* Scan statements in SEQ and call itself recursively on any bind.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  If during whole search only assignments to
   register-type local variables (that do not overwrite group size information)
   and one single OMP statement is encountered, return true, otherwise return
   false.  RET is where we store any OMP statement encountered.  NAME is the
   user-visible name of the enclosing construct, used in diagnostics.  */

static bool
grid_find_single_omp_among_assignments_1 (gimple_seq seq, grid_prop *grid,
                                          const char *name, gimple **ret)
{
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      /* Harmless local assignments are skipped over.  */
      if (grid_safe_assignment_p (stmt, grid))
        continue;
      if (gbind *bind = dyn_cast <gbind *> (stmt))
        {
          /* Recurse into nested binds; *RET is shared across the whole
             search so a second OMP statement anywhere fails it.  */
          gimple_seq bind_body = gimple_bind_body (bind);
          if (!grid_find_single_omp_among_assignments_1 (bind_body, grid, name,
                                                         ret))
            return false;
        }
      else if (is_gimple_omp (stmt))
        {
          if (*ret)
            {
              /* Second OMP statement found: report both and fail.  */
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "%s construct "
                                   "contains multiple OpenMP constructs\n",
                                   name);
                  dump_printf_loc (MSG_NOTE, *ret,
                                   "The first OpenMP construct within "
                                   "a parallel\n");
                  dump_printf_loc (MSG_NOTE, stmt,
                                   "The second OpenMP construct within "
                                   "a parallel\n");
                }
              return false;
            }
          *ret = stmt;
        }
      else
        {
          /* Anything else (calls, control flow, ...) cannot be analyzed.  */
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "%s construct contains "
                               "a complex statement\n", name);
              dump_printf_loc (MSG_NOTE, stmt,
                               "This statement cannot be analyzed for "
                               "gridification\n");
            }
          return false;
        }
    }
  return true;
}
|
|
208
|
|
209 /* Scan statements in SEQ and make sure that it and any binds in it contain
|
|
210 only assignments to local register-type variables (that do not overwrite
|
|
211 group size information) and one OMP construct. If so, return that
|
|
212 construct, otherwise return NULL. GRID describes hitherto discovered
|
|
213 properties of the loop that is evaluated for possible gridification. If
|
|
214 dumping is enabled and function fails, use NAME to dump a note with the
|
|
215 reason for failure. */
|
|
216
|
|
217 static gimple *
|
|
218 grid_find_single_omp_among_assignments (gimple_seq seq, grid_prop *grid,
|
|
219 const char *name)
|
|
220 {
|
|
221 if (!seq)
|
|
222 {
|
|
223 if (dump_enabled_p ())
|
|
224 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
|
|
225 GRID_MISSED_MSG_PREFIX "%s construct has empty body\n",
|
|
226 name);
|
|
227 return NULL;
|
|
228 }
|
|
229
|
|
230 gimple *ret = NULL;
|
|
231 if (grid_find_single_omp_among_assignments_1 (seq, grid, name, &ret))
|
|
232 {
|
|
233 if (!ret && dump_enabled_p ())
|
|
234 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
|
|
235 GRID_MISSED_MSG_PREFIX "%s construct does not contain"
|
|
236 " any other OpenMP construct\n", name);
|
|
237 return ret;
|
|
238 }
|
|
239 else
|
|
240 return NULL;
|
|
241 }
|
|
242
|
|
/* Walker function looking for statements there is no point gridifying (and for
   noreturn function calls which we cannot do).  Return non-NULL if such a
   statement is found; the offending statement is stored in WI->info so the
   caller can report it.  */

static tree
grid_find_ungridifiable_statement (gimple_stmt_iterator *gsi,
                                   bool *handled_ops_p,
                                   struct walk_stmt_info *wi)
{
  *handled_ops_p = false;
  gimple *stmt = gsi_stmt (*gsi);
  switch (gimple_code (stmt))
    {
    case GIMPLE_CALL:
      /* Noreturn calls cannot be turned into HSA kernel code.  */
      if (gimple_call_noreturn_p (as_a <gcall *> (stmt)))
        {
          *handled_ops_p = true;
          wi->info = stmt;
          /* Returning non-NULL terminates the gimple walk.  */
          return error_mark_node;
        }
      break;

      /* We may reduce the following list if we find a way to implement the
         clauses, but now there is no point trying further.  */
    case GIMPLE_OMP_CRITICAL:
    case GIMPLE_OMP_TASKGROUP:
    case GIMPLE_OMP_TASK:
    case GIMPLE_OMP_SECTION:
    case GIMPLE_OMP_SECTIONS:
    case GIMPLE_OMP_SECTIONS_SWITCH:
    case GIMPLE_OMP_TARGET:
    case GIMPLE_OMP_ORDERED:
      *handled_ops_p = true;
      wi->info = stmt;
      return error_mark_node;
    default:
      break;
    }
  return NULL;
}
|
|
283
|
|
284 /* Examine clauses of omp parallel statement PAR and if any prevents
|
|
285 gridification, issue a missed-optimization diagnostics and return false,
|
|
286 otherwise return true. GRID describes hitherto discovered properties of the
|
|
287 loop that is evaluated for possible gridification. */
|
|
288
|
|
289 static bool
|
131
|
290 grid_parallel_clauses_gridifiable (gomp_parallel *par, dump_user_location_t tloc)
|
111
|
291 {
|
|
292 tree clauses = gimple_omp_parallel_clauses (par);
|
|
293 while (clauses)
|
|
294 {
|
|
295 switch (OMP_CLAUSE_CODE (clauses))
|
|
296 {
|
|
297 case OMP_CLAUSE_NUM_THREADS:
|
|
298 if (dump_enabled_p ())
|
|
299 {
|
|
300 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
|
|
301 GRID_MISSED_MSG_PREFIX "because there is "
|
|
302 "a num_threads clause of the parallel "
|
|
303 "construct\n");
|
131
|
304 dump_printf_loc (MSG_NOTE, par,
|
111
|
305 "Parallel construct has a num_threads clause\n");
|
|
306 }
|
|
307 return false;
|
|
308
|
|
309 case OMP_CLAUSE_REDUCTION:
|
|
310 if (dump_enabled_p ())
|
|
311 {
|
|
312 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
|
|
313 GRID_MISSED_MSG_PREFIX "a reduction clause "
|
|
314 "is present\n ");
|
131
|
315 dump_printf_loc (MSG_NOTE, par,
|
111
|
316 "Parallel construct has a reduction clause\n");
|
|
317 }
|
|
318 return false;
|
|
319
|
|
320 default:
|
|
321 break;
|
|
322 }
|
|
323 clauses = OMP_CLAUSE_CHAIN (clauses);
|
|
324 }
|
|
325 return true;
|
|
326 }
|
|
327
|
|
328 /* Examine clauses and the body of omp loop statement GFOR and if something
|
|
329 prevents gridification, issue a missed-optimization diagnostics and return
|
|
330 false, otherwise return true. GRID describes hitherto discovered properties
|
|
331 of the loop that is evaluated for possible gridification. */
|
|
332
|
|
333 static bool
|
|
334 grid_inner_loop_gridifiable_p (gomp_for *gfor, grid_prop *grid)
|
|
335 {
|
|
336 if (!grid_seq_only_contains_local_assignments (gimple_omp_for_pre_body (gfor),
|
|
337 grid))
|
|
338 {
|
|
339 if (dump_enabled_p ())
|
|
340 {
|
|
341 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
|
|
342 GRID_MISSED_MSG_PREFIX "the inner loop "
|
|
343 "loop bounds computation contains a complex "
|
|
344 "statement\n");
|
131
|
345 dump_printf_loc (MSG_NOTE, gfor,
|
111
|
346 "Loop construct cannot be analyzed for "
|
|
347 "gridification\n");
|
|
348 }
|
|
349 return false;
|
|
350 }
|
|
351
|
|
352 tree clauses = gimple_omp_for_clauses (gfor);
|
|
353 while (clauses)
|
|
354 {
|
|
355 switch (OMP_CLAUSE_CODE (clauses))
|
|
356 {
|
|
357 case OMP_CLAUSE_SCHEDULE:
|
|
358 if (OMP_CLAUSE_SCHEDULE_KIND (clauses) != OMP_CLAUSE_SCHEDULE_AUTO)
|
|
359 {
|
|
360 if (dump_enabled_p ())
|
|
361 {
|
|
362 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
|
|
363 GRID_MISSED_MSG_PREFIX "the inner loop "
|
|
364 "has a non-automatic schedule clause\n");
|
131
|
365 dump_printf_loc (MSG_NOTE, gfor,
|
111
|
366 "Loop construct has a non automatic "
|
|
367 "schedule clause\n");
|
|
368 }
|
|
369 return false;
|
|
370 }
|
|
371 break;
|
|
372
|
|
373 case OMP_CLAUSE_REDUCTION:
|
|
374 if (dump_enabled_p ())
|
|
375 {
|
|
376 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
|
|
377 GRID_MISSED_MSG_PREFIX "a reduction "
|
|
378 "clause is present\n ");
|
131
|
379 dump_printf_loc (MSG_NOTE, gfor,
|
111
|
380 "Loop construct has a reduction schedule "
|
|
381 "clause\n");
|
|
382 }
|
|
383 return false;
|
|
384
|
|
385 default:
|
|
386 break;
|
|
387 }
|
|
388 clauses = OMP_CLAUSE_CHAIN (clauses);
|
|
389 }
|
|
390 struct walk_stmt_info wi;
|
|
391 memset (&wi, 0, sizeof (wi));
|
|
392 if (walk_gimple_seq (gimple_omp_body (gfor),
|
|
393 grid_find_ungridifiable_statement,
|
|
394 NULL, &wi))
|
|
395 {
|
|
396 gimple *bad = (gimple *) wi.info;
|
|
397 if (dump_enabled_p ())
|
|
398 {
|
|
399 if (is_gimple_call (bad))
|
|
400 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
|
|
401 GRID_MISSED_MSG_PREFIX "the inner loop contains "
|
|
402 "call to a noreturn function\n");
|
|
403 else
|
|
404 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
|
|
405 GRID_MISSED_MSG_PREFIX "the inner loop contains "
|
|
406 "statement %s which cannot be transformed\n",
|
|
407 gimple_code_name[(int) gimple_code (bad)]);
|
131
|
408 dump_printf_loc (MSG_NOTE, bad,
|
111
|
409 "This statement cannot be analyzed for "
|
|
410 "gridification\n");
|
|
411 }
|
|
412 return false;
|
|
413 }
|
|
414 return true;
|
|
415 }
|
|
416
|
|
417 /* Given distribute omp construct represented by DIST, which in the original
|
|
418 source forms a compound construct with a looping construct, return true if it
|
|
419 can be turned into a gridified HSA kernel. Otherwise return false. GRID
|
|
420 describes hitherto discovered properties of the loop that is evaluated for
|
|
421 possible gridification. */
|
|
422
|
|
423 static bool
|
|
424 grid_dist_follows_simple_pattern (gomp_for *dist, grid_prop *grid)
|
|
425 {
|
131
|
426 dump_user_location_t tloc = grid->target_loc;
|
111
|
427 gimple *stmt = grid_find_single_omp_among_assignments (gimple_omp_body (dist),
|
|
428 grid, "distribute");
|
|
429 gomp_parallel *par;
|
|
430 if (!stmt
|
|
431 || !(par = dyn_cast <gomp_parallel *> (stmt))
|
|
432 || !grid_parallel_clauses_gridifiable (par, tloc))
|
|
433 return false;
|
|
434
|
|
435 stmt = grid_find_single_omp_among_assignments (gimple_omp_body (par), grid,
|
|
436 "parallel");
|
|
437 gomp_for *gfor;
|
|
438 if (!stmt || !(gfor = dyn_cast <gomp_for *> (stmt)))
|
|
439 return false;
|
|
440
|
|
441 if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
|
|
442 {
|
|
443 if (dump_enabled_p ())
|
|
444 dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
|
|
445 GRID_MISSED_MSG_PREFIX "the inner loop is not "
|
|
446 "a simple for loop\n");
|
|
447 return false;
|
|
448 }
|
|
449 gcc_assert (gimple_omp_for_collapse (gfor) == grid->collapse);
|
|
450
|
|
451 if (!grid_inner_loop_gridifiable_p (gfor, grid))
|
|
452 return false;
|
|
453
|
|
454 return true;
|
|
455 }
|
|
456
|
|
457 /* Given an omp loop statement GFOR, return true if it can participate in
|
|
458 tiling gridification, i.e. in one where the distribute and parallel for
|
|
459 loops do not form a compound statement. GRID describes hitherto discovered
|
|
460 properties of the loop that is evaluated for possible gridification. */
|
|
461
|
|
462 static bool
|
|
463 grid_gfor_follows_tiling_pattern (gomp_for *gfor, grid_prop *grid)
|
|
464 {
|
|
465 if (gimple_omp_for_kind (gfor) != GF_OMP_FOR_KIND_FOR)
|
|
466 {
|
|
467 if (dump_enabled_p ())
|
|
468 {
|
|
469 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
|
|
470 GRID_MISSED_MSG_PREFIX "an inner loop is not "
|
|
471 "a simple for loop\n");
|
131
|
472 dump_printf_loc (MSG_NOTE, gfor,
|
111
|
473 "This statement is not a simple for loop\n");
|
|
474 }
|
|
475 return false;
|
|
476 }
|
|
477
|
|
478 if (!grid_inner_loop_gridifiable_p (gfor, grid))
|
|
479 return false;
|
|
480
|
|
481 if (gimple_omp_for_collapse (gfor) != grid->collapse)
|
|
482 {
|
|
483 if (dump_enabled_p ())
|
|
484 {
|
|
485 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
|
|
486 GRID_MISSED_MSG_PREFIX "an inner loop does not "
|
|
487 "have use the same collapse clause\n");
|
131
|
488 dump_printf_loc (MSG_NOTE, gfor,
|
111
|
489 "Loop construct uses a different collapse clause\n");
|
|
490 }
|
|
491 return false;
|
|
492 }
|
|
493
|
|
494 struct omp_for_data fd;
|
|
495 struct omp_for_data_loop *loops
|
|
496 = (struct omp_for_data_loop *)alloca (grid->collapse
|
|
497 * sizeof (struct omp_for_data_loop));
|
|
498 omp_extract_for_data (gfor, &fd, loops);
|
|
499 for (unsigned i = 0; i < grid->collapse; i++)
|
|
500 {
|
|
501 tree itype, type = TREE_TYPE (fd.loops[i].v);
|
|
502 if (POINTER_TYPE_P (type))
|
|
503 itype = signed_type_for (type);
|
|
504 else
|
|
505 itype = type;
|
|
506
|
|
507 tree n1 = fold_convert (itype, fd.loops[i].n1);
|
|
508 tree n2 = fold_convert (itype, fd.loops[i].n2);
|
|
509 tree t = build_int_cst (itype,
|
|
510 (fd.loops[i].cond_code == LT_EXPR ? -1 : 1));
|
|
511 t = fold_build2 (PLUS_EXPR, itype, fd.loops[i].step, t);
|
|
512 t = fold_build2 (PLUS_EXPR, itype, t, n2);
|
|
513 t = fold_build2 (MINUS_EXPR, itype, t, n1);
|
|
514 if (TYPE_UNSIGNED (itype) && fd.loops[i].cond_code == GT_EXPR)
|
|
515 t = fold_build2 (TRUNC_DIV_EXPR, itype,
|
|
516 fold_build1 (NEGATE_EXPR, itype, t),
|
|
517 fold_build1 (NEGATE_EXPR, itype, fd.loops[i].step));
|
|
518 else
|
|
519 t = fold_build2 (TRUNC_DIV_EXPR, itype, t, fd.loops[i].step);
|
|
520
|
|
521 if (!operand_equal_p (grid->group_sizes[i], t, 0))
|
|
522 {
|
|
523 if (dump_enabled_p ())
|
|
524 {
|
|
525 dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
|
|
526 GRID_MISSED_MSG_PREFIX "the distribute and "
|
|
527 "an internal loop do not agree on tile size\n");
|
131
|
528 dump_printf_loc (MSG_NOTE, gfor,
|
111
|
529 "Loop construct does not seem to loop over "
|
|
530 "a tile size\n");
|
|
531 }
|
|
532 return false;
|
|
533 }
|
|
534 }
|
|
535 return true;
|
|
536 }
|
|
537
|
|
538 /* Facing a call to FNDECL in the body of a distribute construct, return true
|
|
539 if we can handle it or false if it precludes gridification. */
|
|
540
|
|
541 static bool
|
|
542 grid_call_permissible_in_distribute_p (tree fndecl)
|
|
543 {
|
|
544 if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
|
|
545 return true;
|
|
546
|
|
547 const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
|
|
548 if (strstr (name, "omp_") != name)
|
|
549 return false;
|
|
550
|
|
551 if ((strcmp (name, "omp_get_thread_num") == 0)
|
|
552 || (strcmp (name, "omp_get_num_threads") == 0)
|
|
553 || (strcmp (name, "omp_get_num_teams") == 0)
|
|
554 || (strcmp (name, "omp_get_team_num") == 0)
|
|
555 || (strcmp (name, "omp_get_level") == 0)
|
|
556 || (strcmp (name, "omp_get_active_level") == 0)
|
|
557 || (strcmp (name, "omp_in_parallel") == 0))
|
|
558 return true;
|
|
559
|
|
560 return false;
|
|
561 }
|
|
562
|
|
563 /* Facing a call satisfying grid_call_permissible_in_distribute_p in the body
|
|
564 of a distribute construct that is pointed at by GSI, modify it as necessary
|
|
565 for gridification. If the statement itself got removed, return true. */
|
|
566
|
|
567 static bool
|
|
568 grid_handle_call_in_distribute (gimple_stmt_iterator *gsi)
|
|
569 {
|
|
570 gimple *stmt = gsi_stmt (*gsi);
|
|
571 tree fndecl = gimple_call_fndecl (stmt);
|
|
572 gcc_checking_assert (stmt);
|
|
573 if (DECL_PURE_P (fndecl) || TREE_READONLY (fndecl))
|
|
574 return false;
|
|
575
|
|
576 const char *name = IDENTIFIER_POINTER (DECL_NAME (fndecl));
|
|
577 if ((strcmp (name, "omp_get_thread_num") == 0)
|
|
578 || (strcmp (name, "omp_get_level") == 0)
|
|
579 || (strcmp (name, "omp_get_active_level") == 0)
|
|
580 || (strcmp (name, "omp_in_parallel") == 0))
|
|
581 {
|
|
582 tree lhs = gimple_call_lhs (stmt);
|
|
583 if (lhs)
|
|
584 {
|
|
585 gassign *assign
|
|
586 = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
|
|
587 gsi_insert_before (gsi, assign, GSI_SAME_STMT);
|
|
588 }
|
|
589 gsi_remove (gsi, true);
|
|
590 return true;
|
|
591 }
|
|
592
|
|
593 /* The rest of the omp functions can stay as they are, HSA back-end will
|
|
594 handle them correctly. */
|
|
595 gcc_checking_assert ((strcmp (name, "omp_get_num_threads") == 0)
|
|
596 || (strcmp (name, "omp_get_num_teams") == 0)
|
|
597 || (strcmp (name, "omp_get_team_num") == 0));
|
|
598 return false;
|
|
599 }
|
|
600
|
|
/* Given a sequence of statements within a distribute omp construct or a
   parallel construct, which in the original source does not form a compound
   construct with a looping construct, return true if it does not prevent us
   from turning it into a gridified HSA kernel.  Otherwise return false.  GRID
   describes hitherto discovered properties of the loop that is evaluated for
   possible gridification.  IN_PARALLEL must be true if SEQ is within a
   parallel construct and false if it is only within a distribute
   construct.  */

static bool
grid_dist_follows_tiling_pattern (gimple_seq seq, grid_prop *grid,
                                  bool in_parallel)
{
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (seq); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      /* Simple local assignments and plain control flow are harmless.  */
      if (grid_safe_assignment_p (stmt, grid)
          || gimple_code (stmt) == GIMPLE_GOTO
          || gimple_code (stmt) == GIMPLE_LABEL
          || gimple_code (stmt) == GIMPLE_COND)
        continue;
      else if (gbind *bind = dyn_cast <gbind *> (stmt))
        {
          /* Recurse into binds with the same IN_PARALLEL state.  */
          if (!grid_dist_follows_tiling_pattern (gimple_bind_body (bind),
                                                 grid, in_parallel))
            return false;
          continue;
        }
      else if (gtry *try_stmt = dyn_cast <gtry *> (stmt))
        {
          /* try..finally is acceptable (both parts are scanned below) but
             try..catch is not.  */
          if (gimple_try_kind (try_stmt) == GIMPLE_TRY_CATCH)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "the distribute "
                                   "construct contains a try..catch region\n");
                  dump_printf_loc (MSG_NOTE, try_stmt,
                                   "This statement cannot be analyzed for "
                                   "tiled gridification\n");
                }
              return false;
            }
          if (!grid_dist_follows_tiling_pattern (gimple_try_eval (try_stmt),
                                                 grid, in_parallel))
            return false;
          if (!grid_dist_follows_tiling_pattern (gimple_try_cleanup (try_stmt),
                                                 grid, in_parallel))
            return false;
          continue;
        }
      else if (is_gimple_call (stmt))
        {
          /* Only pure/const functions and a whitelist of omp_* queries are
             allowed.  */
          tree fndecl = gimple_call_fndecl (stmt);
          if (fndecl && grid_call_permissible_in_distribute_p (fndecl))
            continue;

          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "the distribute "
                               "construct contains a call\n");
              dump_printf_loc (MSG_NOTE, stmt,
                               "This statement cannot be analyzed for "
                               "tiled gridification\n");
            }
          return false;
        }
      else if (gomp_parallel *par = dyn_cast <gomp_parallel *> (stmt))
        {
          /* A parallel is allowed only at the distribute level, not nested
             within another parallel.  */
          if (in_parallel)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "a parallel "
                                   "construct contains another parallel "
                                   "construct\n");
                  dump_printf_loc (MSG_NOTE, stmt,
                                   "This parallel construct is nested in "
                                   "another one\n");
                }
              return false;
            }
          if (!grid_parallel_clauses_gridifiable (par, grid->target_loc)
              || !grid_dist_follows_tiling_pattern (gimple_omp_body (par),
                                                    grid, true))
            return false;
        }
      else if (gomp_for *gfor = dyn_cast <gomp_for *> (stmt))
        {
          /* Conversely, a loop construct is only allowed inside a
             parallel.  */
          if (!in_parallel)
            {
              if (dump_enabled_p ())
                {
                  dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                                   GRID_MISSED_MSG_PREFIX "a loop "
                                   "construct is not nested within a parallel "
                                   "construct\n");
                  dump_printf_loc (MSG_NOTE, stmt,
                                   "This loop construct is not nested in "
                                   "a parallel construct\n");
                }
              return false;
            }
          if (!grid_gfor_follows_tiling_pattern (gfor, grid))
            return false;
        }
      else
        {
          if (dump_enabled_p ())
            {
              dump_printf_loc (MSG_MISSED_OPTIMIZATION, grid->target_loc,
                               GRID_MISSED_MSG_PREFIX "the distribute "
                               "construct contains a complex statement\n");
              dump_printf_loc (MSG_NOTE, stmt,
                               "This statement cannot be analyzed for "
                               "tiled gridification\n");
            }
          return false;
        }
    }
  return true;
}
|
|
727
|
|
/* If TARGET follows a pattern that can be turned into a gridified HSA kernel,
   return true, otherwise return false.  In the case of success, also fill in
   GRID with information describing the kernel grid.  */

static bool
grid_target_follows_gridifiable_pattern (gomp_target *target, grid_prop *grid)
{
  if (gimple_omp_target_kind (target) != GF_OMP_TARGET_KIND_REGION)
    return false;

  dump_user_location_t tloc = target;
  grid->target_loc = tloc;
  /* The target body must consist of a single teams construct (modulo local
     assignments).  */
  gimple *stmt
    = grid_find_single_omp_among_assignments (gimple_omp_body (target),
                                              grid, "target");
  if (!stmt)
    return false;
  gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
  tree group_size = NULL;
  if (!teams)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "it does not have a sole "
                         "teams construct in it.\n");
      return false;
    }

  /* num_teams and reduction clauses on teams preclude gridification;
     a non-zero thread_limit becomes the requested group size.  */
  tree clauses = gimple_omp_teams_clauses (teams);
  while (clauses)
    {
      switch (OMP_CLAUSE_CODE (clauses))
        {
        case OMP_CLAUSE_NUM_TEAMS:
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "the teams construct "
                             "contains a num_teams clause\n ");
          return false;

        case OMP_CLAUSE_REDUCTION:
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "a reduction "
                             "clause is present\n ");
          return false;

        case OMP_CLAUSE_THREAD_LIMIT:
          if (!integer_zerop (OMP_CLAUSE_OPERAND (clauses, 0)))
            group_size = OMP_CLAUSE_OPERAND (clauses, 0);
          break;

        default:
          break;
        }
      clauses = OMP_CLAUSE_CHAIN (clauses);
    }

  /* Inside teams there must be a single distribute construct.  */
  stmt = grid_find_single_omp_among_assignments (gimple_omp_body (teams), grid,
                                                 "teams");
  if (!stmt)
    return false;
  gomp_for *dist = dyn_cast <gomp_for *> (stmt);
  if (!dist)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "the teams construct does not "
                         "have a single distribute construct in it.\n");
      return false;
    }

  gcc_assert (gimple_omp_for_kind (dist) == GF_OMP_FOR_KIND_DISTRIBUTE);

  /* HSA grids have at most three dimensions, hence the collapse limit.  */
  grid->collapse = gimple_omp_for_collapse (dist);
  if (grid->collapse > 3)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "the distribute construct "
                         "contains collapse clause with parameter greater "
                         "than 3\n");
      return false;
    }

  /* A distribute schedule chunk also determines the group size and must
     agree with any thread_limit seen above.  */
  struct omp_for_data fd;
  struct omp_for_data_loop *dist_loops
    = (struct omp_for_data_loop *)alloca (grid->collapse
                                          * sizeof (struct omp_for_data_loop));
  omp_extract_for_data (dist, &fd, dist_loops);
  if (fd.chunk_size)
    {
      if (group_size && !operand_equal_p (group_size, fd.chunk_size, 0))
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "the teams "
                             "thread limit is different from distribute "
                             "schedule chunk\n");
          return false;
        }
      group_size = fd.chunk_size;
    }
  if (group_size && grid->collapse > 1)
    {
      if (dump_enabled_p ())
        dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                         GRID_MISSED_MSG_PREFIX "group size cannot be "
                         "set using thread_limit or schedule clauses "
                         "when also using a collapse clause greater than 1\n");
      return false;
    }

  if (gimple_omp_for_combined_p (dist))
    {
      /* Combined distribute parallel for: the simple pattern.  */
      grid->tiling = false;
      grid->group_sizes[0] = group_size;
      for (unsigned i = 1; i < grid->collapse; i++)
        grid->group_sizes[i] = NULL;
      return grid_dist_follows_simple_pattern (dist, grid);
    }
  else
    {
      /* Separate distribute and loop constructs: the tiling pattern, where
         the distribute step determines each dimension's group size.  */
      grid->tiling = true;
      if (group_size)
        {
          if (dump_enabled_p ())
            dump_printf_loc (MSG_MISSED_OPTIMIZATION, tloc,
                             GRID_MISSED_MSG_PREFIX "group size cannot be set "
                             "using thread_limit or schedule clauses when "
                             "distribute and loop constructs do not form "
                             "one combined construct\n");
          return false;
        }
      for (unsigned i = 0; i < grid->collapse; i++)
        {
          /* For downward loops the step is negative; store its negation so
             group sizes are always positive.  */
          if (fd.loops[i].cond_code == GT_EXPR)
            grid->group_sizes[i] = fold_build1 (NEGATE_EXPR,
                                                TREE_TYPE (fd.loops[i].step),
                                                fd.loops[i].step);
          else
            grid->group_sizes[i] = fd.loops[i].step;
        }
      return grid_dist_follows_tiling_pattern (gimple_omp_body (dist), grid,
                                               false);
    }
}
|
|
875
|
|
876 /* Operand walker, used to remap pre-body declarations according to a hash map
|
|
877 provided in DATA. */
|
|
878
|
|
879 static tree
|
|
880 grid_remap_prebody_decls (tree *tp, int *walk_subtrees, void *data)
|
|
881 {
|
|
882 tree t = *tp;
|
|
883
|
|
884 if (DECL_P (t) || TYPE_P (t))
|
|
885 *walk_subtrees = 0;
|
|
886 else
|
|
887 *walk_subtrees = 1;
|
|
888
|
|
889 if (VAR_P (t))
|
|
890 {
|
|
891 struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
|
|
892 hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
|
|
893 tree *repl = declmap->get (t);
|
|
894 if (repl)
|
|
895 *tp = *repl;
|
|
896 }
|
|
897 return NULL_TREE;
|
|
898 }
|
|
899
|
|
/* Identifiers of segments into which a particular variable should be placed
   when gridifying.  GROUP and GLOBAL map to the "hsa_group_segment" and
   "hsa_global_segment" attributes attached in grid_mark_variable_segment.  */

enum grid_var_segment {GRID_SEGMENT_PRIVATE, GRID_SEGMENT_GROUP,
		       GRID_SEGMENT_GLOBAL};
|
|
905
|
|
/* Mark VAR so that it is eventually placed into SEGMENT by attaching the
   corresponding HSA segment attribute to it.  Variables that are not
   addressable are left untouched because they can safely remain private.
   NOTE(review): an earlier comment also mentioned emitting an artificial
   builtin call into a SEQ, but this function takes no such sequence.  */

static void
grid_mark_variable_segment (tree var, enum grid_var_segment segment)
{
  /* Making a non-addressable variables would require that we re-gimplify all
     their uses.  Fortunately, we do not have to do this because if they are
     not addressable, it means they are not used in atomic or parallel
     statements and so relaxed GPU consistency rules mean we can just keep them
     private.  */
  if (!TREE_ADDRESSABLE (var))
    return;

  switch (segment)
    {
    case GRID_SEGMENT_GROUP:
      DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_group_segment"),
					 NULL, DECL_ATTRIBUTES (var));
      break;
    case GRID_SEGMENT_GLOBAL:
      DECL_ATTRIBUTES (var) = tree_cons (get_identifier ("hsa_global_segment"),
					 NULL, DECL_ATTRIBUTES (var));
      break;
    default:
      /* Callers never request marking for GRID_SEGMENT_PRIVATE.  */
      gcc_unreachable ();
    }

  if (!TREE_STATIC (var))
    {
      /* Segment-placed variables must be static; give the variable a unique
	 assembler name and register it with the varpool so that it gets
	 emitted.  */
      TREE_STATIC (var) = 1;
      const char *prefix = IDENTIFIER_POINTER (DECL_NAME (var));
      SET_DECL_ASSEMBLER_NAME (var, create_tmp_var_name (prefix));
      varpool_node::finalize_decl (var);
    }

}
|
|
944
|
|
/* Copy leading register-type assignments to local variables in SRC to just
   before DST, creating temporaries, adjusting mapping of operands in WI and
   remapping operands as necessary.  Add any new temporaries to TGT_BIND.
   Return the first statement that does not conform to grid_safe_assignment_p
   or NULL.  If VAR_SEGMENT is not GRID_SEGMENT_PRIVATE, also mark all
   variables in traversed bind statements so that they are put into the
   appropriate segment.  */

static gimple *
grid_copy_leading_local_assignments (gimple_seq src, gimple_stmt_iterator *dst,
				     gbind *tgt_bind,
				     enum grid_var_segment var_segment,
				     struct walk_stmt_info *wi)
{
  /* WI->info carries the map from original decls to their copies, shared with
     grid_remap_prebody_decls.  */
  hash_map<tree, tree> *declmap = (hash_map<tree, tree> *) wi->info;
  gimple_stmt_iterator gsi;
  for (gsi = gsi_start (src); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);
      if (gbind *bind = dyn_cast <gbind *> (stmt))
	{
	  /* Recurse into nested binds...  */
	  gimple *r = grid_copy_leading_local_assignments
	    (gimple_bind_body (bind), dst, tgt_bind, var_segment, wi);

	  /* ...and mark their variables for the requested segment.  */
	  if (var_segment != GRID_SEGMENT_PRIVATE)
	    for (tree var = gimple_bind_vars (bind);
		 var;
		 var = DECL_CHAIN (var))
	      grid_mark_variable_segment (var, var_segment);
	  if (r)
	    return r;
	  else
	    continue;
	}
      /* The first statement that is not a safe assignment terminates the
	 leading run; hand it back to the caller.  */
      if (!grid_safe_assignment_p (stmt, NULL))
	return stmt;
      /* Create a temporary for the assignment's LHS, record the mapping, and
	 insert a remapped copy of the assignment before DST.  */
      tree lhs = gimple_assign_lhs (as_a <gassign *> (stmt));
      tree repl = copy_var_decl (lhs, create_tmp_var_name (NULL),
				 TREE_TYPE (lhs));
      DECL_CONTEXT (repl) = current_function_decl;
      gimple_bind_append_vars (tgt_bind, repl);

      declmap->put (lhs, repl);
      gassign *copy = as_a <gassign *> (gimple_copy (stmt));
      walk_gimple_op (copy, grid_remap_prebody_decls, wi);
      gsi_insert_before (dst, copy, GSI_SAME_STMT);
    }
  return NULL;
}
|
|
994
|
|
995 /* Statement walker function to make adjustments to statements within the
|
|
996 gridifed kernel copy. */
|
|
997
|
|
998 static tree
|
|
999 grid_process_grid_body (gimple_stmt_iterator *gsi, bool *handled_ops_p,
|
|
1000 struct walk_stmt_info *)
|
|
1001 {
|
|
1002 *handled_ops_p = false;
|
|
1003 gimple *stmt = gsi_stmt (*gsi);
|
|
1004 if (gimple_code (stmt) == GIMPLE_OMP_FOR
|
145
|
1005 && gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_SIMD)
|
111
|
1006 {
|
|
1007 gomp_for *loop = as_a <gomp_for *> (stmt);
|
|
1008 tree clauses = gimple_omp_for_clauses (loop);
|
|
1009 tree cl = omp_find_clause (clauses, OMP_CLAUSE_SAFELEN);
|
|
1010 if (cl)
|
|
1011 OMP_CLAUSE_SAFELEN_EXPR (cl) = integer_one_node;
|
|
1012 else
|
|
1013 {
|
|
1014 tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
|
|
1015 OMP_CLAUSE_SAFELEN_EXPR (c) = integer_one_node;
|
|
1016 OMP_CLAUSE_CHAIN (c) = clauses;
|
|
1017 gimple_omp_for_set_clauses (loop, c);
|
|
1018 }
|
|
1019 }
|
|
1020 return NULL_TREE;
|
|
1021 }
|
|
1022
|
|
1023 /* Given a PARLOOP that is a normal for looping construct but also a part of a
|
|
1024 combined construct with a simd loop, eliminate the simd loop. */
|
|
1025
|
|
1026 static void
|
|
1027 grid_eliminate_combined_simd_part (gomp_for *parloop)
|
|
1028 {
|
|
1029 struct walk_stmt_info wi;
|
|
1030
|
|
1031 memset (&wi, 0, sizeof (wi));
|
|
1032 wi.val_only = true;
|
145
|
1033 enum gf_mask msk = GF_OMP_FOR_KIND_SIMD;
|
111
|
1034 wi.info = (void *) &msk;
|
|
1035 walk_gimple_seq (gimple_omp_body (parloop), omp_find_combined_for, NULL, &wi);
|
|
1036 gimple *stmt = (gimple *) wi.info;
|
|
1037 /* We expect that the SIMD id the only statement in the parallel loop. */
|
|
1038 gcc_assert (stmt
|
|
1039 && gimple_code (stmt) == GIMPLE_OMP_FOR
|
145
|
1040 && (gimple_omp_for_kind (stmt) == GF_OMP_FOR_KIND_SIMD)
|
111
|
1041 && gimple_omp_for_combined_into_p (stmt)
|
|
1042 && !gimple_omp_for_combined_p (stmt));
|
|
1043 gomp_for *simd = as_a <gomp_for *> (stmt);
|
|
1044
|
|
1045 /* Copy over the iteration properties because the body refers to the index in
|
|
1046 the bottmom-most loop. */
|
|
1047 unsigned i, collapse = gimple_omp_for_collapse (parloop);
|
|
1048 gcc_checking_assert (collapse == gimple_omp_for_collapse (simd));
|
|
1049 for (i = 0; i < collapse; i++)
|
|
1050 {
|
|
1051 gimple_omp_for_set_index (parloop, i, gimple_omp_for_index (simd, i));
|
|
1052 gimple_omp_for_set_initial (parloop, i, gimple_omp_for_initial (simd, i));
|
|
1053 gimple_omp_for_set_final (parloop, i, gimple_omp_for_final (simd, i));
|
|
1054 gimple_omp_for_set_incr (parloop, i, gimple_omp_for_incr (simd, i));
|
|
1055 }
|
|
1056
|
|
1057 tree *tgt= gimple_omp_for_clauses_ptr (parloop);
|
|
1058 while (*tgt)
|
|
1059 tgt = &OMP_CLAUSE_CHAIN (*tgt);
|
|
1060
|
145
|
1061 /* Copy over all clauses, except for linear clauses, which are turned into
|
|
1062 private clauses, and all other simd-specific clauses, which are
|
111
|
1063 ignored. */
|
|
1064 tree *pc = gimple_omp_for_clauses_ptr (simd);
|
|
1065 while (*pc)
|
|
1066 {
|
|
1067 tree c = *pc;
|
|
1068 switch (TREE_CODE (c))
|
|
1069 {
|
|
1070 case OMP_CLAUSE_LINEAR:
|
|
1071 {
|
|
1072 tree priv = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_PRIVATE);
|
|
1073 OMP_CLAUSE_DECL (priv) = OMP_CLAUSE_DECL (c);
|
|
1074 OMP_CLAUSE_CHAIN (priv) = NULL;
|
|
1075 *tgt = priv;
|
|
1076 tgt = &OMP_CLAUSE_CHAIN (priv);
|
|
1077 pc = &OMP_CLAUSE_CHAIN (c);
|
|
1078 break;
|
|
1079 }
|
|
1080
|
|
1081 case OMP_CLAUSE_SAFELEN:
|
|
1082 case OMP_CLAUSE_SIMDLEN:
|
|
1083 case OMP_CLAUSE_ALIGNED:
|
|
1084 pc = &OMP_CLAUSE_CHAIN (c);
|
|
1085 break;
|
|
1086
|
|
1087 default:
|
|
1088 *pc = OMP_CLAUSE_CHAIN (c);
|
|
1089 OMP_CLAUSE_CHAIN (c) = NULL;
|
|
1090 *tgt = c;
|
145
|
1091 tgt = &OMP_CLAUSE_CHAIN (c);
|
111
|
1092 break;
|
|
1093 }
|
|
1094 }
|
|
1095
|
|
1096 /* Finally, throw away the simd and mark the parallel loop as not
|
|
1097 combined. */
|
|
1098 gimple_omp_set_body (parloop, gimple_omp_body (simd));
|
|
1099 gimple_omp_for_set_combined_p (parloop, false);
|
|
1100 }
|
|
1101
|
|
/* Statement walker function marking all loops as grid ones representing
   threads of a particular thread group.  Combined simd parts are eliminated
   and lastprivate clauses are handled by creating group-segment copies of the
   affected variables in the bind carried in WI_IN->info.  */

static tree
grid_mark_tiling_loops (gimple_stmt_iterator *gsi, bool *handled_ops_p,
			struct walk_stmt_info *wi_in)
{
  *handled_ops_p = false;
  if (gomp_for *loop = dyn_cast <gomp_for *> (gsi_stmt (*gsi)))
    {
      *handled_ops_p = true;
      gimple_omp_for_set_kind (loop, GF_OMP_FOR_KIND_GRID_LOOP);
      gimple_omp_for_set_grid_intra_group (loop, true);
      if (gimple_omp_for_combined_p (loop))
	grid_eliminate_combined_simd_part (loop);

      struct walk_stmt_info body_wi;
      memset (&body_wi, 0, sizeof (body_wi));
      walk_gimple_seq_mod (gimple_omp_body_ptr (loop),
			   grid_process_grid_body, NULL, &body_wi);

      /* For every lastprivate clause, create a group-segment copy GV of the
	 original variable OV, store OV into GV in the clause's lastprivate
	 sequence, and copy GV back into OV right after the loop.  */
      gbind *bind = (gbind *) wi_in->info;
      tree c;
      for (c = gimple_omp_for_clauses (loop); c; c = OMP_CLAUSE_CHAIN (c))
	if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_LASTPRIVATE)
	  {
	    push_gimplify_context ();
	    tree ov = OMP_CLAUSE_DECL (c);
	    tree gv = copy_var_decl (ov, create_tmp_var_name (NULL),
				     TREE_TYPE (ov));

	    grid_mark_variable_segment (gv, GRID_SEGMENT_GROUP);
	    DECL_CONTEXT (gv) = current_function_decl;
	    gimple_bind_append_vars (bind, gv);
	    tree x = lang_hooks.decls.omp_clause_assign_op (c, gv, ov);
	    gimplify_and_add (x, &OMP_CLAUSE_LASTPRIVATE_GIMPLE_SEQ (c));
	    x = lang_hooks.decls.omp_clause_copy_ctor (c, ov, gv);
	    gimple_seq l = NULL;
	    gimplify_and_add (x, &l);
	    gsi_insert_seq_after (gsi, l, GSI_SAME_STMT);
	    pop_gimplify_context (bind);
	  }
    }
  return NULL_TREE;
}
|
|
1147
|
|
1148 /* Statement walker function marking all parallels as grid_phony and loops as
|
|
1149 grid ones representing threads of a particular thread group. */
|
|
1150
|
|
1151 static tree
|
|
1152 grid_mark_tiling_parallels_and_loops (gimple_stmt_iterator *gsi,
|
|
1153 bool *handled_ops_p,
|
|
1154 struct walk_stmt_info *wi_in)
|
|
1155 {
|
|
1156 *handled_ops_p = false;
|
|
1157 wi_in->removed_stmt = false;
|
|
1158 gimple *stmt = gsi_stmt (*gsi);
|
|
1159 if (gbind *bind = dyn_cast <gbind *> (stmt))
|
|
1160 {
|
|
1161 for (tree var = gimple_bind_vars (bind); var; var = DECL_CHAIN (var))
|
|
1162 grid_mark_variable_segment (var, GRID_SEGMENT_GROUP);
|
|
1163 }
|
|
1164 else if (gomp_parallel *parallel = dyn_cast <gomp_parallel *> (stmt))
|
|
1165 {
|
|
1166 *handled_ops_p = true;
|
|
1167 gimple_omp_parallel_set_grid_phony (parallel, true);
|
|
1168
|
|
1169 gbind *new_bind = gimple_build_bind (NULL, NULL, make_node (BLOCK));
|
|
1170 gimple_bind_set_body (new_bind, gimple_omp_body (parallel));
|
|
1171 gimple_seq s = NULL;
|
|
1172 gimple_seq_add_stmt (&s, new_bind);
|
|
1173 gimple_omp_set_body (parallel, s);
|
|
1174
|
|
1175 struct walk_stmt_info wi_par;
|
|
1176 memset (&wi_par, 0, sizeof (wi_par));
|
|
1177 wi_par.info = new_bind;
|
|
1178 walk_gimple_seq_mod (gimple_bind_body_ptr (new_bind),
|
|
1179 grid_mark_tiling_loops, NULL, &wi_par);
|
|
1180 }
|
|
1181 else if (is_a <gcall *> (stmt))
|
|
1182 wi_in->removed_stmt = grid_handle_call_in_distribute (gsi);
|
|
1183 return NULL_TREE;
|
|
1184 }
|
|
1185
|
|
/* Given freshly copied top level kernel SEQ, identify the individual OMP
   components, mark them as part of kernel, copy assignment leading to them
   just before DST, remapping them using WI and adding new temporaries to
   TGT_BIND, and return the loop that will be used for kernel dispatch.  */

static gomp_for *
grid_process_kernel_body_copy (grid_prop *grid, gimple_seq seq,
			       gimple_stmt_iterator *dst,
			       gbind *tgt_bind, struct walk_stmt_info *wi)
{
  /* The first OMP construct must be a teams; make it phony and copy out the
     assignments leading up to it.  */
  gimple *stmt = grid_copy_leading_local_assignments (seq, dst, tgt_bind,
						      GRID_SEGMENT_GLOBAL, wi);
  gomp_teams *teams = dyn_cast <gomp_teams *> (stmt);
  gcc_assert (teams);
  gimple_omp_teams_set_grid_phony (teams, true);
  /* Next comes the distribute loop.  */
  stmt = grid_copy_leading_local_assignments (gimple_omp_body (teams), dst,
					      tgt_bind, GRID_SEGMENT_GLOBAL,
					      wi);
  gcc_checking_assert (stmt);
  gomp_for *dist = dyn_cast <gomp_for *> (stmt);
  gcc_assert (dist);
  gimple_seq prebody = gimple_omp_for_pre_body (dist);
  if (prebody)
    grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
					 GRID_SEGMENT_GROUP, wi);

  if (grid->tiling)
    {
      /* Tiled case: the distribute loop itself iterates over groups and its
	 body is processed by grid_mark_tiling_parallels_and_loops.  */
      gimple_omp_for_set_kind (dist, GF_OMP_FOR_KIND_GRID_LOOP);
      gimple_omp_for_set_grid_group_iter (dist, true);

      struct walk_stmt_info wi_tiled;
      memset (&wi_tiled, 0, sizeof (wi_tiled));
      walk_gimple_seq_mod (gimple_omp_body_ptr (dist),
			   grid_mark_tiling_parallels_and_loops, NULL,
			   &wi_tiled);
      return dist;
    }
  else
    {
      /* Simple case: the distribute and parallel are phony; descend to the
	 innermost loop, which becomes the grid loop used for dispatch.  */
      gimple_omp_for_set_grid_phony (dist, true);
      stmt = grid_copy_leading_local_assignments (gimple_omp_body (dist), dst,
						  tgt_bind,
						  GRID_SEGMENT_PRIVATE, wi);
      gcc_checking_assert (stmt);
      gomp_parallel *parallel = as_a <gomp_parallel *> (stmt);
      gimple_omp_parallel_set_grid_phony (parallel, true);
      stmt = grid_copy_leading_local_assignments (gimple_omp_body (parallel),
						  dst, tgt_bind,
						  GRID_SEGMENT_PRIVATE, wi);
      gomp_for *inner_loop = as_a <gomp_for *> (stmt);
      gimple_omp_for_set_kind (inner_loop, GF_OMP_FOR_KIND_GRID_LOOP);
      prebody = gimple_omp_for_pre_body (inner_loop);
      if (prebody)
	grid_copy_leading_local_assignments (prebody, dst, tgt_bind,
					     GRID_SEGMENT_PRIVATE, wi);

      if (gimple_omp_for_combined_p (inner_loop))
	grid_eliminate_combined_simd_part (inner_loop);
      struct walk_stmt_info body_wi;
      memset (&body_wi, 0, sizeof (body_wi));
      walk_gimple_seq_mod (gimple_omp_body_ptr (inner_loop),
			   grid_process_grid_body, NULL, &body_wi);

      return inner_loop;
    }
}
|
|
1253
|
|
/* If TARGET points to a GOMP_TARGET which follows a gridifiable pattern,
   create a GPU kernel for it.  GSI must point to the same statement, TGT_BIND
   is the bind into which temporaries inserted before TARGET should be
   added.  */

static void
grid_attempt_target_gridification (gomp_target *target,
				   gimple_stmt_iterator *gsi,
				   gbind *tgt_bind)
{
  /* Collected gridification properties of TARGET.  */
  grid_prop grid = {};
  if (!target || !grid_target_follows_gridifiable_pattern (target, &grid))
    return;

  location_t loc = gimple_location (target);
  if (dump_enabled_p ())
    dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, target,
		     "Target construct will be turned into a gridified HSA "
		     "kernel\n");

  /* Copy target body to a GPUKERNEL construct:  */
  gimple_seq kernel_seq = copy_gimple_seq_and_replace_locals
    (gimple_omp_body (target));

  /* Map from original decls to their copies, shared by all walkers below.  */
  hash_map<tree, tree> *declmap = new hash_map<tree, tree>;
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (struct walk_stmt_info));
  wi.info = declmap;

  /* Copy assignments in between OMP statements before target, mark OMP
     statements within copy appropriately.  */
  gomp_for *inner_loop = grid_process_kernel_body_copy (&grid, kernel_seq, gsi,
							tgt_bind, &wi);

  /* Chain the copied bind's block into the block tree of the enclosing
     (original) bind.  */
  gbind *old_bind
    = as_a <gbind *> (gimple_seq_first (gimple_omp_body (target)));
  gbind *new_bind = as_a <gbind *> (gimple_seq_first (kernel_seq));
  tree new_block = gimple_bind_block (new_bind);
  tree enc_block = BLOCK_SUPERCONTEXT (gimple_bind_block (old_bind));
  BLOCK_CHAIN (new_block) = BLOCK_SUBBLOCKS (enc_block);
  BLOCK_SUBBLOCKS (enc_block) = new_block;
  BLOCK_SUPERCONTEXT (new_block) = enc_block;
  gimple *gpukernel = gimple_build_omp_grid_body (kernel_seq);
  gimple_seq_add_stmt
    (gimple_bind_body_ptr (as_a <gbind *> (gimple_omp_body (target))),
     gpukernel);

  for (size_t i = 0; i < grid.collapse; i++)
    walk_tree (&grid.group_sizes[i], grid_remap_prebody_decls, &wi, NULL);
  /* For each collapsed dimension, compute the grid size (number of loop
     iterations) and the group size, and attach them to TARGET as a
     _griddim_ clause.  */
  push_gimplify_context ();
  for (size_t i = 0; i < grid.collapse; i++)
    {
      tree index_var = gimple_omp_for_index (inner_loop, i);
      tree itype, type = TREE_TYPE (index_var);
      if (POINTER_TYPE_P (type))
	itype = signed_type_for (type);
      else
	itype = type;

      enum tree_code cond_code = gimple_omp_for_cond (inner_loop, i);
      tree n1 = unshare_expr (gimple_omp_for_initial (inner_loop, i));
      walk_tree (&n1, grid_remap_prebody_decls, &wi, NULL);
      tree n2 = unshare_expr (gimple_omp_for_final (inner_loop, i));
      walk_tree (&n2, grid_remap_prebody_decls, &wi, NULL);
      tree step
	= omp_get_for_step_from_incr (loc, gimple_omp_for_incr (inner_loop, i));
      omp_adjust_for_condition (loc, &cond_code, &n2, index_var, step);
      n1 = fold_convert (itype, n1);
      n2 = fold_convert (itype, n2);

      tree cond = fold_build2 (cond_code, boolean_type_node, n1, n2);

      /* Trip count: (step + (cond_code == LT_EXPR ? -1 : 1) + n2 - n1)
	 / step, i.e. the usual rounded-up iteration count, forced to zero
	 when the loop does not iterate at all.  For unsigned types with a
	 downward-counting loop, negate both operands before the division to
	 keep the truncating division correct.  */
      tree t = build_int_cst (itype, (cond_code == LT_EXPR ? -1 : 1));
      t = fold_build2 (PLUS_EXPR, itype, step, t);
      t = fold_build2 (PLUS_EXPR, itype, t, n2);
      t = fold_build2 (MINUS_EXPR, itype, t, n1);
      if (TYPE_UNSIGNED (itype) && cond_code == GT_EXPR)
	t = fold_build2 (TRUNC_DIV_EXPR, itype,
			 fold_build1 (NEGATE_EXPR, itype, t),
			 fold_build1 (NEGATE_EXPR, itype, step));
      else
	t = fold_build2 (TRUNC_DIV_EXPR, itype, t, step);
      t = fold_build3 (COND_EXPR, itype, cond, t, build_zero_cst (itype));
      if (grid.tiling)
	{
	  /* When tiling, the grid dimension is the iteration count scaled by
	     the (positive) step.  */
	  if (cond_code == GT_EXPR)
	    step = fold_build1 (NEGATE_EXPR, itype, step);
	  t = fold_build2 (MULT_EXPR, itype, t, step);
	}

      tree gs = fold_convert (uint32_type_node, t);
      gimple_seq tmpseq = NULL;
      gimplify_expr (&gs, &tmpseq, NULL, is_gimple_val, fb_rvalue);
      if (!gimple_seq_empty_p (tmpseq))
	gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);

      /* Group size: either the recorded size for this dimension or zero,
	 which leaves the choice to the run-time.  */
      tree ws;
      if (grid.group_sizes[i])
	{
	  ws = fold_convert (uint32_type_node, grid.group_sizes[i]);
	  tmpseq = NULL;
	  gimplify_expr (&ws, &tmpseq, NULL, is_gimple_val, fb_rvalue);
	  if (!gimple_seq_empty_p (tmpseq))
	    gsi_insert_seq_before (gsi, tmpseq, GSI_SAME_STMT);
	}
      else
	ws = build_zero_cst (uint32_type_node);

      tree c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE__GRIDDIM_);
      OMP_CLAUSE__GRIDDIM__DIMENSION (c) = i;
      OMP_CLAUSE__GRIDDIM__SIZE (c) = gs;
      OMP_CLAUSE__GRIDDIM__GROUP (c) = ws;
      OMP_CLAUSE_CHAIN (c) = gimple_omp_target_clauses (target);
      gimple_omp_target_set_clauses (target, c);
    }
  pop_gimplify_context (tgt_bind);
  delete declmap;
  return;
}
|
|
1374
|
|
1375 /* Walker function doing all the work for create_target_kernels. */
|
|
1376
|
|
1377 static tree
|
|
1378 grid_gridify_all_targets_stmt (gimple_stmt_iterator *gsi,
|
|
1379 bool *handled_ops_p,
|
|
1380 struct walk_stmt_info *incoming)
|
|
1381 {
|
|
1382 *handled_ops_p = false;
|
|
1383
|
|
1384 gimple *stmt = gsi_stmt (*gsi);
|
|
1385 gomp_target *target = dyn_cast <gomp_target *> (stmt);
|
|
1386 if (target)
|
|
1387 {
|
|
1388 gbind *tgt_bind = (gbind *) incoming->info;
|
|
1389 gcc_checking_assert (tgt_bind);
|
|
1390 grid_attempt_target_gridification (target, gsi, tgt_bind);
|
|
1391 return NULL_TREE;
|
|
1392 }
|
|
1393 gbind *bind = dyn_cast <gbind *> (stmt);
|
|
1394 if (bind)
|
|
1395 {
|
|
1396 *handled_ops_p = true;
|
|
1397 struct walk_stmt_info wi;
|
|
1398 memset (&wi, 0, sizeof (wi));
|
|
1399 wi.info = bind;
|
|
1400 walk_gimple_seq_mod (gimple_bind_body_ptr (bind),
|
|
1401 grid_gridify_all_targets_stmt, NULL, &wi);
|
|
1402 }
|
|
1403 return NULL_TREE;
|
|
1404 }
|
|
1405
|
|
1406 /* Attempt to gridify all target constructs in BODY_P. All such targets will
|
|
1407 have their bodies duplicated, with the new copy being put into a
|
|
1408 gimple_omp_grid_body statement. All kernel-related construct within the
|
|
1409 grid_body will be marked with phony flags or kernel kinds. Moreover, some
|
|
1410 re-structuring is often needed, such as copying pre-bodies before the target
|
|
1411 construct so that kernel grid sizes can be computed. */
|
|
1412
|
|
1413 void
|
|
1414 omp_grid_gridify_all_targets (gimple_seq *body_p)
|
|
1415 {
|
|
1416 struct walk_stmt_info wi;
|
|
1417 memset (&wi, 0, sizeof (wi));
|
|
1418 walk_gimple_seq_mod (body_p, grid_gridify_all_targets_stmt, NULL, &wi);
|
|
1419 }
|