145
|
1 /* Copyright (C) 2017-2020 Free Software Foundation, Inc.
|
|
2
|
|
3 This file is part of GCC.
|
|
4
|
|
5 GCC is free software; you can redistribute it and/or modify it under
|
|
6 the terms of the GNU General Public License as published by the Free
|
|
7 Software Foundation; either version 3, or (at your option) any later
|
|
8 version.
|
|
9
|
|
10 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
11 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
12 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
13 for more details.
|
|
14
|
|
15 You should have received a copy of the GNU General Public License
|
|
16 along with GCC; see the file COPYING3. If not see
|
|
17 <http://www.gnu.org/licenses/>. */
|
|
18
|
|
19 /* {{{ Includes. */
|
|
20
|
|
21 #include "config.h"
|
|
22 #include "system.h"
|
|
23 #include "coretypes.h"
|
|
24 #include "backend.h"
|
|
25 #include "target.h"
|
|
26 #include "tree.h"
|
|
27 #include "gimple.h"
|
|
28 #include "tree-pass.h"
|
|
29 #include "gimple-iterator.h"
|
|
30 #include "cfghooks.h"
|
|
31 #include "cfgloop.h"
|
|
32 #include "tm_p.h"
|
|
33 #include "stringpool.h"
|
|
34 #include "fold-const.h"
|
|
35 #include "varasm.h"
|
|
36 #include "omp-low.h"
|
|
37 #include "omp-general.h"
|
|
38 #include "internal-fn.h"
|
|
39 #include "tree-vrp.h"
|
|
40 #include "tree-ssanames.h"
|
|
41 #include "tree-ssa-operands.h"
|
|
42 #include "gimplify.h"
|
|
43 #include "tree-phinodes.h"
|
|
44 #include "cgraph.h"
|
|
45 #include "targhooks.h"
|
|
46 #include "langhooks-def.h"
|
|
47
|
|
48 /* }}} */
|
|
49 /* {{{ OMP GCN pass.
|
|
50
|
|
51 This pass is intended to make any GCN-specific transformations to OpenMP
|
|
52 target regions.
|
|
53
|
|
54 At present, its only purpose is to convert some "omp" built-in functions
|
|
55 to use closer-to-the-metal "gcn" built-in functions. */
|
|
56
|
|
57 unsigned int
|
|
58 execute_omp_gcn (void)
|
|
59 {
|
|
60 tree thr_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_THREAD_NUM);
|
|
61 tree thr_num_id = DECL_NAME (thr_num_tree);
|
|
62 tree team_num_tree = builtin_decl_explicit (BUILT_IN_OMP_GET_TEAM_NUM);
|
|
63 tree team_num_id = DECL_NAME (team_num_tree);
|
|
64 basic_block bb;
|
|
65 gimple_stmt_iterator gsi;
|
|
66 unsigned int todo = 0;
|
|
67
|
|
68 FOR_EACH_BB_FN (bb, cfun)
|
|
69 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
70 {
|
|
71 gimple *call = gsi_stmt (gsi);
|
|
72 tree decl;
|
|
73
|
|
74 if (is_gimple_call (call) && (decl = gimple_call_fndecl (call)))
|
|
75 {
|
|
76 tree decl_id = DECL_NAME (decl);
|
|
77 tree lhs = gimple_get_lhs (call);
|
|
78
|
|
79 if (decl_id == thr_num_id)
|
|
80 {
|
|
81 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
82 fprintf (dump_file,
|
|
83 "Replace '%s' with __builtin_gcn_dim_pos.\n",
|
|
84 IDENTIFIER_POINTER (decl_id));
|
|
85
|
|
86 /* Transform this:
|
|
87 lhs = __builtin_omp_get_thread_num ()
|
|
88 to this:
|
|
89 lhs = __builtin_gcn_dim_pos (1) */
|
|
90 tree fn = targetm.builtin_decl (GCN_BUILTIN_OMP_DIM_POS, 0);
|
|
91 tree fnarg = build_int_cst (unsigned_type_node, 1);
|
|
92 gimple *stmt = gimple_build_call (fn, 1, fnarg);
|
|
93 gimple_call_set_lhs (stmt, lhs);
|
|
94 gsi_replace (&gsi, stmt, true);
|
|
95
|
|
96 todo |= TODO_update_ssa;
|
|
97 }
|
|
98 else if (decl_id == team_num_id)
|
|
99 {
|
|
100 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
101 fprintf (dump_file,
|
|
102 "Replace '%s' with __builtin_gcn_dim_pos.\n",
|
|
103 IDENTIFIER_POINTER (decl_id));
|
|
104
|
|
105 /* Transform this:
|
|
106 lhs = __builtin_omp_get_team_num ()
|
|
107 to this:
|
|
108 lhs = __builtin_gcn_dim_pos (0) */
|
|
109 tree fn = targetm.builtin_decl (GCN_BUILTIN_OMP_DIM_POS, 0);
|
|
110 tree fnarg = build_zero_cst (unsigned_type_node);
|
|
111 gimple *stmt = gimple_build_call (fn, 1, fnarg);
|
|
112 gimple_call_set_lhs (stmt, lhs);
|
|
113 gsi_replace (&gsi, stmt, true);
|
|
114
|
|
115 todo |= TODO_update_ssa;
|
|
116 }
|
|
117 }
|
|
118 }
|
|
119
|
|
120 return todo;
|
|
121 }
|
|
122
|
|
123 namespace
|
|
124 {
|
|
125
|
|
126 const pass_data pass_data_omp_gcn = {
|
|
127 GIMPLE_PASS,
|
|
128 "omp_gcn", /* name */
|
|
129 OPTGROUP_NONE, /* optinfo_flags */
|
|
130 TV_NONE, /* tv_id */
|
|
131 0, /* properties_required */
|
|
132 0, /* properties_provided */
|
|
133 0, /* properties_destroyed */
|
|
134 0, /* todo_flags_start */
|
|
135 TODO_df_finish, /* todo_flags_finish */
|
|
136 };
|
|
137
|
|
138 class pass_omp_gcn : public gimple_opt_pass
|
|
139 {
|
|
140 public:
|
|
141 pass_omp_gcn (gcc::context *ctxt)
|
|
142 : gimple_opt_pass (pass_data_omp_gcn, ctxt)
|
|
143 {
|
|
144 }
|
|
145
|
|
146 /* opt_pass methods: */
|
|
147 virtual bool gate (function *)
|
|
148 {
|
|
149 return flag_openmp;
|
|
150 }
|
|
151
|
|
152 virtual unsigned int execute (function *)
|
|
153 {
|
|
154 return execute_omp_gcn ();
|
|
155 }
|
|
156
|
|
157 }; /* class pass_omp_gcn. */
|
|
158
|
|
159 } /* anon namespace. */
|
|
160
|
|
161 gimple_opt_pass *
|
|
162 make_pass_omp_gcn (gcc::context *ctxt)
|
|
163 {
|
|
164 return new pass_omp_gcn (ctxt);
|
|
165 }
|
|
166
|
|
167 /* }}} */
|
|
168 /* {{{ OpenACC reductions. */
|
|
169
|
|
170 /* Global lock variable, needed for 128bit worker & gang reductions. */
|
|
171
|
|
172 static GTY(()) tree global_lock_var;
|
|
173
|
|
174 /* Lazily generate the global_lock_var decl and return its address. */
|
|
175
|
|
176 static tree
|
|
177 gcn_global_lock_addr ()
|
|
178 {
|
|
179 tree v = global_lock_var;
|
|
180
|
|
181 if (!v)
|
|
182 {
|
|
183 tree name = get_identifier ("__reduction_lock");
|
|
184 tree type = build_qualified_type (unsigned_type_node,
|
|
185 TYPE_QUAL_VOLATILE);
|
|
186 v = build_decl (BUILTINS_LOCATION, VAR_DECL, name, type);
|
|
187 global_lock_var = v;
|
|
188 DECL_ARTIFICIAL (v) = 1;
|
|
189 DECL_EXTERNAL (v) = 1;
|
|
190 TREE_STATIC (v) = 1;
|
|
191 TREE_PUBLIC (v) = 1;
|
|
192 TREE_USED (v) = 1;
|
|
193 mark_addressable (v);
|
|
194 mark_decl_referenced (v);
|
|
195 }
|
|
196
|
|
197 return build_fold_addr_expr (v);
|
|
198 }
|
|
199
|
|
200 /* Helper function for gcn_reduction_update.
|
|
201
|
|
202 Insert code to locklessly update *PTR with *PTR OP VAR just before
|
|
203 GSI. We use a lockless scheme for nearly all case, which looks
|
|
204 like:
|
|
205 actual = initval (OP);
|
|
206 do {
|
|
207 guess = actual;
|
|
208 write = guess OP myval;
|
|
209 actual = cmp&swap (ptr, guess, write)
|
|
210 } while (actual bit-different-to guess);
|
|
211 return write;
|
|
212
|
|
213 This relies on a cmp&swap instruction, which is available for 32- and
|
|
214 64-bit types. Larger types must use a locking scheme. */
|
|
215
|
|
216 static tree
|
|
217 gcn_lockless_update (location_t loc, gimple_stmt_iterator *gsi,
|
|
218 tree ptr, tree var, tree_code op)
|
|
219 {
|
|
220 unsigned fn = GCN_BUILTIN_CMP_SWAP;
|
|
221 tree_code code = NOP_EXPR;
|
|
222 tree arg_type = unsigned_type_node;
|
|
223 tree var_type = TREE_TYPE (var);
|
|
224
|
|
225 if (TREE_CODE (var_type) == COMPLEX_TYPE
|
|
226 || TREE_CODE (var_type) == REAL_TYPE)
|
|
227 code = VIEW_CONVERT_EXPR;
|
|
228
|
|
229 if (TYPE_SIZE (var_type) == TYPE_SIZE (long_long_unsigned_type_node))
|
|
230 {
|
|
231 arg_type = long_long_unsigned_type_node;
|
|
232 fn = GCN_BUILTIN_CMP_SWAPLL;
|
|
233 }
|
|
234
|
|
235 tree swap_fn = gcn_builtin_decl (fn, true);
|
|
236
|
|
237 gimple_seq init_seq = NULL;
|
|
238 tree init_var = make_ssa_name (arg_type);
|
|
239 tree init_expr = omp_reduction_init_op (loc, op, var_type);
|
|
240 init_expr = fold_build1 (code, arg_type, init_expr);
|
|
241 gimplify_assign (init_var, init_expr, &init_seq);
|
|
242 gimple *init_end = gimple_seq_last (init_seq);
|
|
243
|
|
244 gsi_insert_seq_before (gsi, init_seq, GSI_SAME_STMT);
|
|
245
|
|
246 /* Split the block just after the init stmts. */
|
|
247 basic_block pre_bb = gsi_bb (*gsi);
|
|
248 edge pre_edge = split_block (pre_bb, init_end);
|
|
249 basic_block loop_bb = pre_edge->dest;
|
|
250 pre_bb = pre_edge->src;
|
|
251 /* Reset the iterator. */
|
|
252 *gsi = gsi_for_stmt (gsi_stmt (*gsi));
|
|
253
|
|
254 tree expect_var = make_ssa_name (arg_type);
|
|
255 tree actual_var = make_ssa_name (arg_type);
|
|
256 tree write_var = make_ssa_name (arg_type);
|
|
257
|
|
258 /* Build and insert the reduction calculation. */
|
|
259 gimple_seq red_seq = NULL;
|
|
260 tree write_expr = fold_build1 (code, var_type, expect_var);
|
|
261 write_expr = fold_build2 (op, var_type, write_expr, var);
|
|
262 write_expr = fold_build1 (code, arg_type, write_expr);
|
|
263 gimplify_assign (write_var, write_expr, &red_seq);
|
|
264
|
|
265 gsi_insert_seq_before (gsi, red_seq, GSI_SAME_STMT);
|
|
266
|
|
267 /* Build & insert the cmp&swap sequence. */
|
|
268 gimple_seq latch_seq = NULL;
|
|
269 tree swap_expr = build_call_expr_loc (loc, swap_fn, 3,
|
|
270 ptr, expect_var, write_var);
|
|
271 gimplify_assign (actual_var, swap_expr, &latch_seq);
|
|
272
|
|
273 gcond *cond = gimple_build_cond (EQ_EXPR, actual_var, expect_var,
|
|
274 NULL_TREE, NULL_TREE);
|
|
275 gimple_seq_add_stmt (&latch_seq, cond);
|
|
276
|
|
277 gimple *latch_end = gimple_seq_last (latch_seq);
|
|
278 gsi_insert_seq_before (gsi, latch_seq, GSI_SAME_STMT);
|
|
279
|
|
280 /* Split the block just after the latch stmts. */
|
|
281 edge post_edge = split_block (loop_bb, latch_end);
|
|
282 basic_block post_bb = post_edge->dest;
|
|
283 loop_bb = post_edge->src;
|
|
284 *gsi = gsi_for_stmt (gsi_stmt (*gsi));
|
|
285
|
|
286 post_edge->flags ^= EDGE_TRUE_VALUE | EDGE_FALLTHRU;
|
|
287 /* post_edge->probability = profile_probability::even (); */
|
|
288 edge loop_edge = make_edge (loop_bb, loop_bb, EDGE_FALSE_VALUE);
|
|
289 /* loop_edge->probability = profile_probability::even (); */
|
|
290 set_immediate_dominator (CDI_DOMINATORS, loop_bb, pre_bb);
|
|
291 set_immediate_dominator (CDI_DOMINATORS, post_bb, loop_bb);
|
|
292
|
|
293 gphi *phi = create_phi_node (expect_var, loop_bb);
|
|
294 add_phi_arg (phi, init_var, pre_edge, loc);
|
|
295 add_phi_arg (phi, actual_var, loop_edge, loc);
|
|
296
|
|
297 loop *loop = alloc_loop ();
|
|
298 loop->header = loop_bb;
|
|
299 loop->latch = loop_bb;
|
|
300 add_loop (loop, loop_bb->loop_father);
|
|
301
|
|
302 return fold_build1 (code, var_type, write_var);
|
|
303 }
|
|
304
|
|
305 /* Helper function for gcn_reduction_update.
|
|
306
|
|
307 Insert code to lockfully update *PTR with *PTR OP VAR just before
|
|
308 GSI. This is necessary for types larger than 64 bits, where there
|
|
309 is no cmp&swap instruction to implement a lockless scheme. We use
|
|
310 a lock variable in global memory.
|
|
311
|
|
312 while (cmp&swap (&lock_var, 0, 1))
|
|
313 continue;
|
|
314 T accum = *ptr;
|
|
315 accum = accum OP var;
|
|
316 *ptr = accum;
|
|
317 cmp&swap (&lock_var, 1, 0);
|
|
318 return accum;
|
|
319
|
|
320 A lock in global memory is necessary to force execution engine
|
|
321 descheduling and avoid resource starvation that can occur if the
|
|
322 lock is in shared memory. */
|
|
323
|
|
324 static tree
|
|
325 gcn_lockfull_update (location_t loc, gimple_stmt_iterator *gsi,
|
|
326 tree ptr, tree var, tree_code op)
|
|
327 {
|
|
328 tree var_type = TREE_TYPE (var);
|
|
329 tree swap_fn = gcn_builtin_decl (GCN_BUILTIN_CMP_SWAP, true);
|
|
330 tree uns_unlocked = build_int_cst (unsigned_type_node, 0);
|
|
331 tree uns_locked = build_int_cst (unsigned_type_node, 1);
|
|
332
|
|
333 /* Split the block just before the gsi. Insert a gimple nop to make
|
|
334 this easier. */
|
|
335 gimple *nop = gimple_build_nop ();
|
|
336 gsi_insert_before (gsi, nop, GSI_SAME_STMT);
|
|
337 basic_block entry_bb = gsi_bb (*gsi);
|
|
338 edge entry_edge = split_block (entry_bb, nop);
|
|
339 basic_block lock_bb = entry_edge->dest;
|
|
340 /* Reset the iterator. */
|
|
341 *gsi = gsi_for_stmt (gsi_stmt (*gsi));
|
|
342
|
|
343 /* Build and insert the locking sequence. */
|
|
344 gimple_seq lock_seq = NULL;
|
|
345 tree lock_var = make_ssa_name (unsigned_type_node);
|
|
346 tree lock_expr = gcn_global_lock_addr ();
|
|
347 lock_expr = build_call_expr_loc (loc, swap_fn, 3, lock_expr,
|
|
348 uns_unlocked, uns_locked);
|
|
349 gimplify_assign (lock_var, lock_expr, &lock_seq);
|
|
350 gcond *cond = gimple_build_cond (EQ_EXPR, lock_var, uns_unlocked,
|
|
351 NULL_TREE, NULL_TREE);
|
|
352 gimple_seq_add_stmt (&lock_seq, cond);
|
|
353 gimple *lock_end = gimple_seq_last (lock_seq);
|
|
354 gsi_insert_seq_before (gsi, lock_seq, GSI_SAME_STMT);
|
|
355
|
|
356 /* Split the block just after the lock sequence. */
|
|
357 edge locked_edge = split_block (lock_bb, lock_end);
|
|
358 basic_block update_bb = locked_edge->dest;
|
|
359 lock_bb = locked_edge->src;
|
|
360 *gsi = gsi_for_stmt (gsi_stmt (*gsi));
|
|
361
|
|
362 /* Create the lock loop. */
|
|
363 locked_edge->flags ^= EDGE_TRUE_VALUE | EDGE_FALLTHRU;
|
|
364 locked_edge->probability = profile_probability::even ();
|
|
365 edge loop_edge = make_edge (lock_bb, lock_bb, EDGE_FALSE_VALUE);
|
|
366 loop_edge->probability = profile_probability::even ();
|
|
367 set_immediate_dominator (CDI_DOMINATORS, lock_bb, entry_bb);
|
|
368 set_immediate_dominator (CDI_DOMINATORS, update_bb, lock_bb);
|
|
369
|
|
370 /* Create the loop structure. */
|
|
371 loop *lock_loop = alloc_loop ();
|
|
372 lock_loop->header = lock_bb;
|
|
373 lock_loop->latch = lock_bb;
|
|
374 lock_loop->nb_iterations_estimate = 1;
|
|
375 lock_loop->any_estimate = true;
|
|
376 add_loop (lock_loop, entry_bb->loop_father);
|
|
377
|
|
378 /* Build and insert the reduction calculation. */
|
|
379 gimple_seq red_seq = NULL;
|
|
380 tree acc_in = make_ssa_name (var_type);
|
|
381 tree ref_in = build_simple_mem_ref (ptr);
|
|
382 TREE_THIS_VOLATILE (ref_in) = 1;
|
|
383 gimplify_assign (acc_in, ref_in, &red_seq);
|
|
384
|
|
385 tree acc_out = make_ssa_name (var_type);
|
|
386 tree update_expr = fold_build2 (op, var_type, ref_in, var);
|
|
387 gimplify_assign (acc_out, update_expr, &red_seq);
|
|
388
|
|
389 tree ref_out = build_simple_mem_ref (ptr);
|
|
390 TREE_THIS_VOLATILE (ref_out) = 1;
|
|
391 gimplify_assign (ref_out, acc_out, &red_seq);
|
|
392
|
|
393 gsi_insert_seq_before (gsi, red_seq, GSI_SAME_STMT);
|
|
394
|
|
395 /* Build & insert the unlock sequence. */
|
|
396 gimple_seq unlock_seq = NULL;
|
|
397 tree unlock_expr = gcn_global_lock_addr ();
|
|
398 unlock_expr = build_call_expr_loc (loc, swap_fn, 3, unlock_expr,
|
|
399 uns_locked, uns_unlocked);
|
|
400 gimplify_and_add (unlock_expr, &unlock_seq);
|
|
401 gsi_insert_seq_before (gsi, unlock_seq, GSI_SAME_STMT);
|
|
402
|
|
403 return acc_out;
|
|
404 }
|
|
405
|
|
406 /* Emit a sequence to update a reduction accumulator at *PTR with the
|
|
407 value held in VAR using operator OP. Return the updated value.
|
|
408
|
|
409 TODO: optimize for atomic ops and independent complex ops. */
|
|
410
|
|
411 static tree
|
|
412 gcn_reduction_update (location_t loc, gimple_stmt_iterator *gsi,
|
|
413 tree ptr, tree var, tree_code op)
|
|
414 {
|
|
415 tree type = TREE_TYPE (var);
|
|
416 tree size = TYPE_SIZE (type);
|
|
417
|
|
418 if (size == TYPE_SIZE (unsigned_type_node)
|
|
419 || size == TYPE_SIZE (long_long_unsigned_type_node))
|
|
420 return gcn_lockless_update (loc, gsi, ptr, var, op);
|
|
421 else
|
|
422 return gcn_lockfull_update (loc, gsi, ptr, var, op);
|
|
423 }
|
|
424
|
|
425 /* Return a temporary variable decl to use for an OpenACC worker reduction. */
|
|
426
|
|
427 static tree
|
|
428 gcn_goacc_get_worker_red_decl (tree type, unsigned offset)
|
|
429 {
|
|
430 machine_function *machfun = cfun->machine;
|
|
431 tree existing_decl;
|
|
432
|
|
433 if (TREE_CODE (type) == REFERENCE_TYPE)
|
|
434 type = TREE_TYPE (type);
|
|
435
|
|
436 tree var_type
|
|
437 = build_qualified_type (type,
|
|
438 (TYPE_QUALS (type)
|
|
439 | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_LDS)));
|
|
440
|
|
441 if (machfun->reduc_decls
|
|
442 && offset < machfun->reduc_decls->length ()
|
|
443 && (existing_decl = (*machfun->reduc_decls)[offset]))
|
|
444 {
|
|
445 gcc_assert (TREE_TYPE (existing_decl) == var_type);
|
|
446 return existing_decl;
|
|
447 }
|
|
448 else
|
|
449 {
|
|
450 char name[50];
|
|
451 sprintf (name, ".oacc_reduction_%u", offset);
|
|
452 tree decl = create_tmp_var_raw (var_type, name);
|
|
453
|
|
454 DECL_CONTEXT (decl) = NULL_TREE;
|
|
455 TREE_STATIC (decl) = 1;
|
|
456
|
|
457 varpool_node::finalize_decl (decl);
|
|
458
|
|
459 vec_safe_grow_cleared (machfun->reduc_decls, offset + 1);
|
|
460 (*machfun->reduc_decls)[offset] = decl;
|
|
461
|
|
462 return decl;
|
|
463 }
|
|
464
|
|
465 return NULL_TREE;
|
|
466 }
|
|
467
|
|
468 /* Expand IFN_GOACC_REDUCTION_SETUP. */
|
|
469
|
|
470 static void
|
|
471 gcn_goacc_reduction_setup (gcall *call)
|
|
472 {
|
|
473 gimple_stmt_iterator gsi = gsi_for_stmt (call);
|
|
474 tree lhs = gimple_call_lhs (call);
|
|
475 tree var = gimple_call_arg (call, 2);
|
|
476 int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
|
|
477 gimple_seq seq = NULL;
|
|
478
|
|
479 push_gimplify_context (true);
|
|
480
|
|
481 if (level != GOMP_DIM_GANG)
|
|
482 {
|
|
483 /* Copy the receiver object. */
|
|
484 tree ref_to_res = gimple_call_arg (call, 1);
|
|
485
|
|
486 if (!integer_zerop (ref_to_res))
|
|
487 var = build_simple_mem_ref (ref_to_res);
|
|
488 }
|
|
489
|
|
490 if (level == GOMP_DIM_WORKER)
|
|
491 {
|
|
492 tree var_type = TREE_TYPE (var);
|
|
493 /* Store incoming value to worker reduction buffer. */
|
|
494 tree offset = gimple_call_arg (call, 5);
|
|
495 tree decl
|
|
496 = gcn_goacc_get_worker_red_decl (var_type, TREE_INT_CST_LOW (offset));
|
|
497
|
|
498 gimplify_assign (decl, var, &seq);
|
|
499 }
|
|
500
|
|
501 if (lhs)
|
|
502 gimplify_assign (lhs, var, &seq);
|
|
503
|
|
504 pop_gimplify_context (NULL);
|
|
505 gsi_replace_with_seq (&gsi, seq, true);
|
|
506 }
|
|
507
|
|
508 /* Expand IFN_GOACC_REDUCTION_INIT. */
|
|
509
|
|
510 static void
|
|
511 gcn_goacc_reduction_init (gcall *call)
|
|
512 {
|
|
513 gimple_stmt_iterator gsi = gsi_for_stmt (call);
|
|
514 tree lhs = gimple_call_lhs (call);
|
|
515 tree var = gimple_call_arg (call, 2);
|
|
516 int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
|
|
517 enum tree_code rcode
|
|
518 = (enum tree_code) TREE_INT_CST_LOW (gimple_call_arg (call, 4));
|
|
519 tree init = omp_reduction_init_op (gimple_location (call), rcode,
|
|
520 TREE_TYPE (var));
|
|
521 gimple_seq seq = NULL;
|
|
522
|
|
523 push_gimplify_context (true);
|
|
524
|
|
525 if (level == GOMP_DIM_GANG)
|
|
526 {
|
|
527 /* If there's no receiver object, propagate the incoming VAR. */
|
|
528 tree ref_to_res = gimple_call_arg (call, 1);
|
|
529 if (integer_zerop (ref_to_res))
|
|
530 init = var;
|
|
531 }
|
|
532
|
|
533 if (lhs)
|
|
534 gimplify_assign (lhs, init, &seq);
|
|
535
|
|
536 pop_gimplify_context (NULL);
|
|
537 gsi_replace_with_seq (&gsi, seq, true);
|
|
538 }
|
|
539
|
|
540 /* Expand IFN_GOACC_REDUCTION_FINI. */
|
|
541
|
|
542 static void
|
|
543 gcn_goacc_reduction_fini (gcall *call)
|
|
544 {
|
|
545 gimple_stmt_iterator gsi = gsi_for_stmt (call);
|
|
546 tree lhs = gimple_call_lhs (call);
|
|
547 tree ref_to_res = gimple_call_arg (call, 1);
|
|
548 tree var = gimple_call_arg (call, 2);
|
|
549 int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
|
|
550 enum tree_code op
|
|
551 = (enum tree_code) TREE_INT_CST_LOW (gimple_call_arg (call, 4));
|
|
552 gimple_seq seq = NULL;
|
|
553 tree r = NULL_TREE;;
|
|
554
|
|
555 push_gimplify_context (true);
|
|
556
|
|
557 tree accum = NULL_TREE;
|
|
558
|
|
559 if (level == GOMP_DIM_WORKER)
|
|
560 {
|
|
561 tree var_type = TREE_TYPE (var);
|
|
562 tree offset = gimple_call_arg (call, 5);
|
|
563 tree decl
|
|
564 = gcn_goacc_get_worker_red_decl (var_type, TREE_INT_CST_LOW (offset));
|
|
565
|
|
566 accum = build_fold_addr_expr (decl);
|
|
567 }
|
|
568 else if (integer_zerop (ref_to_res))
|
|
569 r = var;
|
|
570 else
|
|
571 accum = ref_to_res;
|
|
572
|
|
573 if (accum)
|
|
574 {
|
|
575 /* UPDATE the accumulator. */
|
|
576 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
|
|
577 seq = NULL;
|
|
578 r = gcn_reduction_update (gimple_location (call), &gsi, accum, var, op);
|
|
579 }
|
|
580
|
|
581 if (lhs)
|
|
582 gimplify_assign (lhs, r, &seq);
|
|
583 pop_gimplify_context (NULL);
|
|
584
|
|
585 gsi_replace_with_seq (&gsi, seq, true);
|
|
586 }
|
|
587
|
|
588 /* Expand IFN_GOACC_REDUCTION_TEARDOWN. */
|
|
589
|
|
590 static void
|
|
591 gcn_goacc_reduction_teardown (gcall *call)
|
|
592 {
|
|
593 gimple_stmt_iterator gsi = gsi_for_stmt (call);
|
|
594 tree lhs = gimple_call_lhs (call);
|
|
595 tree var = gimple_call_arg (call, 2);
|
|
596 int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
|
|
597 gimple_seq seq = NULL;
|
|
598
|
|
599 push_gimplify_context (true);
|
|
600
|
|
601 if (level == GOMP_DIM_WORKER)
|
|
602 {
|
|
603 tree var_type = TREE_TYPE (var);
|
|
604
|
|
605 /* Read the worker reduction buffer. */
|
|
606 tree offset = gimple_call_arg (call, 5);
|
|
607 tree decl
|
|
608 = gcn_goacc_get_worker_red_decl (var_type, TREE_INT_CST_LOW (offset));
|
|
609 var = decl;
|
|
610 }
|
|
611
|
|
612 if (level != GOMP_DIM_GANG)
|
|
613 {
|
|
614 /* Write to the receiver object. */
|
|
615 tree ref_to_res = gimple_call_arg (call, 1);
|
|
616
|
|
617 if (!integer_zerop (ref_to_res))
|
|
618 gimplify_assign (build_simple_mem_ref (ref_to_res), var, &seq);
|
|
619 }
|
|
620
|
|
621 if (lhs)
|
|
622 gimplify_assign (lhs, var, &seq);
|
|
623
|
|
624 pop_gimplify_context (NULL);
|
|
625
|
|
626 gsi_replace_with_seq (&gsi, seq, true);
|
|
627 }
|
|
628
|
|
629 /* Implement TARGET_GOACC_REDUCTION.
|
|
630
|
|
631 Expand calls to the GOACC REDUCTION internal function, into a sequence of
|
|
632 gimple instructions. */
|
|
633
|
|
634 void
|
|
635 gcn_goacc_reduction (gcall *call)
|
|
636 {
|
|
637 int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
|
|
638
|
|
639 if (level == GOMP_DIM_VECTOR)
|
|
640 {
|
|
641 default_goacc_reduction (call);
|
|
642 return;
|
|
643 }
|
|
644
|
|
645 unsigned code = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
|
|
646
|
|
647 switch (code)
|
|
648 {
|
|
649 case IFN_GOACC_REDUCTION_SETUP:
|
|
650 gcn_goacc_reduction_setup (call);
|
|
651 break;
|
|
652
|
|
653 case IFN_GOACC_REDUCTION_INIT:
|
|
654 gcn_goacc_reduction_init (call);
|
|
655 break;
|
|
656
|
|
657 case IFN_GOACC_REDUCTION_FINI:
|
|
658 gcn_goacc_reduction_fini (call);
|
|
659 break;
|
|
660
|
|
661 case IFN_GOACC_REDUCTION_TEARDOWN:
|
|
662 gcn_goacc_reduction_teardown (call);
|
|
663 break;
|
|
664
|
|
665 default:
|
|
666 gcc_unreachable ();
|
|
667 }
|
|
668 }
|
|
669
|
|
670 /* Implement TARGET_GOACC_ADJUST_PROPAGATION_RECORD.
|
|
671
|
|
672 Tweak (worker) propagation record, e.g. to put it in shared memory. */
|
|
673
|
|
674 tree
|
|
675 gcn_goacc_adjust_propagation_record (tree record_type, bool sender,
|
|
676 const char *name)
|
|
677 {
|
|
678 tree type = record_type;
|
|
679
|
|
680 TYPE_ADDR_SPACE (type) = ADDR_SPACE_LDS;
|
|
681
|
|
682 if (!sender)
|
|
683 type = build_pointer_type (type);
|
|
684
|
|
685 tree decl = create_tmp_var_raw (type, name);
|
|
686
|
|
687 if (sender)
|
|
688 {
|
|
689 DECL_CONTEXT (decl) = NULL_TREE;
|
|
690 TREE_STATIC (decl) = 1;
|
|
691 }
|
|
692
|
|
693 if (sender)
|
|
694 varpool_node::finalize_decl (decl);
|
|
695
|
|
696 return decl;
|
|
697 }
|
|
698
|
|
699 void
|
|
700 gcn_goacc_adjust_gangprivate_decl (tree var)
|
|
701 {
|
|
702 tree type = TREE_TYPE (var);
|
|
703 tree lds_type = build_qualified_type (type,
|
|
704 TYPE_QUALS_NO_ADDR_SPACE (type)
|
|
705 | ENCODE_QUAL_ADDR_SPACE (ADDR_SPACE_LDS));
|
|
706 machine_function *machfun = cfun->machine;
|
|
707
|
|
708 TREE_TYPE (var) = lds_type;
|
|
709 TREE_STATIC (var) = 1;
|
|
710
|
|
711 /* We're making VAR static. We have to mangle the name to avoid collisions
|
|
712 between different local variables that share the same names. */
|
|
713 lhd_set_decl_assembler_name (var);
|
|
714
|
|
715 varpool_node::finalize_decl (var);
|
|
716
|
|
717 if (machfun)
|
|
718 machfun->use_flat_addressing = true;
|
|
719 }
|
|
720
|
|
721 /* }}} */
|