111
|
1 /* Bits of OpenMP and OpenACC handling that is specific to device offloading
|
|
2 and a lowering pass for OpenACC device directives.
|
|
3
|
145
|
4 Copyright (C) 2005-2020 Free Software Foundation, Inc.
|
111
|
5
|
|
6 This file is part of GCC.
|
|
7
|
|
8 GCC is free software; you can redistribute it and/or modify it under
|
|
9 the terms of the GNU General Public License as published by the Free
|
|
10 Software Foundation; either version 3, or (at your option) any later
|
|
11 version.
|
|
12
|
|
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
16 for more details.
|
|
17
|
|
18 You should have received a copy of the GNU General Public License
|
|
19 along with GCC; see the file COPYING3. If not see
|
|
20 <http://www.gnu.org/licenses/>. */
|
|
21
|
|
22 #include "config.h"
|
|
23 #include "system.h"
|
|
24 #include "coretypes.h"
|
|
25 #include "backend.h"
|
|
26 #include "target.h"
|
|
27 #include "tree.h"
|
|
28 #include "gimple.h"
|
|
29 #include "tree-pass.h"
|
|
30 #include "ssa.h"
|
|
31 #include "cgraph.h"
|
|
32 #include "pretty-print.h"
|
|
33 #include "diagnostic-core.h"
|
|
34 #include "fold-const.h"
|
|
35 #include "internal-fn.h"
|
|
36 #include "langhooks.h"
|
|
37 #include "gimplify.h"
|
|
38 #include "gimple-iterator.h"
|
|
39 #include "gimplify-me.h"
|
|
40 #include "gimple-walk.h"
|
|
41 #include "tree-cfg.h"
|
|
42 #include "tree-into-ssa.h"
|
|
43 #include "tree-nested.h"
|
|
44 #include "stor-layout.h"
|
|
45 #include "common/common-target.h"
|
|
46 #include "omp-general.h"
|
|
47 #include "omp-offload.h"
|
|
48 #include "lto-section-names.h"
|
|
49 #include "gomp-constants.h"
|
|
50 #include "gimple-pretty-print.h"
|
|
51 #include "intl.h"
|
|
52 #include "stringpool.h"
|
|
53 #include "attribs.h"
|
|
54 #include "cfgloop.h"
|
|
55
|
|
/* Describe the OpenACC looping structure of a function.  The entire
   function is held in a 'NULL' loop.  */

struct oacc_loop
{
  oacc_loop *parent; /* Containing loop.  */

  oacc_loop *child; /* First inner loop.  */

  oacc_loop *sibling; /* Next loop within same parent.  */

  location_t loc; /* Location of the loop start.  */

  gcall *marker; /* Initial head marker.  */

  /* Head/tail marker calls, indexed by partitioning axis (GOMP_DIM_*).  */
  gcall *heads[GOMP_DIM_MAX]; /* Head marker functions.  */
  gcall *tails[GOMP_DIM_MAX]; /* Tail marker functions.  */

  tree routine; /* Pseudo-loop enclosing a routine.  */

  unsigned mask;   /* Partitioning mask.  */
  unsigned e_mask; /* Partitioning of element loops (when tiling).  */
  unsigned inner;  /* Partitioning of inner loops.  */
  unsigned flags;  /* Partitioning flags.  */
  vec<gcall *> ifns; /* Contained loop abstraction functions
			(IFN_GOACC_LOOP / IFN_GOACC_TILE calls).  */
  tree chunk_size; /* Chunk size.  */
  gcall *head_end; /* Final marker of head sequence.  */
};
|
|
84
|
|
/* Holds offload tables with decls.  Consumed by omp_finish_file, which
   emits them into the offload function/variable tables.  */
vec<tree, va_gc> *offload_funcs, *offload_vars;
|
|
87
|
|
88 /* Return level at which oacc routine may spawn a partitioned loop, or
|
|
89 -1 if it is not a routine (i.e. is an offload fn). */
|
|
90
|
145
|
91 int
|
111
|
92 oacc_fn_attrib_level (tree attr)
|
|
93 {
|
|
94 tree pos = TREE_VALUE (attr);
|
|
95
|
|
96 if (!TREE_PURPOSE (pos))
|
|
97 return -1;
|
|
98
|
|
99 int ix = 0;
|
|
100 for (ix = 0; ix != GOMP_DIM_MAX;
|
|
101 ix++, pos = TREE_CHAIN (pos))
|
|
102 if (!integer_zerop (TREE_PURPOSE (pos)))
|
|
103 break;
|
|
104
|
|
105 return ix;
|
|
106 }
|
|
107
|
|
/* Helper function for omp_finish_file routine.  Takes decls from V_DECLS and
   adds their addresses and sizes to constructor-vector V_CTOR.  */

static void
add_decls_addresses_to_decl_constructor (vec<tree, va_gc> *v_decls,
					 vec<constructor_elt, va_gc> *v_ctor)
{
  unsigned len = vec_safe_length (v_decls);
  for (unsigned i = 0; i < len; i++)
    {
      tree it = (*v_decls)[i];
      bool is_var = VAR_P (it);
      /* On the accelerator compiler, link vars are additionally
	 recognized by having a value expression (the pointer decl
	 substituted for them).  */
      bool is_link_var
	= is_var
#ifdef ACCEL_COMPILER
	  && DECL_HAS_VALUE_EXPR_P (it)
#endif
	  && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (it));

      /* Functions contribute an address only; variables contribute an
	 (address, size) pair.  */
      tree size = NULL_TREE;
      if (is_var)
	size = fold_convert (const_ptr_type_node, DECL_SIZE_UNIT (it));

      tree addr;
      if (!is_link_var)
	addr = build_fold_addr_expr (it);
      else
	{
#ifdef ACCEL_COMPILER
	  /* For "omp declare target link" vars add address of the pointer to
	     the target table, instead of address of the var.  */
	  tree value_expr = DECL_VALUE_EXPR (it);
	  tree link_ptr_decl = TREE_OPERAND (value_expr, 0);
	  varpool_node::finalize_decl (link_ptr_decl);
	  addr = build_fold_addr_expr (link_ptr_decl);
#else
	  addr = build_fold_addr_expr (it);
#endif

	  /* Most significant bit of the size marks "omp declare target link"
	     vars in host and target tables.  */
	  unsigned HOST_WIDE_INT isize = tree_to_uhwi (size);
	  isize |= 1ULL << (int_size_in_bytes (const_ptr_type_node)
			    * BITS_PER_UNIT - 1);
	  size = wide_int_to_tree (const_ptr_type_node, isize);
	}

      CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, addr);
      if (is_var)
	CONSTRUCTOR_APPEND_ELT (v_ctor, NULL_TREE, size);
    }
}
|
|
160
|
|
/* Create new symbols containing (address, size) pairs for global variables,
   marked with "omp declare target" attribute, as well as addresses for the
   functions, which are outlined offloading regions.  */
void
omp_finish_file (void)
{
  unsigned num_funcs = vec_safe_length (offload_funcs);
  unsigned num_vars = vec_safe_length (offload_vars);

  if (num_funcs == 0 && num_vars == 0)
    return;

  if (targetm_common.have_named_sections)
    {
      vec<constructor_elt, va_gc> *v_f, *v_v;
      vec_alloc (v_f, num_funcs);
      /* Each variable contributes two elements (address, size).  */
      vec_alloc (v_v, num_vars * 2);

      add_decls_addresses_to_decl_constructor (offload_funcs, v_f);
      add_decls_addresses_to_decl_constructor (offload_vars, v_v);

      tree vars_decl_type = build_array_type_nelts (pointer_sized_int_node,
						    num_vars * 2);
      tree funcs_decl_type = build_array_type_nelts (pointer_sized_int_node,
						     num_funcs);
      SET_TYPE_ALIGN (vars_decl_type, TYPE_ALIGN (pointer_sized_int_node));
      SET_TYPE_ALIGN (funcs_decl_type, TYPE_ALIGN (pointer_sized_int_node));
      tree ctor_v = build_constructor (vars_decl_type, v_v);
      tree ctor_f = build_constructor (funcs_decl_type, v_f);
      TREE_CONSTANT (ctor_v) = TREE_CONSTANT (ctor_f) = 1;
      TREE_STATIC (ctor_v) = TREE_STATIC (ctor_f) = 1;
      tree funcs_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
				    get_identifier (".offload_func_table"),
				    funcs_decl_type);
      tree vars_decl = build_decl (UNKNOWN_LOCATION, VAR_DECL,
				   get_identifier (".offload_var_table"),
				   vars_decl_type);
      TREE_STATIC (funcs_decl) = TREE_STATIC (vars_decl) = 1;
      /* Do not align tables more than TYPE_ALIGN (pointer_sized_int_node),
	 otherwise a joint table in a binary will contain padding between
	 tables from multiple object files.  */
      DECL_USER_ALIGN (funcs_decl) = DECL_USER_ALIGN (vars_decl) = 1;
      SET_DECL_ALIGN (funcs_decl, TYPE_ALIGN (funcs_decl_type));
      SET_DECL_ALIGN (vars_decl, TYPE_ALIGN (vars_decl_type));
      DECL_INITIAL (funcs_decl) = ctor_f;
      DECL_INITIAL (vars_decl) = ctor_v;
      set_decl_section_name (funcs_decl, OFFLOAD_FUNC_TABLE_SECTION_NAME);
      set_decl_section_name (vars_decl, OFFLOAD_VAR_TABLE_SECTION_NAME);

      varpool_node::finalize_decl (vars_decl);
      varpool_node::finalize_decl (funcs_decl);
    }
  else
    {
      /* Without named sections, hand each symbol to the target hook
	 individually.  */
      for (unsigned i = 0; i < num_funcs; i++)
	{
	  tree it = (*offload_funcs)[i];
	  targetm.record_offload_symbol (it);
	}
      for (unsigned i = 0; i < num_vars; i++)
	{
	  tree it = (*offload_vars)[i];
	  targetm.record_offload_symbol (it);
	}
    }
}
|
|
227
|
|
228 /* Call dim_pos (POS == true) or dim_size (POS == false) builtins for
|
|
229 axis DIM. Return a tmp var holding the result. */
|
|
230
|
|
231 static tree
|
|
232 oacc_dim_call (bool pos, int dim, gimple_seq *seq)
|
|
233 {
|
|
234 tree arg = build_int_cst (unsigned_type_node, dim);
|
|
235 tree size = create_tmp_var (integer_type_node);
|
|
236 enum internal_fn fn = pos ? IFN_GOACC_DIM_POS : IFN_GOACC_DIM_SIZE;
|
|
237 gimple *call = gimple_build_call_internal (fn, 1, arg);
|
|
238
|
|
239 gimple_call_set_lhs (call, size);
|
|
240 gimple_seq_add_stmt (seq, call);
|
|
241
|
|
242 return size;
|
|
243 }
|
|
244
|
|
/* Find the number of threads (POS = false), or thread number (POS =
   true) for an OpenACC region partitioned as MASK.  Setup code
   required for the calculation is added to SEQ.  */

static tree
oacc_thread_numbers (bool pos, int mask, gimple_seq *seq)
{
  /* For a size query, start the product at 1; for a position query,
     start with no value so the first examined axis seeds it.  */
  tree res = pos ? NULL_TREE : build_int_cst (unsigned_type_node, 1);
  unsigned ix;

  /* Start at gang level, and examine relevant dimension indices.  */
  for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
    if (GOMP_DIM_MASK (ix) & mask)
      {
	if (res)
	  {
	    /* We had an outer index, so scale that by the size of
	       this dimension.  */
	    tree n = oacc_dim_call (false, ix, seq);
	    res = fold_build2 (MULT_EXPR, integer_type_node, res, n);
	  }
	if (pos)
	  {
	    /* Determine index in this dimension.  */
	    tree id = oacc_dim_call (true, ix, seq);
	    if (res)
	      res = fold_build2 (PLUS_EXPR, integer_type_node, res, id);
	    else
	      res = id;
	  }
      }

  /* A position query against an empty mask yields position zero.  */
  if (res == NULL_TREE)
    res = integer_zero_node;

  return res;
}
|
|
282
|
|
/* Transform IFN_GOACC_LOOP calls to actual code.  See
   expand_oacc_for for where these are generated.  At the vector
   level, we stride loops, such that each member of a warp will
   operate on adjacent iterations.  At the worker and gang level,
   each gang/warp executes a set of contiguous iterations.  Chunking
   can override this such that each iteration engine executes a
   contiguous chunk, and then moves on to stride to the next chunk.  */

static void
oacc_xform_loop (gcall *call)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  enum ifn_goacc_loop_kind code
    = (enum ifn_goacc_loop_kind) TREE_INT_CST_LOW (gimple_call_arg (call, 0));
  tree dir = gimple_call_arg (call, 1);
  tree range = gimple_call_arg (call, 2);
  tree step = gimple_call_arg (call, 3);
  tree chunk_size = NULL_TREE;
  unsigned mask = (unsigned) TREE_INT_CST_LOW (gimple_call_arg (call, 5));
  tree lhs = gimple_call_lhs (call);
  tree type = NULL_TREE;
  tree diff_type = TREE_TYPE (range);
  tree r = NULL_TREE;
  gimple_seq seq = NULL;
  bool chunking = false, striding = true;
  unsigned outer_mask = mask & (~mask + 1); // Outermost partitioning
  unsigned inner_mask = mask & ~outer_mask; // Inner partitioning (if any)

  /* Skip lowering if return value of IFN_GOACC_LOOP call is not used.  */
  if (!lhs)
    {
      /* SEQ is still empty here, so this simply deletes the call.  */
      gsi_replace_with_seq (&gsi, seq, true);
      return;
    }

  type = TREE_TYPE (lhs);

#ifdef ACCEL_COMPILER
  chunk_size = gimple_call_arg (call, 4);
  if (integer_minus_onep (chunk_size)  /* Force static allocation.  */
      || integer_zerop (chunk_size))   /* Default (also static).  */
    {
      /* If we're at the gang level, we want each to execute a
	 contiguous run of iterations.  Otherwise we want each element
	 to stride.  */
      striding = !(outer_mask & GOMP_DIM_MASK (GOMP_DIM_GANG));
      chunking = false;
    }
  else
    {
      /* Chunk of size 1 is striding.  */
      striding = integer_onep (chunk_size);
      chunking = !striding;
    }
#endif
  /* On the host compiler, the defaults above (striding=true,
     chunking=false) remain in force.  */

  /* striding=true, chunking=true
       -> invalid.
     striding=true, chunking=false
       -> chunks=1
     striding=false,chunking=true
       -> chunks=ceil (range/(chunksize*threads*step))
     striding=false,chunking=false
       -> chunk_size=ceil(range/(threads*step)),chunks=1  */
  push_gimplify_context (true);

  switch (code)
    {
    default: gcc_unreachable ();

    case IFN_GOACC_LOOP_CHUNKS:
      if (!chunking)
	r = build_int_cst (type, 1);
      else
	{
	  /* chunk_max
	     = (range - dir) / (chunks * step * num_threads) + dir  */
	  tree per = oacc_thread_numbers (false, mask, &seq);
	  per = fold_convert (type, per);
	  chunk_size = fold_convert (type, chunk_size);
	  per = fold_build2 (MULT_EXPR, type, per, chunk_size);
	  per = fold_build2 (MULT_EXPR, type, per, step);
	  r = build2 (MINUS_EXPR, type, range, dir);
	  r = build2 (PLUS_EXPR, type, r, per);
	  r = build2 (TRUNC_DIV_EXPR, type, r, per);
	}
      break;

    case IFN_GOACC_LOOP_STEP:
      {
	/* If striding, step by the entire compute volume, otherwise
	   step by the inner volume.  */
	unsigned volume = striding ? mask : inner_mask;

	r = oacc_thread_numbers (false, volume, &seq);
	r = build2 (MULT_EXPR, type, fold_convert (type, r), step);
      }
      break;

    case IFN_GOACC_LOOP_OFFSET:
      /* Enable vectorization on non-SIMT targets.  */
      if (!targetm.simt.vf
	  && outer_mask == GOMP_DIM_MASK (GOMP_DIM_VECTOR)
	  /* If not -fno-tree-loop-vectorize, hint that we want to vectorize
	     the loop.  */
	  && (flag_tree_loop_vectorize
	      || !global_options_set.x_flag_tree_loop_vectorize))
	{
	  basic_block bb = gsi_bb (gsi);
	  class loop *parent = bb->loop_father;
	  class loop *body = parent->inner;

	  parent->force_vectorize = true;
	  parent->safelen = INT_MAX;

	  /* "Chunking loops" may have inner loops.  */
	  if (parent->inner)
	    {
	      body->force_vectorize = true;
	      body->safelen = INT_MAX;
	    }

	  cfun->has_force_vectorize_loops = true;
	}
      if (striding)
	{
	  /* Offset is simply this thread's linear position scaled by
	     STEP (applied below).  */
	  r = oacc_thread_numbers (true, mask, &seq);
	  r = fold_convert (diff_type, r);
	}
      else
	{
	  /* Compute the chunked offset:
	     outer_pos * span + inner_pos (+ chunk_no * volume * chunk_size
	     when chunking).  */
	  tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
	  tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
	  tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
				     inner_size, outer_size);

	  volume = fold_convert (diff_type, volume);
	  if (chunking)
	    chunk_size = fold_convert (diff_type, chunk_size);
	  else
	    {
	      /* chunk_size = ceil (range / (volume * step)).  */
	      tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);

	      chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
	      chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
	      chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
	    }

	  tree span = build2 (MULT_EXPR, diff_type, chunk_size,
			      fold_convert (diff_type, inner_size));
	  r = oacc_thread_numbers (true, outer_mask, &seq);
	  r = fold_convert (diff_type, r);
	  r = build2 (MULT_EXPR, diff_type, r, span);

	  tree inner = oacc_thread_numbers (true, inner_mask, &seq);
	  inner = fold_convert (diff_type, inner);
	  r = fold_build2 (PLUS_EXPR, diff_type, r, inner);

	  if (chunking)
	    {
	      /* Advance by the current chunk number (call argument 6).  */
	      tree chunk = fold_convert (diff_type, gimple_call_arg (call, 6));
	      tree per
		= fold_build2 (MULT_EXPR, diff_type, volume, chunk_size);
	      per = build2 (MULT_EXPR, diff_type, per, chunk);

	      r = build2 (PLUS_EXPR, diff_type, r, per);
	    }
	}
      r = fold_build2 (MULT_EXPR, diff_type, r, step);
      if (type != diff_type)
	r = fold_convert (type, r);
      break;

    case IFN_GOACC_LOOP_BOUND:
      if (striding)
	r = range;
      else
	{
	  /* Same span computation as the OFFSET case above.  */
	  tree inner_size = oacc_thread_numbers (false, inner_mask, &seq);
	  tree outer_size = oacc_thread_numbers (false, outer_mask, &seq);
	  tree volume = fold_build2 (MULT_EXPR, TREE_TYPE (inner_size),
				     inner_size, outer_size);

	  volume = fold_convert (diff_type, volume);
	  if (chunking)
	    chunk_size = fold_convert (diff_type, chunk_size);
	  else
	    {
	      tree per = fold_build2 (MULT_EXPR, diff_type, volume, step);

	      chunk_size = build2 (MINUS_EXPR, diff_type, range, dir);
	      chunk_size = build2 (PLUS_EXPR, diff_type, chunk_size, per);
	      chunk_size = build2 (TRUNC_DIV_EXPR, diff_type, chunk_size, per);
	    }

	  tree span = build2 (MULT_EXPR, diff_type, chunk_size,
			      fold_convert (diff_type, inner_size));

	  r = fold_build2 (MULT_EXPR, diff_type, span, step);

	  /* Bound = offset (argument 6) + span, clamped to RANGE; DIR
	     selects MIN for an upward loop, MAX for a downward one.  */
	  tree offset = gimple_call_arg (call, 6);
	  r = build2 (PLUS_EXPR, diff_type, r,
		      fold_convert (diff_type, offset));
	  r = build2 (integer_onep (dir) ? MIN_EXPR : MAX_EXPR,
		      diff_type, r, range);
	}
      if (diff_type != type)
	r = fold_convert (type, r);
      break;
    }

  gimplify_assign (lhs, r, &seq);

  pop_gimplify_context (NULL);

  /* Replace the IFN_GOACC_LOOP call with the computed sequence.  */
  gsi_replace_with_seq (&gsi, seq, true);
}
|
|
500
|
|
/* Transform a GOACC_TILE call.  Determines the element loop span for
   the specified loop of the nest.  This is 1 if we're not tiling.

   GOACC_TILE (collapse_count, loop_no, tile_arg, gwv_tile, gwv_element);  */

static void
oacc_xform_tile (gcall *call)
{
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  unsigned collapse = tree_to_uhwi (gimple_call_arg (call, 0));
  /* Inner loops have higher loop_nos.  */
  unsigned loop_no = tree_to_uhwi (gimple_call_arg (call, 1));
  tree tile_size = gimple_call_arg (call, 2);
  unsigned e_mask = tree_to_uhwi (gimple_call_arg (call, 4));
  tree lhs = gimple_call_lhs (call);
  tree type = TREE_TYPE (lhs);
  gimple_seq seq = NULL;
  tree span = build_int_cst (type, 1);

  /* Element loops may only be vector and/or worker partitioned.  */
  gcc_assert (!(e_mask
		& ~(GOMP_DIM_MASK (GOMP_DIM_VECTOR)
		    | GOMP_DIM_MASK (GOMP_DIM_WORKER))));
  push_gimplify_context (!seen_error ());

#ifndef ACCEL_COMPILER
  /* Partitioning disabled on host compilers.  */
  e_mask = 0;
#endif
  if (!e_mask)
    /* Not partitioning.  */
    span = integer_one_node;
  else if (!integer_zerop (tile_size))
    /* User explicitly specified size.  */
    span = tile_size;
  else
    {
      /* Pick a size based on the partitioning of the element loop and
	 the number of loop nests.  */
      tree first_size = NULL_TREE;
      tree second_size = NULL_TREE;

      if (e_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR))
	first_size = oacc_dim_call (false, GOMP_DIM_VECTOR, &seq);
      if (e_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER))
	second_size = oacc_dim_call (false, GOMP_DIM_WORKER, &seq);

      /* If only the worker axis applies, promote it to primary.  */
      if (!first_size)
	{
	  first_size = second_size;
	  second_size = NULL_TREE;
	}

      if (loop_no + 1 == collapse)
	{
	  /* Innermost loop of the nest.  */
	  span = first_size;
	  if (!loop_no && second_size)
	    span = fold_build2 (MULT_EXPR, TREE_TYPE (span),
				span, second_size);
	}
      else if (loop_no + 2 == collapse)
	/* Second-innermost loop.  */
	span = second_size;
      else
	span = NULL_TREE;

      if (!span)
	/* There's no obvious element size for this loop.  Options
	   are 1, first_size or some non-unity constant (32 is my
	   favourite).  We should gather some statistics.  */
	span = first_size;
    }

  span = fold_convert (type, span);
  gimplify_assign (lhs, span, &seq);

  pop_gimplify_context (NULL);

  /* Replace the GOACC_TILE call with the computed span sequence.  */
  gsi_replace_with_seq (&gsi, seq, true);
}
|
|
579
|
|
/* Default partitioned and minimum partitioned dimensions.  Filled in
   by oacc_parse_default_dims and read back via oacc_get_default_dim /
   oacc_get_min_dim.  */

static int oacc_default_dims[GOMP_DIM_MAX];
static int oacc_min_dims[GOMP_DIM_MAX];
|
|
584
|
145
|
585 int
|
|
586 oacc_get_default_dim (int dim)
|
|
587 {
|
|
588 gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
|
|
589 return oacc_default_dims[dim];
|
|
590 }
|
|
591
|
|
592 int
|
|
593 oacc_get_min_dim (int dim)
|
|
594 {
|
|
595 gcc_assert (0 <= dim && dim < GOMP_DIM_MAX);
|
|
596 return oacc_min_dims[dim];
|
|
597 }
|
|
598
|
111
|
/* Parse the default dimension parameter.  This is a set of
   :-separated optional compute dimensions.  Each specified dimension
   is a positive integer.  When device type support is added, it is
   planned to be a comma separated list of such compute dimensions,
   with all but the first prefixed by the colon-terminated device
   type.  */

static void
oacc_parse_default_dims (const char *dims)
{
  int ix;

  /* Reset: defaults unspecified (-1), minimums 1.  */
  for (ix = GOMP_DIM_MAX; ix--;)
    {
      oacc_default_dims[ix] = -1;
      oacc_min_dims[ix] = 1;
    }

#ifndef ACCEL_COMPILER
  /* Cannot be overridden on the host.  */
  dims = NULL;
#endif
  if (dims)
    {
      const char *pos = dims;

      for (ix = 0; *pos && ix != GOMP_DIM_MAX; ix++)
	{
	  if (ix)
	    {
	      /* Subsequent dimensions are preceded by ':'.  */
	      if (*pos != ':')
		goto malformed;
	      pos++;
	    }

	  /* An immediately-following ':' means this slot is left
	     unspecified.  */
	  if (*pos != ':')
	    {
	      long val;
	      const char *eptr;

	      errno = 0;
	      val = strtol (pos, CONST_CAST (char **, &eptr), 10);
	      /* Reject conversion errors, non-positive values, and
		 values that do not fit in an int.  */
	      if (errno || val <= 0 || (int) val != val)
		goto malformed;
	      pos = eptr;
	      oacc_default_dims[ix] = (int) val;
	    }
	}
      if (*pos)
	{
	malformed:
	  error_at (UNKNOWN_LOCATION,
		    "%<-fopenacc-dim%> operand is malformed at %qs", pos);
	}
    }

  /* Allow the backend to validate the dimensions.  */
  targetm.goacc.validate_dims (NULL_TREE, oacc_default_dims, -1, 0);
  targetm.goacc.validate_dims (NULL_TREE, oacc_min_dims, -2, 0);
}
|
|
659
|
|
/* Validate and update the dimensions for offloaded FN.  ATTRS is the
   raw attribute.  DIMS is an array of dimensions, which is filled in.
   LEVEL is the partitioning level of a routine, or -1 for an offload
   region itself.  USED is the mask of partitioned execution in the
   function.  */

static void
oacc_validate_dims (tree fn, tree attrs, int *dims, int level, unsigned used)
{
  tree purpose[GOMP_DIM_MAX];
  unsigned ix;
  tree pos = TREE_VALUE (attrs);

  /* Make sure the attribute creator attached the dimension
     information.  */
  gcc_assert (pos);

  /* Pull the per-dimension values (or -1 when absent) out of the
     attribute list, remembering each TREE_PURPOSE so the list can be
     reconstructed below.  */
  for (ix = 0; ix != GOMP_DIM_MAX; ix++)
    {
      purpose[ix] = TREE_PURPOSE (pos);
      tree val = TREE_VALUE (pos);
      dims[ix] = val ? TREE_INT_CST_LOW (val) : -1;
      pos = TREE_CHAIN (pos);
    }

  bool changed = targetm.goacc.validate_dims (fn, dims, level, used);

  /* Default anything left to 1 or a partitioned default.  */
  for (ix = 0; ix != GOMP_DIM_MAX; ix++)
    if (dims[ix] < 0)
      {
	/* The OpenACC spec says 'If the [num_gangs] clause is not
	   specified, an implementation-defined default will be used;
	   the default may depend on the code within the construct.'
	   (2.5.6).  Thus an implementation is free to choose
	   non-unity default for a parallel region that doesn't have
	   any gang-partitioned loops.  However, it appears that there
	   is a sufficient body of user code that expects non-gang
	   partitioned regions to not execute in gang-redundant mode.
	   So we (a) don't warn about the non-portability and (b) pick
	   the minimum permissible dimension size when there is no
	   partitioned execution.  Otherwise we pick the global
	   default for the dimension, which the user can control.  The
	   same wording and logic applies to num_workers and
	   vector_length, however the worker- or vector- single
	   execution doesn't have the same impact as gang-redundant
	   execution.  (If the minimum gang-level partitioning is not 1,
	   the target is probably too confusing.)  */
	dims[ix] = (used & GOMP_DIM_MASK (ix)
		    ? oacc_default_dims[ix] : oacc_min_dims[ix]);
	changed = true;
      }

  if (changed)
    {
      /* Replace the attribute with new values.  */
      pos = NULL_TREE;
      for (ix = GOMP_DIM_MAX; ix--;)
	pos = tree_cons (purpose[ix],
			 build_int_cst (integer_type_node, dims[ix]), pos);
      oacc_replace_fn_attrib (fn, pos);
    }
}
|
|
723
|
|
724 /* Create an empty OpenACC loop structure at LOC. */
|
|
725
|
|
726 static oacc_loop *
|
|
727 new_oacc_loop_raw (oacc_loop *parent, location_t loc)
|
|
728 {
|
|
729 oacc_loop *loop = XCNEW (oacc_loop);
|
|
730
|
|
731 loop->parent = parent;
|
|
732
|
|
733 if (parent)
|
|
734 {
|
|
735 loop->sibling = parent->child;
|
|
736 parent->child = loop;
|
|
737 }
|
|
738
|
|
739 loop->loc = loc;
|
|
740 return loop;
|
|
741 }
|
|
742
|
|
/* Create an outermost, dummy OpenACC loop for offloaded function
   DECL.  */

static oacc_loop *
new_oacc_loop_outer (tree decl)
{
  /* NULL parent marks this as the root of the loop tree; locate it at
     the function's source location.  */
  return new_oacc_loop_raw (NULL, DECL_SOURCE_LOCATION (decl));
}
|
|
751
|
|
/* Start a new OpenACC loop structure beginning at head marker HEAD.
   Link into PARENT loop.  Return the new loop.  */

static oacc_loop *
new_oacc_loop (oacc_loop *parent, gcall *marker)
{
  oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (marker));

  loop->marker = marker;

  /* TODO: This is where device_type flattening would occur for the loop
     flags.  */

  /* Marker argument 3 carries the partitioning flags (OLF_*).  */
  loop->flags = TREE_INT_CST_LOW (gimple_call_arg (marker, 3));

  /* Marker argument 4 carries the chunk size, which is only
     meaningful for gang-static scheduling; otherwise use zero.  */
  tree chunk_size = integer_zero_node;
  if (loop->flags & OLF_GANG_STATIC)
    chunk_size = gimple_call_arg (marker, 4);
  loop->chunk_size = chunk_size;

  return loop;
}
|
|
774
|
|
/* Create a dummy loop encompassing a call to an OpenACC routine.
   Extract the routine's partitioning requirements.  */

static void
new_oacc_loop_routine (oacc_loop *parent, gcall *call, tree decl, tree attrs)
{
  oacc_loop *loop = new_oacc_loop_raw (parent, gimple_location (call));
  int level = oacc_fn_attrib_level (attrs);

  /* ATTRS must describe a routine (level >= 0), not an offload
     region.  */
  gcc_assert (level >= 0);

  loop->marker = call;
  loop->routine = decl;
  /* Mask covering LEVEL and all deeper partitioning axes.  */
  loop->mask = ((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1)
		^ (GOMP_DIM_MASK (level) - 1));
}
|
|
791
|
|
792 /* Finish off the current OpenACC loop ending at tail marker TAIL.
|
|
793 Return the parent loop. */
|
|
794
|
|
795 static oacc_loop *
|
|
796 finish_oacc_loop (oacc_loop *loop)
|
|
797 {
|
|
798 /* If the loop has been collapsed, don't partition it. */
|
|
799 if (loop->ifns.is_empty ())
|
|
800 loop->mask = loop->flags = 0;
|
|
801 return loop->parent;
|
|
802 }
|
|
803
|
|
804 /* Free all OpenACC loop structures within LOOP (inclusive). */
|
|
805
|
|
806 static void
|
|
807 free_oacc_loop (oacc_loop *loop)
|
|
808 {
|
|
809 if (loop->sibling)
|
|
810 free_oacc_loop (loop->sibling);
|
|
811 if (loop->child)
|
|
812 free_oacc_loop (loop->child);
|
|
813
|
|
814 loop->ifns.release ();
|
|
815 free (loop);
|
|
816 }
|
|
817
|
|
/* Dump out the OpenACC loop head or tail beginning at FROM.  */

static void
dump_oacc_loop_part (FILE *file, gcall *from, int depth,
		     const char *title, int level)
{
  /* The marker kind of FROM; the sequence ends at the next distinct
     marker of the same kind.  */
  enum ifn_unique_kind kind
    = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));

  fprintf (file, "%*s%s-%d:\n", depth * 2, "", title, level);
  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    {
      gimple *stmt = gsi_stmt (gsi);

      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
	{
	  enum ifn_unique_kind k
	    = ((enum ifn_unique_kind) TREE_INT_CST_LOW
	       (gimple_call_arg (stmt, 0)));

	  /* Stop on a later marker of the same kind.  */
	  if (k == kind && stmt != from)
	    break;
	}
      print_gimple_stmt (file, stmt, depth * 2 + 2);

      /* Advance; follow single-successor edges past block
	 boundaries.  */
      gsi_next (&gsi);
      while (gsi_end_p (gsi))
	gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    }
}
|
|
848
|
145
|
/* Dump OpenACC loop LOOP, its children, and its siblings.  */

static void
dump_oacc_loop (FILE *file, oacc_loop *loop, int depth)
{
  int ix;

  fprintf (file, "%*sLoop %x(%x) %s:%u\n", depth * 2, "",
	   loop->flags, loop->mask,
	   LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc));

  if (loop->marker)
    print_gimple_stmt (file, loop->marker, depth * 2);

  if (loop->routine)
    fprintf (file, "%*sRoutine %s:%u:%s\n",
	     depth * 2, "", DECL_SOURCE_FILE (loop->routine),
	     DECL_SOURCE_LINE (loop->routine),
	     IDENTIFIER_POINTER (DECL_NAME (loop->routine)));

  /* Heads are dumped from the gang axis inward; tails in the reverse
     order.  */
  for (ix = GOMP_DIM_GANG; ix != GOMP_DIM_MAX; ix++)
    if (loop->heads[ix])
      dump_oacc_loop_part (file, loop->heads[ix], depth, "Head", ix);
  for (ix = GOMP_DIM_MAX; ix--;)
    if (loop->tails[ix])
      dump_oacc_loop_part (file, loop->tails[ix], depth, "Tail", ix);

  if (loop->child)
    dump_oacc_loop (file, loop->child, depth + 1);
  if (loop->sibling)
    dump_oacc_loop (file, loop->sibling, depth);
}
|
|
881
|
|
/* Forward declaration, so the definition below does not trigger a
   missing-declaration warning.  */
void debug_oacc_loop (oacc_loop *);

/* Dump loops to stderr.  */

DEBUG_FUNCTION void
debug_oacc_loop (oacc_loop *loop)
{
  dump_oacc_loop (stderr, loop, 0);
}
|
|
891
|
145
|
892 /* Provide diagnostics on OpenACC loop LOOP, its children, and its
|
|
893 siblings. */
|
|
894
|
|
895 static void
|
|
896 inform_oacc_loop (const oacc_loop *loop)
|
|
897 {
|
|
898 const char *gang
|
|
899 = loop->mask & GOMP_DIM_MASK (GOMP_DIM_GANG) ? " gang" : "";
|
|
900 const char *worker
|
|
901 = loop->mask & GOMP_DIM_MASK (GOMP_DIM_WORKER) ? " worker" : "";
|
|
902 const char *vector
|
|
903 = loop->mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR) ? " vector" : "";
|
|
904 const char *seq = loop->mask == 0 ? " seq" : "";
|
|
905 const dump_user_location_t loc
|
|
906 = dump_user_location_t::from_location_t (loop->loc);
|
|
907 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
|
|
908 "assigned OpenACC%s%s%s%s loop parallelism\n", gang, worker,
|
|
909 vector, seq);
|
|
910
|
|
911 if (loop->child)
|
|
912 inform_oacc_loop (loop->child);
|
|
913 if (loop->sibling)
|
|
914 inform_oacc_loop (loop->sibling);
|
|
915 }
|
|
916
|
111
|
/* DFS walk of basic blocks BB onwards, creating OpenACC loop
   structures as we go.  By construction these loops are properly
   nested.  */

static void
oacc_loop_discover_walk (oacc_loop *loop, basic_block bb)
{
  /* MARKER counts the head/tail marker calls seen so far in the
     current marker run; REMAINING is how many more markers the run's
     own count argument says to expect.  */
  int marker = 0;
  int remaining = 0;

  if (bb->flags & BB_VISITED)
    return;

 follow:
  bb->flags |= BB_VISITED;

  /* Scan for loop markers.  */
  for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gimple *stmt = gsi_stmt (gsi);

      if (!is_gimple_call (stmt))
	continue;

      gcall *call = as_a <gcall *> (stmt);

      /* If this is a routine, make a dummy loop for it.  */
      if (tree decl = gimple_call_fndecl (call))
	if (tree attrs = oacc_get_fn_attrib (decl))
	  {
	    gcc_assert (!marker);
	    new_oacc_loop_routine (loop, call, decl, attrs);
	  }

      if (!gimple_call_internal_p (call))
	continue;

      switch (gimple_call_internal_fn (call))
	{
	default:
	  break;

	case IFN_GOACC_LOOP:
	case IFN_GOACC_TILE:
	  /* Record the abstraction function, so we can manipulate it
	     later.  */
	  loop->ifns.safe_push (call);
	  break;

	case IFN_UNIQUE:
	  enum ifn_unique_kind kind
	    = (enum ifn_unique_kind) (TREE_INT_CST_LOW
				      (gimple_call_arg (call, 0)));
	  if (kind == IFN_UNIQUE_OACC_HEAD_MARK
	      || kind == IFN_UNIQUE_OACC_TAIL_MARK)
	    {
	      if (gimple_call_num_args (call) == 2)
		{
		  /* A two-argument marker terminates the run: a tail
		     marker closes the current loop, a head marker
		     records where the head sequence ends.  */
		  gcc_assert (marker && !remaining);
		  marker = 0;
		  if (kind == IFN_UNIQUE_OACC_TAIL_MARK)
		    loop = finish_oacc_loop (loop);
		  else
		    loop->head_end = call;
		}
	      else
		{
		  /* Argument 2 carries the total number of markers in
		     this run; it is the same on each of them.  */
		  int count = TREE_INT_CST_LOW (gimple_call_arg (call, 2));

		  if (!marker)
		    {
		      /* The first head marker of a run opens a new
			 (child) loop.  */
		      if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
			loop = new_oacc_loop (loop, call);
		      remaining = count;
		    }
		  gcc_assert (count == remaining);
		  if (remaining)
		    {
		      remaining--;
		      /* Heads are stored outermost-first, tails
			 innermost-first, so heads[i]/tails[i] pair up
			 per partitioning level.  */
		      if (kind == IFN_UNIQUE_OACC_HEAD_MARK)
			loop->heads[marker] = call;
		      else
			loop->tails[remaining] = call;
		    }
		  marker++;
		}
	    }
	}
    }
  if (remaining || marker)
    {
      /* A marker run continues into the single successor block;
	 consume the rest of it before the general successor walk.  */
      bb = single_succ (bb);
      gcc_assert (single_pred_p (bb) && !(bb->flags & BB_VISITED));
      goto follow;
    }

  /* Walk successor blocks.  */
  edge e;
  edge_iterator ei;

  FOR_EACH_EDGE (e, ei, bb->succs)
    oacc_loop_discover_walk (loop, e->dest);
}
|
|
1021
|
|
1022 /* LOOP is the first sibling. Reverse the order in place and return
|
|
1023 the new first sibling. Recurse to child loops. */
|
|
1024
|
|
1025 static oacc_loop *
|
|
1026 oacc_loop_sibling_nreverse (oacc_loop *loop)
|
|
1027 {
|
|
1028 oacc_loop *last = NULL;
|
|
1029 do
|
|
1030 {
|
|
1031 if (loop->child)
|
|
1032 loop->child = oacc_loop_sibling_nreverse (loop->child);
|
|
1033
|
|
1034 oacc_loop *next = loop->sibling;
|
|
1035 loop->sibling = last;
|
|
1036 last = loop;
|
|
1037 loop = next;
|
|
1038 }
|
|
1039 while (loop);
|
|
1040
|
|
1041 return last;
|
|
1042 }
|
|
1043
|
|
1044 /* Discover the OpenACC loops marked up by HEAD and TAIL markers for
|
|
1045 the current function. */
|
|
1046
|
|
1047 static oacc_loop *
|
|
1048 oacc_loop_discovery ()
|
|
1049 {
|
|
1050 /* Clear basic block flags, in particular BB_VISITED which we're going to use
|
|
1051 in the following. */
|
|
1052 clear_bb_flags ();
|
|
1053
|
|
1054 oacc_loop *top = new_oacc_loop_outer (current_function_decl);
|
|
1055 oacc_loop_discover_walk (top, ENTRY_BLOCK_PTR_FOR_FN (cfun));
|
|
1056
|
|
1057 /* The siblings were constructed in reverse order, reverse them so
|
|
1058 that diagnostics come out in an unsurprising order. */
|
|
1059 top = oacc_loop_sibling_nreverse (top);
|
|
1060
|
|
1061 return top;
|
|
1062 }
|
|
1063
|
|
/* Transform the abstract internal function markers starting at FROM
   to be for partitioning level LEVEL.  Stop when we meet another HEAD
   or TAIL marker.  */

static void
oacc_loop_xform_head_tail (gcall *from, int level)
{
  enum ifn_unique_kind kind
    = (enum ifn_unique_kind) TREE_INT_CST_LOW (gimple_call_arg (from, 0));
  tree replacement = build_int_cst (unsigned_type_node, level);

  for (gimple_stmt_iterator gsi = gsi_for_stmt (from);;)
    {
      gimple *stmt = gsi_stmt (gsi);

      if (gimple_call_internal_p (stmt, IFN_UNIQUE))
	{
	  enum ifn_unique_kind k
	    = ((enum ifn_unique_kind)
	       TREE_INT_CST_LOW (gimple_call_arg (stmt, 0)));

	  /* Rewrite the axis argument of fork/join markers; a second
	     marker of FROM's own kind terminates the sequence.  */
	  if (k == IFN_UNIQUE_OACC_FORK || k == IFN_UNIQUE_OACC_JOIN)
	    *gimple_call_arg_ptr (stmt, 2) = replacement;
	  else if (k == kind && stmt != from)
	    break;
	}
      else if (gimple_call_internal_p (stmt, IFN_GOACC_REDUCTION))
	/* Reductions carry the partitioning level in argument 3.  */
	*gimple_call_arg_ptr (stmt, 3) = replacement;

      gsi_next (&gsi);
      /* The marker sequence may span basic blocks; when we fall off
	 the end of a block, continue in its single successor.  */
      while (gsi_end_p (gsi))
	gsi = gsi_start_bb (single_succ (gsi_bb (gsi)));
    }
}
|
|
1098
|
|
/* Process the discovered OpenACC loops, setting the correct
   partitioning level etc.  */

static void
oacc_loop_process (oacc_loop *loop)
{
  /* Depth-first: children are processed before this loop's own
     markers.  */
  if (loop->child)
    oacc_loop_process (loop->child);

  if (loop->mask && !loop->routine)
    {
      int ix;
      tree mask_arg = build_int_cst (unsigned_type_node, loop->mask);
      tree e_mask_arg = build_int_cst (unsigned_type_node, loop->e_mask);
      tree chunk_arg = loop->chunk_size;
      gcall *call;

      /* Fill in the now-known partitioning masks (and chunk size, for
	 the non-element variant) on the recorded abstraction
	 functions.  */
      for (ix = 0; loop->ifns.iterate (ix, &call); ix++)
	switch (gimple_call_internal_fn (call))
	  {
	  case IFN_GOACC_LOOP:
	    {
	      /* A -1 in argument 5 marks the element (inner tile)
		 variant of the loop abstraction function.  */
	      bool is_e = gimple_call_arg (call, 5) == integer_minus_one_node;
	      gimple_call_set_arg (call, 5, is_e ? e_mask_arg : mask_arg);
	      if (!is_e)
		gimple_call_set_arg (call, 4, chunk_arg);
	    }
	    break;

	  case IFN_GOACC_TILE:
	    gimple_call_set_arg (call, 3, mask_arg);
	    gimple_call_set_arg (call, 4, e_mask_arg);
	    break;

	  default:
	    gcc_unreachable ();
	  }

      /* Assign a concrete axis to each head/tail marker pair, walking
	 the assigned dimensions from outermost inwards.  */
      unsigned dim = GOMP_DIM_GANG;
      unsigned mask = loop->mask | loop->e_mask;
      for (ix = 0; ix != GOMP_DIM_MAX && mask; ix++)
	{
	  while (!(GOMP_DIM_MASK (dim) & mask))
	    dim++;

	  oacc_loop_xform_head_tail (loop->heads[ix], dim);
	  oacc_loop_xform_head_tail (loop->tails[ix], dim);

	  mask ^= GOMP_DIM_MASK (dim);
	}
    }

  if (loop->sibling)
    oacc_loop_process (loop->sibling);
}
|
|
1154
|
|
/* Walk the OpenACC loop hierarchy checking and assigning the
   programmer-specified partitionings.  OUTER_MASK is the partitioning
   this loop is contained within.  Return mask of partitioning
   encountered.  If any auto loops are discovered, set GOMP_DIM_MAX
   bit.  */

static unsigned
oacc_loop_fixed_partitions (oacc_loop *loop, unsigned outer_mask)
{
  unsigned this_mask = loop->mask;
  unsigned mask_all = 0;
  bool noisy = true;

#ifdef ACCEL_COMPILER
  /* When device_type is supported, we want the device compiler to be
     noisy, if the loop parameters are device_type-specific.  */
  noisy = false;
#endif

  if (!loop->routine)
    {
      bool auto_par = (loop->flags & OLF_AUTO) != 0;
      bool seq_par = (loop->flags & OLF_SEQ) != 0;
      bool tiling = (loop->flags & OLF_TILE) != 0;

      /* Extract the explicitly requested axes from the flags.  */
      this_mask = ((loop->flags >> OLF_DIM_BASE)
		   & (GOMP_DIM_MASK (GOMP_DIM_MAX) - 1));

      /* Apply auto partitioning if this is a non-partitioned regular
	 loop, or (no more than) single axis tiled loop.  */
      bool maybe_auto
	= !seq_par && this_mask == (tiling ? this_mask & -this_mask : 0);

      /* At most one of explicit axes, 'auto' and 'seq' may be in
	 effect at once.  */
      if ((this_mask != 0) + auto_par + seq_par > 1)
	{
	  if (noisy)
	    error_at (loop->loc,
		      seq_par
		      ? G_("%<seq%> overrides other OpenACC loop specifiers")
		      : G_("%<auto%> conflicts with other OpenACC loop "
			   "specifiers"));
	  maybe_auto = false;
	  loop->flags &= ~OLF_AUTO;
	  if (seq_par)
	    {
	      loop->flags
		&= ~((GOMP_DIM_MASK (GOMP_DIM_MAX) - 1) << OLF_DIM_BASE);
	      this_mask = 0;
	    }
	}

      if (maybe_auto && (loop->flags & OLF_INDEPENDENT))
	{
	  loop->flags |= OLF_AUTO;
	  /* The GOMP_DIM_MAX bit flags that auto partitioning is
	     needed; see oacc_loop_partition.  */
	  mask_all |= GOMP_DIM_MASK (GOMP_DIM_MAX);
	}
    }

  if (this_mask & outer_mask)
    {
      /* This loop requests an axis that is already used outside it:
	 find the enclosing loop responsible (no such loop means the
	 containing routine's level disallows the axis).  */
      const oacc_loop *outer;
      for (outer = loop->parent; outer; outer = outer->parent)
	if ((outer->mask | outer->e_mask) & this_mask)
	  break;

      if (noisy)
	{
	  if (outer)
	    {
	      error_at (loop->loc,
			loop->routine
			? G_("routine call uses same OpenACC parallelism"
			     " as containing loop")
			: G_("inner loop uses same OpenACC parallelism"
			     " as containing loop"));
	      inform (outer->loc, "containing loop here");
	    }
	  else
	    error_at (loop->loc,
		      loop->routine
		      ? G_("routine call uses OpenACC parallelism disallowed"
			   " by containing routine")
		      : G_("loop uses OpenACC parallelism disallowed"
			   " by containing routine"));

	  if (loop->routine)
	    inform (DECL_SOURCE_LOCATION (loop->routine),
		    "routine %qD declared here", loop->routine);
	}
      /* Drop the conflicting axes.  */
      this_mask &= ~outer_mask;
    }
  else
    {
      unsigned outermost = least_bit_hwi (this_mask);

      /* The outermost requested axis must be strictly inside
	 everything already used outside this loop.  */
      if (outermost && outermost <= outer_mask)
	{
	  if (noisy)
	    {
	      error_at (loop->loc,
			"incorrectly nested OpenACC loop parallelism");

	      /* NOTE(review): this compares OUTER->FLAGS (the raw
		 flag word) against OUTERMOST (an axis mask) to locate
		 the offending enclosing loop — presumably intentional
		 given OLF encoding, but worth confirming.  */
	      const oacc_loop *outer;
	      for (outer = loop->parent;
		   outer->flags && outer->flags < outermost;
		   outer = outer->parent)
		continue;
	      inform (outer->loc, "containing loop here");
	    }

	  this_mask &= ~outermost;
	}
    }

  mask_all |= this_mask;

  if (loop->flags & OLF_TILE)
    {
      /* When tiling, vector goes to the element loop, and failing
	 that we put worker there.  The std doesn't contemplate
	 specifying all three.  We choose to put worker and vector on
	 the element loops in that case.  */
      unsigned this_e_mask = this_mask & GOMP_DIM_MASK (GOMP_DIM_VECTOR);
      if (!this_e_mask || this_mask & GOMP_DIM_MASK (GOMP_DIM_GANG))
	this_e_mask |= this_mask & GOMP_DIM_MASK (GOMP_DIM_WORKER);

      loop->e_mask = this_e_mask;
      this_mask ^= this_e_mask;
    }

  loop->mask = this_mask;

  if (dump_file)
    fprintf (dump_file, "Loop %s:%d user specified %d & %d\n",
	     LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
	     loop->mask, loop->e_mask);

  if (loop->child)
    {
      /* Children may not reuse any axis used here or outside.  */
      unsigned tmp_mask = outer_mask | this_mask | loop->e_mask;
      loop->inner = oacc_loop_fixed_partitions (loop->child, tmp_mask);
      mask_all |= loop->inner;
    }

  if (loop->sibling)
    mask_all |= oacc_loop_fixed_partitions (loop->sibling, outer_mask);

  return mask_all;
}
|
|
1304
|
|
/* Walk the OpenACC loop hierarchy to assign auto-partitioned loops.
   OUTER_MASK is the partitioning this loop is contained within.
   OUTER_ASSIGN is true if an outer loop is being auto-partitioned.
   Return the cumulative partitioning used by this loop, siblings and
   children.  */

static unsigned
oacc_loop_auto_partitions (oacc_loop *loop, unsigned outer_mask,
			   bool outer_assign)
{
  bool assign = (loop->flags & OLF_AUTO) && (loop->flags & OLF_INDEPENDENT);
  bool noisy = true;
  bool tiling = loop->flags & OLF_TILE;

#ifdef ACCEL_COMPILER
  /* When device_type is supported, we want the device compiler to be
     noisy, if the loop parameters are device_type-specific.  */
  noisy = false;
#endif

  if (assign && (!outer_assign || loop->inner))
    {
      /* Allocate outermost and non-innermost loops at the outermost
	 non-innermost available level.  */
      unsigned this_mask = GOMP_DIM_MASK (GOMP_DIM_GANG);

      /* Find the first outermost available partition.  */
      while (this_mask <= outer_mask)
	this_mask <<= 1;

      /* Grab two axes if tiling, and we've not assigned anything.  */
      if (tiling && !(loop->mask | loop->e_mask))
	this_mask |= this_mask << 1;

      /* Prohibit the innermost partitioning at the moment.  */
      this_mask &= GOMP_DIM_MASK (GOMP_DIM_MAX - 1) - 1;

      /* Don't use any dimension explicitly claimed by an inner loop.  */
      this_mask &= ~loop->inner;

      if (tiling && !loop->e_mask)
	{
	  /* If we got two axes, allocate the inner one to the element
	     loop.  */
	  loop->e_mask = this_mask & (this_mask << 1);
	  this_mask ^= loop->e_mask;
	}

      loop->mask |= this_mask;
    }

  if (loop->child)
    {
      /* Recurse with everything used here or outside off-limits.  */
      unsigned tmp_mask = outer_mask | loop->mask | loop->e_mask;
      loop->inner = oacc_loop_auto_partitions (loop->child, tmp_mask,
					       outer_assign | assign);
    }

  if (assign && (!loop->mask || (tiling && !loop->e_mask) || !outer_assign))
    {
      /* Allocate the loop at the innermost available level.  Note
	 that we do this even if we already assigned this loop the
	 outermost available level above.  That way we'll partition
	 this along 2 axes, if they are available.  */
      unsigned this_mask = 0;

      /* Determine the outermost partitioning used within this loop.  */
      this_mask = loop->inner | GOMP_DIM_MASK (GOMP_DIM_MAX);
      this_mask = least_bit_hwi (this_mask);

      /* Pick the partitioning just inside that one.  */
      this_mask >>= 1;

      /* And avoid picking one used by an outer loop.  */
      this_mask &= ~outer_mask;

      /* If tiling and we failed completely above, grab the next one
	 too.  Making sure it doesn't hit an outer loop.  */
      if (tiling)
	{
	  this_mask &= ~(loop->e_mask | loop->mask);
	  unsigned tile_mask = ((this_mask >> 1)
				& ~(outer_mask | loop->e_mask | loop->mask));

	  if (tile_mask || loop->mask)
	    {
	      loop->e_mask |= this_mask;
	      this_mask = tile_mask;
	    }
	  if (!loop->e_mask && noisy)
	    warning_at (loop->loc, 0,
			"insufficient partitioning available"
			" to parallelize element loop");
	}

      loop->mask |= this_mask;
      if (!loop->mask && noisy)
	warning_at (loop->loc, 0,
		    tiling
		    ? G_("insufficient partitioning available"
			 " to parallelize tile loop")
		    : G_("insufficient partitioning available"
			 " to parallelize loop"));
    }

  if (assign && dump_file)
    fprintf (dump_file, "Auto loop %s:%d assigned %d & %d\n",
	     LOCATION_FILE (loop->loc), LOCATION_LINE (loop->loc),
	     loop->mask, loop->e_mask);

  unsigned inner_mask = 0;

  if (loop->sibling)
    inner_mask |= oacc_loop_auto_partitions (loop->sibling,
					     outer_mask, outer_assign);

  inner_mask |= loop->inner | loop->mask | loop->e_mask;

  return inner_mask;
}
|
|
1425
|
|
1426 /* Walk the OpenACC loop heirarchy to check and assign partitioning
|
|
1427 axes. Return mask of partitioning. */
|
|
1428
|
|
1429 static unsigned
|
|
1430 oacc_loop_partition (oacc_loop *loop, unsigned outer_mask)
|
|
1431 {
|
|
1432 unsigned mask_all = oacc_loop_fixed_partitions (loop, outer_mask);
|
|
1433
|
|
1434 if (mask_all & GOMP_DIM_MASK (GOMP_DIM_MAX))
|
|
1435 {
|
|
1436 mask_all ^= GOMP_DIM_MASK (GOMP_DIM_MAX);
|
|
1437 mask_all |= oacc_loop_auto_partitions (loop, outer_mask, false);
|
|
1438 }
|
|
1439 return mask_all;
|
|
1440 }
|
|
1441
|
|
1442 /* Default fork/join early expander. Delete the function calls if
|
|
1443 there is no RTL expander. */
|
|
1444
|
|
1445 bool
|
|
1446 default_goacc_fork_join (gcall *ARG_UNUSED (call),
|
|
1447 const int *ARG_UNUSED (dims), bool is_fork)
|
|
1448 {
|
|
1449 if (is_fork)
|
|
1450 return targetm.have_oacc_fork ();
|
|
1451 else
|
|
1452 return targetm.have_oacc_join ();
|
|
1453 }
|
|
1454
|
|
/* Default goacc.reduction early expander.

   LHS-opt = IFN_REDUCTION (KIND, RES_PTR, VAR, LEVEL, OP, OFFSET)
   If RES_PTR is not integer-zerop:
       SETUP - emit 'LHS = *RES_PTR', LHS = NULL
       TEARDOWN - emit '*RES_PTR = VAR'
   If LHS is not NULL
       emit 'LHS = VAR'   */

void
default_goacc_reduction (gcall *call)
{
  unsigned code = (unsigned)TREE_INT_CST_LOW (gimple_call_arg (call, 0));
  gimple_stmt_iterator gsi = gsi_for_stmt (call);
  tree lhs = gimple_call_lhs (call);
  tree var = gimple_call_arg (call, 2);
  gimple_seq seq = NULL;

  if (code == IFN_GOACC_REDUCTION_SETUP
      || code == IFN_GOACC_REDUCTION_TEARDOWN)
    {
      /* Setup and Teardown need to copy from/to the receiver object,
	 if there is one.  */
      tree ref_to_res = gimple_call_arg (call, 1);

      if (!integer_zerop (ref_to_res))
	{
	  tree dst = build_simple_mem_ref (ref_to_res);
	  tree src = var;

	  if (code == IFN_GOACC_REDUCTION_SETUP)
	    {
	      /* SETUP copies the other way: receiver into LHS.  Clear
		 LHS so the generic LHS = VAR copy below is skipped.  */
	      src = dst;
	      dst = lhs;
	      lhs = NULL;
	    }
	  gimple_seq_add_stmt (&seq, gimple_build_assign (dst, src));
	}
    }

  /* Copy VAR to LHS, if there is an LHS.  */
  if (lhs)
    gimple_seq_add_stmt (&seq, gimple_build_assign (lhs, var));

  /* Replace the internal-function call with the (possibly empty)
     assignment sequence.  */
  gsi_replace_with_seq (&gsi, seq, true);
}
|
|
1501
|
|
/* Main entry point for oacc transformations which run on the device
   compiler after LTO, so we know what the target device is at this
   point (including the host fallback).  */

static unsigned int
execute_oacc_device_lower ()
{
  tree attrs = oacc_get_fn_attrib (current_function_decl);

  if (!attrs)
    /* Not an offloaded function.  */
    return 0;

  /* Parse the default dim argument exactly once.  */
  if ((const void *)flag_openacc_dims != &flag_openacc_dims)
    {
      oacc_parse_default_dims (flag_openacc_dims);
      /* Overwrite the option string with a self-pointer as an
	 "already parsed" sentinel.  */
      flag_openacc_dims = (char *)&flag_openacc_dims;
    }

  bool is_oacc_kernels
    = (lookup_attribute ("oacc kernels",
			 DECL_ATTRIBUTES (current_function_decl)) != NULL);
  bool is_oacc_kernels_parallelized
    = (lookup_attribute ("oacc kernels parallelized",
			 DECL_ATTRIBUTES (current_function_decl)) != NULL);

  /* Unparallelized OpenACC kernels constructs must get launched as 1 x 1 x 1
     kernels, so remove the parallelism dimensions function attributes
     potentially set earlier on.  */
  if (is_oacc_kernels && !is_oacc_kernels_parallelized)
    {
      oacc_set_fn_attrib (current_function_decl, NULL, NULL);
      attrs = oacc_get_fn_attrib (current_function_decl);
    }

  /* Discover, partition and process the loops.  */
  oacc_loop *loops = oacc_loop_discovery ();
  int fn_level = oacc_fn_attrib_level (attrs);

  if (dump_file)
    {
      if (fn_level >= 0)
	fprintf (dump_file, "Function is OpenACC routine level %d\n",
		 fn_level);
      else if (is_oacc_kernels)
	fprintf (dump_file, "Function is %s OpenACC kernels offload\n",
		 (is_oacc_kernels_parallelized
		  ? "parallelized" : "unparallelized"));
      else
	fprintf (dump_file, "Function is OpenACC parallel offload\n");
    }

  /* For a routine at level N, all axes outside that level count as
     already used by the caller.  */
  unsigned outer_mask = fn_level >= 0 ? GOMP_DIM_MASK (fn_level) - 1 : 0;
  unsigned used_mask = oacc_loop_partition (loops, outer_mask);
  /* OpenACC kernels constructs are special: they currently don't use the
     generic oacc_loop infrastructure and attribute/dimension processing.  */
  if (is_oacc_kernels && is_oacc_kernels_parallelized)
    {
      /* Parallelized OpenACC kernels constructs use gang parallelism.  See
	 also tree-parloops.c:create_parallel_loop.  */
      used_mask |= GOMP_DIM_MASK (GOMP_DIM_GANG);
    }

  int dims[GOMP_DIM_MAX];
  oacc_validate_dims (current_function_decl, attrs, dims, fn_level, used_mask);

  if (dump_file)
    {
      const char *comma = "Compute dimensions [";
      for (int ix = 0; ix != GOMP_DIM_MAX; ix++, comma = ", ")
	fprintf (dump_file, "%s%d", comma, dims[ix]);
      fprintf (dump_file, "]\n");
    }

  oacc_loop_process (loops);
  if (dump_file)
    {
      fprintf (dump_file, "OpenACC loops\n");
      dump_oacc_loop (dump_file, loops, 0);
      fprintf (dump_file, "\n");
    }
  if (dump_enabled_p ())
    {
      oacc_loop *l = loops;
      /* OpenACC kernels constructs are special: they currently don't use the
	 generic oacc_loop infrastructure.  */
      if (is_oacc_kernels)
	{
	  /* Create a fake oacc_loop for diagnostic purposes.  */
	  l = new_oacc_loop_raw (NULL,
				 DECL_SOURCE_LOCATION (current_function_decl));
	  l->mask = used_mask;
	}
      else
	{
	  /* Skip the outermost, dummy OpenACC loop  */
	  l = l->child;
	}
      if (l)
	inform_oacc_loop (l);
      if (is_oacc_kernels)
	free_oacc_loop (l);
    }

  /* Offloaded targets may introduce new basic blocks, which require
     dominance information to update SSA.  */
  calculate_dominance_info (CDI_DOMINATORS);

  /* Now lower internal loop functions to target-specific code
     sequences.  */
  basic_block bb;
  FOR_ALL_BB_FN (bb, cfun)
    for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt))
	  {
	    gsi_next (&gsi);
	    continue;
	  }

	gcall *call = as_a <gcall *> (stmt);
	if (!gimple_call_internal_p (call))
	  {
	    gsi_next (&gsi);
	    continue;
	  }

	/* Rewind to allow rescan.  */
	gsi_prev (&gsi);
	bool rescan = false, remove = false;
	enum internal_fn ifn_code = gimple_call_internal_fn (call);

	switch (ifn_code)
	  {
	  default: break;

	  case IFN_GOACC_TILE:
	    oacc_xform_tile (call);
	    rescan = true;
	    break;

	  case IFN_GOACC_LOOP:
	    oacc_xform_loop (call);
	    rescan = true;
	    break;

	  case IFN_GOACC_REDUCTION:
	    /* Mark the function for SSA renaming.  */
	    mark_virtual_operands_for_renaming (cfun);

	    /* If the level is -1, this ended up being an unused
	       axis.  Handle as a default.  */
	    if (integer_minus_onep (gimple_call_arg (call, 3)))
	      default_goacc_reduction (call);
	    else
	      targetm.goacc.reduction (call);
	    rescan = true;
	    break;

	  case IFN_UNIQUE:
	    {
	      enum ifn_unique_kind kind
		= ((enum ifn_unique_kind)
		   TREE_INT_CST_LOW (gimple_call_arg (call, 0)));

	      switch (kind)
		{
		default:
		  break;

		case IFN_UNIQUE_OACC_FORK:
		case IFN_UNIQUE_OACC_JOIN:
		  /* A -1 axis marks a fork/join of an axis that was
		     never assigned; otherwise the target hook decides
		     whether to keep the marker for RTL expansion.  */
		  if (integer_minus_onep (gimple_call_arg (call, 2)))
		    remove = true;
		  else if (!targetm.goacc.fork_join
			   (call, dims, kind == IFN_UNIQUE_OACC_FORK))
		    remove = true;
		  break;

		case IFN_UNIQUE_OACC_HEAD_MARK:
		case IFN_UNIQUE_OACC_TAIL_MARK:
		  remove = true;
		  break;
		}
	      break;
	    }
	  }

	if (gsi_end_p (gsi))
	  /* We rewound past the beginning of the BB.  */
	  gsi = gsi_start_bb (bb);
	else
	  /* Undo the rewind.  */
	  gsi_next (&gsi);

	if (remove)
	  {
	    if (gimple_vdef (call))
	      replace_uses_by (gimple_vdef (call), gimple_vuse (call));
	    if (gimple_call_lhs (call))
	      {
		/* Propagate the data dependency var.  */
		gimple *ass = gimple_build_assign (gimple_call_lhs (call),
						   gimple_call_arg (call, 1));
		gsi_replace (&gsi, ass, false);
	      }
	    else
	      gsi_remove (&gsi, true);
	  }
	else if (!rescan)
	  /* If not rescanning, advance over the call.  */
	  gsi_next (&gsi);
      }

  free_oacc_loop (loops);

  return 0;
}
|
|
1722
|
|
1723 /* Default launch dimension validator. Force everything to 1. A
|
|
1724 backend that wants to provide larger dimensions must override this
|
|
1725 hook. */
|
|
1726
|
|
1727 bool
|
|
1728 default_goacc_validate_dims (tree ARG_UNUSED (decl), int *dims,
|
145
|
1729 int ARG_UNUSED (fn_level),
|
|
1730 unsigned ARG_UNUSED (used))
|
111
|
1731 {
|
|
1732 bool changed = false;
|
|
1733
|
|
1734 for (unsigned ix = 0; ix != GOMP_DIM_MAX; ix++)
|
|
1735 {
|
|
1736 if (dims[ix] != 1)
|
|
1737 {
|
|
1738 dims[ix] = 1;
|
|
1739 changed = true;
|
|
1740 }
|
|
1741 }
|
|
1742
|
|
1743 return changed;
|
|
1744 }
|
|
1745
|
|
1746 /* Default dimension bound is unknown on accelerator and 1 on host. */
|
|
1747
|
|
1748 int
|
|
1749 default_goacc_dim_limit (int ARG_UNUSED (axis))
|
|
1750 {
|
|
1751 #ifdef ACCEL_COMPILER
|
|
1752 return 0;
|
|
1753 #else
|
|
1754 return 1;
|
|
1755 #endif
|
|
1756 }
|
|
1757
|
|
namespace {

/* Pass descriptor for the OpenACC device-lowering GIMPLE pass.  */

const pass_data pass_data_oacc_device_lower =
{
  GIMPLE_PASS, /* type */
  "oaccdevlow", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  0 /* Possibly PROP_gimple_eomp.  */, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa | TODO_cleanup_cfg, /* todo_flags_finish */
};

class pass_oacc_device_lower : public gimple_opt_pass
{
public:
  pass_oacc_device_lower (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_oacc_device_lower, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only when OpenACC is enabled; execute_oacc_device_lower
     additionally bails out early for non-offloaded functions.  */
  virtual bool gate (function *) { return flag_openacc; };

  virtual unsigned int execute (function *)
  {
    return execute_oacc_device_lower ();
  }

}; // class pass_oacc_device_lower

} // anon namespace
|
|
1791
|
|
1792 gimple_opt_pass *
|
|
1793 make_pass_oacc_device_lower (gcc::context *ctxt)
|
|
1794 {
|
|
1795 return new pass_oacc_device_lower (ctxt);
|
|
1796 }
|
|
1797
|
|
1798
|
|
/* Rewrite GOMP_SIMT_ENTER_ALLOC call given by GSI and remove the preceding
   GOMP_SIMT_ENTER call identifying the privatized variables, which are
   turned to structure fields and receive a DECL_VALUE_EXPR accordingly.
   Set *REGIMPLIFY to true, except if no privatized variables were seen.  */

static void
ompdevlow_adjust_simt_enter (gimple_stmt_iterator *gsi, bool *regimplify)
{
  gimple *alloc_stmt = gsi_stmt (*gsi);
  tree simtrec = gimple_call_lhs (alloc_stmt);
  tree simduid = gimple_call_arg (alloc_stmt, 0);
  gimple *enter_stmt = SSA_NAME_DEF_STMT (simduid);
  gcc_assert (gimple_call_internal_p (enter_stmt, IFN_GOMP_SIMT_ENTER));
  /* Build an artificial record type holding all privatized variables;
     SIMTREC will point at an instance of it.  */
  tree rectype = lang_hooks.types.make_type (RECORD_TYPE);
  TYPE_ARTIFICIAL (rectype) = TYPE_NAMELESS (rectype) = 1;
  TREE_ADDRESSABLE (rectype) = 1;
  TREE_TYPE (simtrec) = build_pointer_type (rectype);
  /* Arguments 1..N of the ENTER call are the addresses of the
     privatized variables (or null-pointer placeholders).  */
  for (unsigned i = 1; i < gimple_call_num_args (enter_stmt); i++)
    {
      tree *argp = gimple_call_arg_ptr (enter_stmt, i);
      if (*argp == null_pointer_node)
	continue;
      gcc_assert (TREE_CODE (*argp) == ADDR_EXPR
		  && VAR_P (TREE_OPERAND (*argp, 0)));
      tree var = TREE_OPERAND (*argp, 0);

      /* Create a field mirroring the variable's type, alignment and
	 volatility.  */
      tree field = build_decl (DECL_SOURCE_LOCATION (var), FIELD_DECL,
			       DECL_NAME (var), TREE_TYPE (var));
      SET_DECL_ALIGN (field, DECL_ALIGN (var));
      DECL_USER_ALIGN (field) = DECL_USER_ALIGN (var);
      TREE_THIS_VOLATILE (field) = TREE_THIS_VOLATILE (var);

      insert_field_into_struct (rectype, field);

      /* Redirect uses of VAR to *SIMTREC.FIELD via DECL_VALUE_EXPR;
	 the caller must regimplify to materialize the accesses.  */
      tree t = build_simple_mem_ref (simtrec);
      t = build3 (COMPONENT_REF, TREE_TYPE (var), t, field, NULL);
      TREE_THIS_VOLATILE (t) = TREE_THIS_VOLATILE (var);
      SET_DECL_VALUE_EXPR (var, t);
      DECL_HAS_VALUE_EXPR_P (var) = 1;
      *regimplify = true;
    }
  layout_type (rectype);
  tree size = TYPE_SIZE_UNIT (rectype);
  tree align = build_int_cst (TREE_TYPE (size), TYPE_ALIGN_UNIT (rectype));

  /* Replace the ENTER_ALLOC call with one carrying the now-known size
     and alignment of the record.  */
  alloc_stmt
    = gimple_build_call_internal (IFN_GOMP_SIMT_ENTER_ALLOC, 2, size, align);
  gimple_call_set_lhs (alloc_stmt, simtrec);
  gsi_replace (gsi, alloc_stmt, false);
  /* The ENTER call itself degenerates to a plain copy of its first
     argument.  */
  gimple_stmt_iterator enter_gsi = gsi_for_stmt (enter_stmt);
  enter_stmt = gimple_build_assign (simduid, gimple_call_arg (enter_stmt, 0));
  gsi_replace (&enter_gsi, enter_stmt, false);

  use_operand_p use;
  gimple *exit_stmt;
  if (single_imm_use (simtrec, &use, &exit_stmt))
    {
      /* Mark the end of the record's lifetime with a clobber just
	 before the matching EXIT call.  */
      gcc_assert (gimple_call_internal_p (exit_stmt, IFN_GOMP_SIMT_EXIT));
      gimple_stmt_iterator exit_gsi = gsi_for_stmt (exit_stmt);
      tree clobber = build_clobber (rectype);
      exit_stmt = gimple_build_assign (build_simple_mem_ref (simtrec), clobber);
      gsi_insert_before (&exit_gsi, exit_stmt, GSI_SAME_STMT);
    }
  else
    gcc_checking_assert (has_zero_uses (simtrec));
}
|
|
1865
|
|
1866 /* Callback for walk_gimple_stmt used to scan for SIMT-privatized variables. */
|
|
1867
|
|
1868 static tree
|
|
1869 find_simtpriv_var_op (tree *tp, int *walk_subtrees, void *)
|
|
1870 {
|
|
1871 tree t = *tp;
|
|
1872
|
|
1873 if (VAR_P (t)
|
|
1874 && DECL_HAS_VALUE_EXPR_P (t)
|
|
1875 && lookup_attribute ("omp simt private", DECL_ATTRIBUTES (t)))
|
|
1876 {
|
|
1877 *walk_subtrees = 0;
|
|
1878 return t;
|
|
1879 }
|
|
1880 return NULL_TREE;
|
|
1881 }
|
|
1882
|
|
1883 /* Cleanup uses of SIMT placeholder internal functions: on non-SIMT targets,
|
|
1884 VF is 1 and LANE is 0; on SIMT targets, VF is folded to a constant, and
|
|
1885 LANE is kept to be expanded to RTL later on. Also cleanup all other SIMT
|
|
1886 internal functions on non-SIMT targets, and likewise some SIMD internal
|
|
1887 functions on SIMT targets. */
|
|
1888
|
|
static unsigned int
execute_omp_device_lower ()
{
  /* SIMT vectorization factor reported by the target; on targets without
     the simt.vf hook, VF is 1 and all SIMT placeholders fold away.  */
  int vf = targetm.simt.vf ? targetm.simt.vf () : 1;
  bool regimplify = false;
  basic_block bb;
  gimple_stmt_iterator gsi;
  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt))
	  continue;
	/* A non-NULL RHS computed below replaces the internal call with a
	   plain assignment (or a nop if there is no LHS); a NULL RHS means
	   the call is kept for later expansion.  */
	tree lhs = gimple_call_lhs (stmt), rhs = NULL_TREE;
	tree type = lhs ? TREE_TYPE (lhs) : integer_type_node;
	switch (gimple_call_internal_fn (stmt))
	  {
	  case IFN_GOMP_USE_SIMT:
	    rhs = vf == 1 ? integer_zero_node : integer_one_node;
	    break;
	  case IFN_GOMP_SIMT_ENTER:
	    /* Non-SIMT: the private block is just the pointer argument.  */
	    rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
	    goto simtreg_enter_exit;
	  case IFN_GOMP_SIMT_ENTER_ALLOC:
	    if (vf != 1)
	      ompdevlow_adjust_simt_enter (&gsi, &regimplify);
	    rhs = vf == 1 ? null_pointer_node : NULL_TREE;
	    goto simtreg_enter_exit;
	  case IFN_GOMP_SIMT_EXIT:
	  simtreg_enter_exit:
	    if (vf != 1)
	      continue;
	    /* The call is replaced below; detach its virtual def first.  */
	    unlink_stmt_vdef (stmt);
	    break;
	  case IFN_GOMP_SIMT_LANE:
	  case IFN_GOMP_SIMT_LAST_LANE:
	    rhs = vf == 1 ? build_zero_cst (type) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMT_VF:
	    rhs = build_int_cst (type, vf);
	    break;
	  case IFN_GOMP_SIMT_ORDERED_PRED:
	    rhs = vf == 1 ? integer_zero_node : NULL_TREE;
	    if (rhs || !lhs)
	      unlink_stmt_vdef (stmt);
	    break;
	  case IFN_GOMP_SIMT_VOTE_ANY:
	  case IFN_GOMP_SIMT_XCHG_BFLY:
	  case IFN_GOMP_SIMT_XCHG_IDX:
	    rhs = vf == 1 ? gimple_call_arg (stmt, 0) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMD_LANE:
	  case IFN_GOMP_SIMD_LAST_LANE:
	    /* Conversely, SIMD placeholders fold on SIMT targets.  */
	    rhs = vf != 1 ? build_zero_cst (type) : NULL_TREE;
	    break;
	  case IFN_GOMP_SIMD_VF:
	    rhs = vf != 1 ? build_one_cst (type) : NULL_TREE;
	    break;
	  default:
	    continue;
	  }
	if (lhs && !rhs)
	  continue;
	stmt = lhs ? gimple_build_assign (lhs, rhs) : gimple_build_nop ();
	gsi_replace (&gsi, stmt, false);
      }
  if (regimplify)
    /* ompdevlow_adjust_simt_enter gave SIMT-privatized variables new
       DECL_VALUE_EXPRs; regimplify statements mentioning them, and drop
       clobbers of them.  Statements are removed during this walk, hence
       the reverse iteration.  */
    FOR_EACH_BB_REVERSE_FN (bb, cfun)
      for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
	if (walk_gimple_stmt (&gsi, NULL, find_simtpriv_var_op, NULL))
	  {
	    if (gimple_clobber_p (gsi_stmt (gsi)))
	      gsi_remove (&gsi, true);
	    else
	      gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
	  }
  if (vf != 1)
    cfun->has_force_vectorize_loops = false;
  return 0;
}
|
|
1969
|
|
1970 namespace {
|
|
1971
|
|
/* Pass descriptor for the "ompdevlow" GIMPLE pass implemented by
   execute_omp_device_lower; provides PROP_gimple_lomp_dev so the pass
   runs at most once per function (see the gate below).  */

const pass_data pass_data_omp_device_lower =
{
  GIMPLE_PASS, /* type */
  "ompdevlow", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_cfg, /* properties_required */
  PROP_gimple_lomp_dev, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
|
|
1984
|
|
class pass_omp_device_lower : public gimple_opt_pass
{
public:
  pass_omp_device_lower (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_device_lower, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only on functions not yet lowered, i.e. those that do not have
     PROP_gimple_lomp_dev set.  */
  virtual bool gate (function *fun)
    {
      return !(fun->curr_properties & PROP_gimple_lomp_dev);
    }
  virtual unsigned int execute (function *)
    {
      return execute_omp_device_lower ();
    }

}; // class pass_omp_device_lower
|
|
2003
|
|
2004 } // anon namespace
|
|
2005
|
|
/* Factory for the "ompdevlow" pass: return a fresh instance of
   pass_omp_device_lower for context CTXT.  */

gimple_opt_pass *
make_pass_omp_device_lower (gcc::context *ctxt)
{
  return new pass_omp_device_lower (ctxt);
}
|
|
2011
|
|
2012 /* "omp declare target link" handling pass. */
|
|
2013
|
|
2014 namespace {
|
|
2015
|
|
/* Pass descriptor for the "omptargetlink" GIMPLE pass; requires SSA form
   and schedules an SSA update after regimplifying statements that refer
   to "omp declare target link" variables.  */

const pass_data pass_data_omp_target_link =
{
  GIMPLE_PASS, /* type */
  "omptargetlink", /* name */
  OPTGROUP_OMP, /* optinfo_flags */
  TV_NONE, /* tv_id */
  PROP_ssa, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_update_ssa, /* todo_flags_finish */
};
|
|
2028
|
|
class pass_omp_target_link : public gimple_opt_pass
{
public:
  pass_omp_target_link (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_omp_target_link, ctxt)
  {}

  /* opt_pass methods: */
  /* Run only in the accelerator (offload device) compiler, and there
     only for functions that are subject to offloading.  */
  virtual bool gate (function *fun)
    {
#ifdef ACCEL_COMPILER
      return offloading_function_p (fun->decl);
#else
      (void) fun;
      return false;
#endif
    }

  /* Defined out of line below, after find_link_var_op.  */
  virtual unsigned execute (function *);
};
|
|
2049
|
|
2050 /* Callback for walk_gimple_stmt used to scan for link var operands. */
|
|
2051
|
|
2052 static tree
|
|
2053 find_link_var_op (tree *tp, int *walk_subtrees, void *)
|
|
2054 {
|
|
2055 tree t = *tp;
|
|
2056
|
|
2057 if (VAR_P (t)
|
|
2058 && DECL_HAS_VALUE_EXPR_P (t)
|
|
2059 && is_global_var (t)
|
|
2060 && lookup_attribute ("omp declare target link", DECL_ATTRIBUTES (t)))
|
|
2061 {
|
|
2062 *walk_subtrees = 0;
|
|
2063 return t;
|
|
2064 }
|
|
2065
|
|
2066 return NULL_TREE;
|
|
2067 }
|
|
2068
|
|
2069 unsigned
|
|
2070 pass_omp_target_link::execute (function *fun)
|
|
2071 {
|
|
2072 basic_block bb;
|
|
2073 FOR_EACH_BB_FN (bb, fun)
|
|
2074 {
|
|
2075 gimple_stmt_iterator gsi;
|
|
2076 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
2077 if (walk_gimple_stmt (&gsi, NULL, find_link_var_op, NULL))
|
|
2078 gimple_regimplify_operands (gsi_stmt (gsi), &gsi);
|
|
2079 }
|
|
2080
|
|
2081 return 0;
|
|
2082 }
|
|
2083
|
|
2084 } // anon namespace
|
|
2085
|
|
/* Factory for the "omptargetlink" pass: return a fresh instance of
   pass_omp_target_link for context CTXT.  */

gimple_opt_pass *
make_pass_omp_target_link (gcc::context *ctxt)
{
  return new pass_omp_target_link (ctxt);
}
|