111
|
1 /* brig-function.cc -- declaration of brig_function class.
|
145
|
2 Copyright (C) 2016-2020 Free Software Foundation, Inc.
|
111
|
3 Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com>
|
|
4 for General Processor Tech.
|
|
5
|
|
6 This file is part of GCC.
|
|
7
|
|
8 GCC is free software; you can redistribute it and/or modify it under
|
|
9 the terms of the GNU General Public License as published by the Free
|
|
10 Software Foundation; either version 3, or (at your option) any later
|
|
11 version.
|
|
12
|
|
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
16 for more details.
|
|
17
|
|
18 You should have received a copy of the GNU General Public License
|
|
19 along with GCC; see the file COPYING3. If not see
|
|
20 <http://www.gnu.org/licenses/>. */
|
|
21
|
|
22 #include <sstream>
|
|
23 #include <iomanip>
|
|
24
|
|
25 #include "brig-function.h"
|
|
26 #include "stringpool.h"
|
|
27 #include "tree-iterator.h"
|
|
28 #include "toplev.h"
|
|
29 #include "gimplify.h"
|
|
30 #include "gimple-expr.h"
|
|
31 #include "print-tree.h"
|
|
32 #include "hsa-brig-format.h"
|
|
33 #include "stor-layout.h"
|
|
34 #include "diagnostic-core.h"
|
|
35 #include "brig-code-entry-handler.h"
|
|
36 #include "brig-machine.h"
|
|
37 #include "brig-util.h"
|
|
38 #include "phsa.h"
|
|
39 #include "tree-pretty-print.h"
|
|
40 #include "dumpfile.h"
|
|
41 #include "profile-count.h"
|
|
42 #include "tree-cfg.h"
|
|
43 #include "errors.h"
|
|
44 #include "function.h"
|
|
45 #include "brig-to-generic.h"
|
|
46 #include "brig-builtins.h"
|
131
|
47 #include "options.h"
|
|
48 #include "fold-const.h"
|
|
49 #include "target.h"
|
|
50 #include "builtins.h"
|
|
51
|
|
/* Index from an (HSAIL opcode, HSAIL type) pair to the builtin decl
   registered for it in brig-builtins.def.  This is a static member, so
   it is shared by all brig_function instances; the constructor fills it
   in while it is still empty.  */
brig_function::builtin_map brig_function::s_custom_builtins;
|
111
|
53
|
|
54 brig_function::brig_function (const BrigDirectiveExecutable *exec,
|
|
55 brig_to_generic *parent)
|
|
56 : m_brig_def (exec), m_is_kernel (false), m_is_finished (false), m_name (""),
|
|
57 m_current_bind_expr (NULL_TREE), m_func_decl (NULL_TREE),
|
|
58 m_context_arg (NULL_TREE), m_group_base_arg (NULL_TREE),
|
|
59 m_private_base_arg (NULL_TREE), m_ret_value (NULL_TREE),
|
|
60 m_next_kernarg_offset (0), m_kernarg_max_align (0),
|
|
61 m_ret_value_brig_var (NULL), m_has_barriers (false), m_has_allocas (false),
|
|
62 m_has_function_calls_with_barriers (false), m_calls_analyzed (false),
|
|
63 m_is_wg_function (false), m_has_unexpanded_dp_builtins (false),
|
|
64 m_generating_arg_block (false), m_parent (parent)
|
|
65 {
|
|
66 memset (m_regs, 0,
|
|
67 BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT * sizeof (BrigOperandRegister *));
|
|
68 memset (&m_descriptor, 0, sizeof (phsa_descriptor));
|
131
|
69
|
|
70 if (s_custom_builtins.size () > 0) return;
|
|
71
|
|
72 /* Populate the builtin index. */
|
|
73 #undef DEF_HSAIL_ATOMIC_BUILTIN
|
|
74 #undef DEF_HSAIL_CVT_ZEROI_SAT_BUILTIN
|
|
75 #undef DEF_HSAIL_INTR_BUILTIN
|
|
76 #undef DEF_HSAIL_SAT_BUILTIN
|
|
77 #undef DEF_HSAIL_BUILTIN
|
|
78 #define DEF_HSAIL_BUILTIN(ENUM, HSAIL_OPCODE, HSAIL_TYPE, NAME, TYPE, ATTRS) \
|
|
79 s_custom_builtins[std::make_pair (HSAIL_OPCODE, HSAIL_TYPE)] \
|
|
80 = builtin_decl_explicit (ENUM);
|
|
81
|
|
82 #include "brig-builtins.def"
|
111
|
83 }
|
|
84
|
|
85 brig_function::~brig_function ()
|
|
86 {
|
|
87 for (size_t i = 0; i < BRIG_2_TREE_HSAIL_TOTAL_REG_COUNT; ++i)
|
|
88 {
|
|
89 if (m_regs[i] != NULL)
|
|
90 {
|
|
91 delete m_regs[i];
|
|
92 m_regs[i] = NULL;
|
|
93 }
|
|
94 }
|
|
95 }
|
|
96
|
|
97 /* Returns a GENERIC label with the given name in the given function.
|
|
98 Creates it, if not yet found. */
|
|
99
|
|
100 tree
|
|
101 brig_function::label (const std::string &name)
|
|
102 {
|
|
103 label_index::const_iterator i = m_label_index.find (name);
|
|
104 if (i == m_label_index.end ())
|
|
105 {
|
|
106 tree name_identifier
|
|
107 = get_identifier_with_length (name.c_str (), name.size ());
|
|
108
|
|
109 tree label_decl = build_decl (UNKNOWN_LOCATION, LABEL_DECL,
|
|
110 name_identifier, void_type_node);
|
|
111
|
|
112 DECL_CONTEXT (label_decl) = m_func_decl;
|
|
113 DECL_ARTIFICIAL (label_decl) = 0;
|
|
114
|
|
115 m_label_index[name] = label_decl;
|
|
116 return label_decl;
|
|
117 }
|
|
118 else
|
|
119 return (*i).second;
|
|
120 }
|
|
121
|
|
/* Record an argument variable for later use.  This includes both local
   variables inside arg blocks and incoming function arguments.

   BRIGVAR is the BRIG variable directive used as the lookup key and
   TREEDECL the tree to associate with it.  An existing entry for the
   same BRIGVAR is overwritten.  The mapping can be queried with
   arg_variable ().  */

void
brig_function::add_arg_variable (const BrigDirectiveVariable *brigVar,
                                 tree treeDecl)
{
  m_arg_variables[brigVar] = treeDecl;
}
|
|
131
|
|
132 tree
|
|
133 brig_function::arg_variable (const BrigDirectiveVariable *var) const
|
|
134 {
|
|
135 variable_index::const_iterator i = m_arg_variables.find (var);
|
|
136 if (i == m_arg_variables.end ())
|
|
137 return NULL_TREE;
|
|
138 else
|
|
139 return (*i).second;
|
|
140 }
|
|
141
|
|
142 /* Appends a new kernel argument descriptor for the current kernel's
|
|
143 arg space. */
|
|
144
|
|
145 void
|
|
146 brig_function::append_kernel_arg (const BrigDirectiveVariable *var, size_t size,
|
|
147 size_t alignment)
|
|
148 {
|
|
149 gcc_assert (m_func_decl != NULL_TREE);
|
|
150 gcc_assert (m_is_kernel);
|
|
151
|
|
152 size_t align_padding = m_next_kernarg_offset % alignment == 0 ?
|
|
153 0 : (alignment - m_next_kernarg_offset % alignment);
|
|
154 m_next_kernarg_offset += align_padding;
|
|
155 m_kernarg_offsets[var] = m_next_kernarg_offset;
|
|
156 m_next_kernarg_offset += size;
|
|
157
|
|
158 m_kernarg_max_align
|
|
159 = m_kernarg_max_align < alignment ? alignment : m_kernarg_max_align;
|
|
160 }
|
|
161
|
|
162 size_t
|
|
163 brig_function::kernel_arg_offset (const BrigDirectiveVariable *var) const
|
|
164 {
|
|
165 var_offset_table::const_iterator i = m_kernarg_offsets.find (var);
|
|
166 gcc_assert (i != m_kernarg_offsets.end ());
|
|
167 return (*i).second;
|
|
168 }
|
|
169
|
|
170 /* Add work-item ID variables to the beginning of the kernel function
|
|
171 which can be used for address computation as kernel dispatch packet
|
|
172 instructions can be expanded to GENERIC nodes referring to them. */
|
|
173
|
|
174 void
|
|
175 brig_function::add_id_variables ()
|
|
176 {
|
|
177 tree bind_expr = m_current_bind_expr;
|
|
178 tree stmts = BIND_EXPR_BODY (bind_expr);
|
|
179
|
|
180 /* Initialize the WG limits and local ids. */
|
131
|
181 m_kernel_entry = tsi_start (stmts);
|
111
|
182
|
|
183 for (int i = 0; i < 3; ++i)
|
|
184 {
|
|
185 char dim_char = (char) ((int) 'x' + i);
|
|
186
|
|
187 /* The local sizes are limited to 16b values, but let's still use 32b
|
|
188 to avoid unnecessary casts (the ID functions are 32b). */
|
|
189 m_local_id_vars[i]
|
|
190 = add_local_variable (std::string ("__local_") + dim_char,
|
131
|
191 long_long_integer_type_node);
|
111
|
192
|
|
193 tree workitemid_call
|
|
194 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKITEMID), 2,
|
|
195 uint32_type_node, uint32_type_node,
|
|
196 build_int_cst (uint32_type_node, i), ptr_type_node,
|
|
197 m_context_arg);
|
|
198
|
|
199 tree id_init = build2 (MODIFY_EXPR, TREE_TYPE (m_local_id_vars[i]),
|
131
|
200 m_local_id_vars[i],
|
|
201 convert (TREE_TYPE (m_local_id_vars[i]),
|
|
202 workitemid_call));
|
111
|
203
|
131
|
204 append_statement (id_init);
|
111
|
205
|
|
206 m_cur_wg_size_vars[i]
|
|
207 = add_local_variable (std::string ("__cur_wg_size_") + dim_char,
|
131
|
208 long_long_integer_type_node);
|
111
|
209
|
131
|
210 tree cwgz_call;
|
|
211 if (flag_assume_phsa)
|
|
212 {
|
|
213 tree_stl_vec operands
|
|
214 = tree_stl_vec (1, build_int_cst (uint32_type_node, i));
|
|
215 cwgz_call
|
|
216 = expand_or_call_builtin (BRIG_OPCODE_CURRENTWORKGROUPSIZE,
|
|
217 BRIG_TYPE_U32, uint32_type_node,
|
|
218 operands);
|
|
219 }
|
|
220 else
|
|
221 cwgz_call = call_builtin
|
|
222 (builtin_decl_explicit (BUILT_IN_HSAIL_CURRENTWORKGROUPSIZE),
|
|
223 2, uint32_type_node, uint32_type_node,
|
|
224 build_int_cst (uint32_type_node, i), ptr_type_node, m_context_arg);
|
111
|
225
|
|
226 tree limit_init = build2 (MODIFY_EXPR, TREE_TYPE (m_cur_wg_size_vars[i]),
|
131
|
227 m_cur_wg_size_vars[i],
|
|
228 convert (TREE_TYPE (m_cur_wg_size_vars[i]),
|
|
229 cwgz_call));
|
111
|
230
|
131
|
231 append_statement (limit_init);
|
111
|
232
|
|
233 m_wg_id_vars[i]
|
|
234 = add_local_variable (std::string ("__workgroupid_") + dim_char,
|
|
235 uint32_type_node);
|
|
236
|
131
|
237 tree wgid_call;
|
|
238 if (flag_assume_phsa)
|
|
239 {
|
|
240 tree_stl_vec operands
|
|
241 = tree_stl_vec (1, build_int_cst (uint32_type_node, i));
|
|
242 wgid_call
|
|
243 = expand_or_call_builtin (BRIG_OPCODE_WORKGROUPID, BRIG_TYPE_U32,
|
|
244 uint32_type_node, operands);
|
|
245 }
|
|
246 else
|
|
247 wgid_call
|
|
248 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKGROUPID),
|
|
249 2, uint32_type_node, uint32_type_node,
|
|
250 build_int_cst (uint32_type_node, i), ptr_type_node,
|
|
251 m_context_arg);
|
111
|
252
|
|
253 tree wgid_init = build2 (MODIFY_EXPR, TREE_TYPE (m_wg_id_vars[i]),
|
|
254 m_wg_id_vars[i], wgid_call);
|
|
255
|
131
|
256 append_statement (wgid_init);
|
111
|
257
|
|
258 m_wg_size_vars[i]
|
|
259 = add_local_variable (std::string ("__workgroupsize_") + dim_char,
|
|
260 uint32_type_node);
|
|
261
|
131
|
262 tree wgsize_call;
|
|
263 if (flag_assume_phsa)
|
|
264 {
|
|
265 tree_stl_vec operands
|
|
266 = tree_stl_vec (1, build_int_cst (uint32_type_node, i));
|
|
267 wgsize_call
|
|
268 = expand_or_call_builtin (BRIG_OPCODE_WORKGROUPSIZE, BRIG_TYPE_U32,
|
|
269 uint32_type_node, operands);
|
|
270 }
|
|
271 else
|
|
272 wgsize_call
|
|
273 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_WORKGROUPSIZE),
|
|
274 2, uint32_type_node, uint32_type_node,
|
|
275 build_int_cst (uint32_type_node, i), ptr_type_node,
|
|
276 m_context_arg);
|
111
|
277
|
|
278 tree wgsize_init = build2 (MODIFY_EXPR, TREE_TYPE (m_wg_size_vars[i]),
|
|
279 m_wg_size_vars[i], wgsize_call);
|
|
280
|
131
|
281 append_statement (wgsize_init);
|
111
|
282
|
|
283 m_grid_size_vars[i]
|
|
284 = add_local_variable (std::string ("__gridsize_") + dim_char,
|
|
285 uint32_type_node);
|
|
286
|
|
287 tree gridsize_call
|
|
288 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_GRIDSIZE), 2,
|
|
289 uint32_type_node, uint32_type_node,
|
|
290 build_int_cst (uint32_type_node, i), ptr_type_node,
|
|
291 m_context_arg);
|
|
292
|
|
293 tree gridsize_init = build2 (MODIFY_EXPR, TREE_TYPE (m_grid_size_vars[i]),
|
|
294 m_grid_size_vars[i], gridsize_call);
|
|
295
|
131
|
296 append_statement (gridsize_init);
|
|
297
|
|
298 m_abs_id_base_vars[i]
|
|
299 = add_local_variable (std::string ("__abs_id_base_") + dim_char,
|
|
300 long_long_integer_type_node);
|
|
301
|
|
302 m_abs_id_vars[i]
|
|
303 = add_local_variable (std::string ("__abs_id_") + dim_char,
|
|
304 long_long_integer_type_node);
|
|
305
|
|
306 tree abs_id_base
|
|
307 = build2 (MULT_EXPR, long_long_integer_type_node,
|
|
308 convert (long_long_integer_type_node, m_wg_id_vars[i]),
|
|
309 convert (long_long_integer_type_node, m_wg_size_vars[i]));
|
|
310 tree abs_id
|
|
311 = build2 (PLUS_EXPR, long_long_integer_type_node, abs_id_base,
|
|
312 convert (long_long_integer_type_node, m_local_id_vars[i]));
|
|
313
|
|
314 tree abs_id_base_init
|
|
315 = build2 (MODIFY_EXPR, TREE_TYPE (m_abs_id_base_vars[i]),
|
|
316 m_abs_id_base_vars[i], abs_id_base);
|
|
317 append_statement (abs_id_base_init);
|
|
318
|
|
319 tree abs_id_init = build2 (MODIFY_EXPR,
|
|
320 TREE_TYPE (m_abs_id_vars[i]),
|
|
321 m_abs_id_vars[i], abs_id);
|
|
322 append_statement (abs_id_init);
|
111
|
323 }
|
|
324 }
|
|
325
|
|
326 /* Creates a new local variable with the given NAME and given GENERIC
|
|
327 TYPE. */
|
|
328
|
|
329 tree
|
|
330 brig_function::add_local_variable (std::string name, tree type)
|
|
331 {
|
|
332 tree name_identifier
|
|
333 = get_identifier_with_length (name.c_str (), name.size ());
|
|
334 tree variable
|
|
335 = build_decl (UNKNOWN_LOCATION, VAR_DECL, name_identifier, type);
|
|
336
|
|
337 DECL_NONLOCAL (variable) = 0;
|
|
338 TREE_ADDRESSABLE (variable) = 0;
|
|
339 TREE_STATIC (variable) = 0;
|
|
340 TREE_USED (variable) = 1;
|
|
341 DECL_ARTIFICIAL (variable) = 0;
|
|
342
|
|
343 tree bind_expr = DECL_SAVED_TREE (m_func_decl);
|
|
344
|
|
345 DECL_CONTEXT (variable) = m_func_decl;
|
|
346
|
|
347 DECL_CHAIN (variable) = BIND_EXPR_VARS (bind_expr);
|
|
348 BIND_EXPR_VARS (bind_expr) = variable;
|
|
349 return variable;
|
|
350 }
|
|
351
|
131
|
352 /* Return tree type for an HSA register.
|
|
353
|
|
354 The tree type can be anything (scalar, vector, int, float, etc.)
|
|
355 but its size is guaranteed to match the HSA register size.
|
|
356
|
|
357 HSA registers are untyped but we select a type based on their use
|
|
358 to reduce (sometimes unoptimizable) VIEW_CONVERT_EXPR nodes (seems
|
|
359 to occur when use or def reaches over current BB). */
|
|
360
|
|
361 tree
|
|
362 brig_function::get_tree_type_for_hsa_reg (const BrigOperandRegister *reg) const
|
|
363 {
|
|
364 size_t reg_size = gccbrig_reg_size (reg);
|
|
365
|
|
366 /* The default type. */
|
|
367 tree type = build_nonstandard_integer_type (reg_size, true);
|
|
368
|
|
369 if (m_parent->m_fn_regs_use_index.count (m_name) == 0)
|
|
370 return type;
|
|
371
|
|
372 const regs_use_index &index = m_parent->m_fn_regs_use_index[m_name];
|
|
373 size_t reg_id = gccbrig_hsa_reg_id (*reg);
|
|
374 if (index.count (reg_id) == 0)
|
|
375 return type;
|
|
376
|
|
377 const reg_use_info &info = index.find (reg_id)->second;
|
|
378 std::vector<std::pair<tree, size_t> >::const_iterator it
|
|
379 = info.m_type_refs.begin ();
|
|
380 std::vector<std::pair<tree, size_t> >::const_iterator it_end
|
|
381 = info.m_type_refs.end ();
|
|
382 size_t max_refs_as_type_count = 0;
|
|
383 for (; it != it_end; it++)
|
|
384 {
|
|
385 size_t type_bit_size = int_size_in_bytes (it->first) * BITS_PER_UNIT;
|
|
386 if (type_bit_size != reg_size) continue;
|
|
387 if (it->second > max_refs_as_type_count)
|
|
388 {
|
|
389 type = it->first;
|
|
390 max_refs_as_type_count = it->second;
|
|
391 }
|
|
392 }
|
|
393
|
|
394 return type;
|
|
395 }
|
|
396
|
111
|
397 /* Returns a DECL_VAR for the given HSAIL operand register.
|
|
398 If it has not been created yet for the function being generated,
|
131
|
399 creates it as a type determined by analysis phase. */
|
111
|
400
|
|
401 tree
|
|
402 brig_function::get_m_var_declfor_reg (const BrigOperandRegister *reg)
|
|
403 {
|
131
|
404 size_t offset = gccbrig_hsa_reg_id (*reg);
|
111
|
405
|
|
406 reg_decl_index_entry *regEntry = m_regs[offset];
|
|
407 if (regEntry == NULL)
|
|
408 {
|
|
409 size_t reg_size = gccbrig_reg_size (reg);
|
|
410 tree type;
|
|
411 if (reg_size > 1)
|
131
|
412 type = get_tree_type_for_hsa_reg (reg);
|
111
|
413 else
|
|
414 type = boolean_type_node;
|
|
415
|
|
416 /* Drop the const qualifier so we do not end up with a read only
|
|
417 register variable which cannot be written to later. */
|
|
418 tree nonconst_type = build_type_variant (type, false, false);
|
|
419
|
|
420 regEntry = new reg_decl_index_entry;
|
|
421
|
|
422 regEntry->m_var_decl
|
|
423 = add_local_variable (gccbrig_reg_name (reg), nonconst_type);
|
|
424 m_regs[offset] = regEntry;
|
|
425 }
|
|
426 return regEntry->m_var_decl;
|
|
427 }
|
|
428
|
|
429 /* Builds a work-item do..while loop for a single DIM. HEADER_ENTRY is
|
|
430 a statement after which the iteration variables should be initialized and
|
|
431 the loop body starts. BRANCH_AFTER is the statement after which the loop
|
|
432 predicate check and the back edge goto will be appended. */
|
|
433
|
|
434 void
|
|
435 brig_function::add_wi_loop (int dim, tree_stmt_iterator *header_entry,
|
|
436 tree_stmt_iterator *branch_after)
|
|
437 {
|
|
438 tree ivar = m_local_id_vars[dim];
|
131
|
439 tree abs_id_base_var = m_abs_id_base_vars[dim];
|
|
440 tree abs_id_var = m_abs_id_vars[dim];
|
111
|
441 tree ivar_max = m_cur_wg_size_vars[dim];
|
|
442 tree_stmt_iterator entry = *header_entry;
|
|
443
|
|
444 /* TODO: this is not a parallel loop as we share the "register variables"
|
|
445 across work-items. Should create a copy of them per WI instance. That
|
|
446 is, declare temporaries for new definitions inside the loop body, not at
|
|
447 function scope. */
|
|
448
|
|
449 tree ivar_init = build2 (MODIFY_EXPR, TREE_TYPE (ivar), ivar,
|
|
450 build_zero_cst (TREE_TYPE (ivar)));
|
|
451 tsi_link_after (&entry, ivar_init, TSI_NEW_STMT);
|
|
452
|
131
|
453 tree abs_id_var_init = build2 (MODIFY_EXPR, TREE_TYPE (abs_id_var),
|
|
454 abs_id_var,
|
|
455 convert (TREE_TYPE (abs_id_var),
|
|
456 abs_id_base_var));
|
|
457 tsi_link_after (&entry, abs_id_var_init, TSI_NEW_STMT);
|
|
458
|
111
|
459 tree loop_body_label
|
|
460 = label (std::string ("__wi_loop_") + (char) ((int) 'x' + dim));
|
|
461 tree loop_body_label_stmt = build_stmt (LABEL_EXPR, loop_body_label);
|
|
462
|
|
463 tsi_link_after (&entry, loop_body_label_stmt, TSI_NEW_STMT);
|
|
464
|
|
465 if (m_has_unexpanded_dp_builtins)
|
|
466 {
|
131
|
467 if (!flag_assume_phsa)
|
|
468 {
|
|
469 tree id_set_builtin
|
|
470 = builtin_decl_explicit (BUILT_IN_HSAIL_SETWORKITEMID);
|
|
471 /* Set the local ID to the current wi-loop iteration variable value
|
|
472 to ensure the builtins see the correct values. */
|
|
473 tree id_set_call
|
|
474 = call_builtin (id_set_builtin, 3,
|
|
475 void_type_node, uint32_type_node,
|
|
476 build_int_cst (uint32_type_node, dim),
|
|
477 uint32_type_node, convert (uint32_type_node, ivar),
|
|
478 ptr_type_node, m_context_arg);
|
|
479 tsi_link_after (&entry, id_set_call, TSI_NEW_STMT);
|
|
480 }
|
|
481 else
|
|
482 {
|
|
483 tree ptr_type = build_pointer_type (uint32_type_node);
|
|
484 tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
|
|
485 build_int_cst (ptr_type, dim * 4));
|
|
486 tree assign = build2 (MODIFY_EXPR, uint32_type_node, ctx,
|
|
487 convert (uint32_type_node, ivar));
|
|
488
|
|
489 tsi_link_after (&entry, assign, TSI_NEW_STMT);
|
|
490 }
|
111
|
491 }
|
|
492
|
|
493 /* Increment the WI iteration variable. */
|
|
494 tree incr = build2 (PREINCREMENT_EXPR, TREE_TYPE (ivar), ivar,
|
|
495 build_one_cst (TREE_TYPE (ivar)));
|
|
496
|
|
497 tsi_link_after (branch_after, incr, TSI_NEW_STMT);
|
|
498
|
131
|
499 /* ...and the abs id variable. */
|
|
500 tree abs_id_incr = build2 (PREINCREMENT_EXPR, TREE_TYPE (abs_id_var),
|
|
501 abs_id_var,
|
|
502 build_one_cst (TREE_TYPE (abs_id_var)));
|
|
503
|
|
504 tsi_link_after (branch_after, abs_id_incr, TSI_NEW_STMT);
|
|
505
|
111
|
506 /* Append the predicate check with the back edge goto. */
|
|
507 tree condition = build2 (LT_EXPR, TREE_TYPE (ivar), ivar, ivar_max);
|
|
508 tree target_goto = build1 (GOTO_EXPR, void_type_node, loop_body_label);
|
|
509 tree if_stmt
|
|
510 = build3 (COND_EXPR, void_type_node, condition, target_goto, NULL_TREE);
|
|
511 tsi_link_after (branch_after, if_stmt, TSI_NEW_STMT);
|
|
512 }
|
|
513
|
|
514 /* Recursively analyzes the function and its callees for barrier usage. */
|
|
515
|
|
516 void
|
|
517 brig_function::analyze_calls ()
|
|
518 {
|
|
519 if (m_calls_analyzed)
|
|
520 return;
|
|
521
|
|
522 /* Set this early to not get stuck in case of recursive call graphs.
|
|
523 This is safe because if the function calls itself, either the function
|
|
524 has barrier calls which implies a call to a function with barrier calls,
|
|
525 or it doesn't in which case the result depends on the later called
|
|
526 functions. */
|
|
527 m_calls_analyzed = true;
|
|
528
|
|
529 for (size_t i = 0; i < m_called_functions.size (); ++i)
|
|
530 {
|
|
531 tree f = m_called_functions[i];
|
|
532 brig_function *called_f = m_parent->get_finished_function (f);
|
|
533 if (called_f == NULL)
|
|
534 {
|
|
535 /* Unfinished function (only declaration within the set of BRIGs)
|
|
536 found. Cannot finish the CG analysis. Have to assume it does have
|
|
537 a barrier for safety. */
|
|
538 m_has_function_calls_with_barriers = true;
|
|
539 m_has_unexpanded_dp_builtins = true;
|
|
540 break;
|
|
541 }
|
|
542 called_f->analyze_calls ();
|
|
543 /* We can assume m_has_barriers has been correctly set during the
|
|
544 construction of the function decl. No need to reanalyze it. */
|
|
545 m_has_function_calls_with_barriers |= called_f->m_has_barriers;
|
|
546
|
|
547 /* If the function or any of its called functions has dispatch
|
|
548 packet builtin calls that require the local id, we need to
|
|
549 set the local id to the context in the work item loop before
|
|
550 the functions are called. If we analyze the opposite, these
|
|
551 function calls can be omitted. */
|
|
552 m_has_unexpanded_dp_builtins |= called_f->m_has_unexpanded_dp_builtins;
|
|
553 }
|
|
554 }
|
|
555
|
|
556 /* Tries to convert the current kernel to a work-group function that executes
|
|
557 all work-items using loops. Returns true in case the conversion was
|
|
558 successful. */
|
|
559
|
|
560 bool
|
|
561 brig_function::convert_to_wg_function ()
|
|
562 {
|
|
563 if (!m_calls_analyzed)
|
|
564 analyze_calls ();
|
|
565
|
|
566 if (m_has_barriers || m_has_function_calls_with_barriers)
|
|
567 return false;
|
|
568
|
|
569 /* The most trivial case: No barriers at all in the kernel.
|
|
570 We can create one big work-item loop around the whole kernel. */
|
|
571 tree bind_expr = m_current_bind_expr;
|
|
572 tree stmts = BIND_EXPR_BODY (bind_expr);
|
|
573
|
|
574 for (int i = 0; i < 3; ++i)
|
|
575 {
|
|
576 /* The previous loop has added a new label to the end of the function,
|
|
577 the next level loop should wrap around it also. */
|
|
578 tree_stmt_iterator function_exit = tsi_last (stmts);
|
|
579 add_wi_loop (i, &m_kernel_entry, &function_exit);
|
|
580 }
|
|
581
|
|
582 m_is_wg_function = true;
|
|
583 return false;
|
|
584 }
|
|
585
|
|
586 /* Emits a kernel description to a special ELF section so it can be
|
|
587 utilized by an HSA runtime implementation. The assembly block
|
|
588 must be emitted to a statement list of an function, which is given
|
|
589 as an argument. Returns the assembly block used to emit the section. */
|
|
590
|
|
591 tree
|
|
592 brig_function::emit_metadata (tree stmt_list)
|
|
593 {
|
|
594 /* Emit an ELF section via an assembly directive that generates a special
|
|
595 ELF section for each kernel that contains raw bytes of a descriptor
|
|
596 object. This is pretty disgusting, but life is never perfect ;) */
|
|
597
|
|
598 /* Use the original kernel name without the '_' prefix in the section name. */
|
|
599 std::string kern_name = m_is_kernel ? m_name.substr (1) : m_name;
|
|
600
|
|
601 std::ostringstream strstr;
|
|
602 strstr << std::endl
|
|
603 << ".pushsection " << PHSA_DESC_SECTION_PREFIX << kern_name
|
|
604 << std::endl
|
|
605 << "\t.p2align 1, 1, 1" << std::endl
|
|
606 << "\t.byte ";
|
|
607
|
|
608 for (size_t i = 0; i < sizeof (phsa_descriptor); ++i)
|
|
609 {
|
|
610 strstr << "0x" << std::setw (2) << std::setfill ('0') << std::hex
|
|
611 << (unsigned) *((unsigned char *) &m_descriptor + i);
|
|
612 if (i + 1 < sizeof (phsa_descriptor))
|
|
613 strstr << ", ";
|
|
614 }
|
|
615
|
|
616 strstr << std::endl << ".popsection" << std::endl << std::endl;
|
|
617
|
|
618 tree metadata_asm
|
|
619 = build_stmt (ASM_EXPR,
|
|
620 build_string (strstr.str ().size (), strstr.str ().c_str ()),
|
|
621 NULL_TREE, NULL_TREE, NULL_TREE, NULL_TREE);
|
|
622
|
|
623 append_to_statement_list_force (metadata_asm, &stmt_list);
|
|
624 return metadata_asm;
|
|
625 }
|
|
626
|
|
627 /* Emits the kernel launcher function. Also emits the metadata section
|
|
628 creation statements in it.
|
|
629
|
|
630 The launcher function calls the device-side runtime
|
|
631 that runs the kernel for all work-items. In C:
|
|
632
|
|
633 void KernelName (void* context, void* group_base_addr)
|
|
634 {
|
|
635 __hsail_launch_kernel (_KernelName, context, group_base_addr);
|
|
636 }
|
|
637
|
|
638 or, in case of a successful conversion to a work-group function:
|
|
639
|
|
640 void KernelName (void* context, void* group_base_addr)
|
|
641 {
|
|
642 __hsail_launch_wg_function (_KernelName, context, group_base_addr);
|
|
643 }
|
|
644
|
|
645 The user/host sees this function as the kernel to call from the
|
|
646 outside. The actual kernel generated from HSAIL was named _KernelName.
|
|
647 */
|
|
648
|
|
649 tree
|
|
650 brig_function::emit_launcher_and_metadata ()
|
|
651 {
|
|
652 /* The original kernel name without the '_' prefix. */
|
|
653 std::string kern_name = m_name.substr (1);
|
|
654
|
|
655 tree name_identifier
|
|
656 = get_identifier_with_length (kern_name.c_str (), kern_name.size ());
|
|
657
|
131
|
658 tree restrict_void_ptr
|
|
659 = build_qualified_type (build_pointer_type (void_type_node),
|
|
660 TYPE_QUAL_RESTRICT);
|
|
661 tree restrict_char_ptr
|
|
662 = build_qualified_type (build_pointer_type (char_type_node),
|
|
663 TYPE_QUAL_RESTRICT);
|
111
|
664 tree launcher
|
|
665 = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL, name_identifier,
|
131
|
666 build_function_type_list (void_type_node, restrict_void_ptr,
|
|
667 restrict_char_ptr, NULL_TREE));
|
111
|
668
|
|
669 TREE_USED (launcher) = 1;
|
|
670 DECL_ARTIFICIAL (launcher) = 1;
|
|
671
|
|
672 tree context_arg = build_decl (UNKNOWN_LOCATION, PARM_DECL,
|
131
|
673 get_identifier ("__context"),
|
|
674 restrict_void_ptr);
|
111
|
675
|
|
676 DECL_ARGUMENTS (launcher) = context_arg;
|
131
|
677 DECL_ARG_TYPE (context_arg) = restrict_void_ptr;
|
111
|
678 DECL_CONTEXT (context_arg) = launcher;
|
|
679 TREE_USED (context_arg) = 1;
|
|
680 DECL_ARTIFICIAL (context_arg) = 1;
|
|
681
|
|
682 tree group_base_addr_arg
|
|
683 = build_decl (UNKNOWN_LOCATION, PARM_DECL,
|
131
|
684 get_identifier ("__group_base_addr"), restrict_char_ptr);
|
111
|
685
|
|
686 chainon (DECL_ARGUMENTS (launcher), group_base_addr_arg);
|
131
|
687 DECL_ARG_TYPE (group_base_addr_arg) = restrict_char_ptr;
|
111
|
688 DECL_CONTEXT (group_base_addr_arg) = launcher;
|
|
689 TREE_USED (group_base_addr_arg) = 1;
|
|
690 DECL_ARTIFICIAL (group_base_addr_arg) = 1;
|
|
691
|
|
692 tree resdecl
|
|
693 = build_decl (UNKNOWN_LOCATION, RESULT_DECL, NULL_TREE, void_type_node);
|
|
694
|
|
695 DECL_RESULT (launcher) = resdecl;
|
|
696 DECL_CONTEXT (resdecl) = launcher;
|
|
697
|
|
698 DECL_INITIAL (launcher) = make_node (BLOCK);
|
|
699 TREE_USED (DECL_INITIAL (launcher)) = 1;
|
|
700
|
|
701 tree stmt_list = alloc_stmt_list ();
|
|
702
|
|
703 tree bind_expr = build3 (BIND_EXPR, void_type_node, NULL, stmt_list, NULL);
|
|
704
|
131
|
705 TREE_STATIC (launcher) = 1;
|
111
|
706 TREE_PUBLIC (launcher) = 1;
|
|
707
|
|
708 DECL_SAVED_TREE (launcher) = bind_expr;
|
|
709
|
|
710 if (DECL_STRUCT_FUNCTION (launcher) == NULL)
|
|
711 push_struct_function (launcher);
|
|
712 else
|
|
713 push_cfun (DECL_STRUCT_FUNCTION (launcher));
|
|
714
|
|
715 tree kernel_func_ptr = build1 (ADDR_EXPR, ptr_type_node, m_func_decl);
|
|
716
|
|
717 tree phsail_launch_kernel_call;
|
|
718
|
|
719 /* Compute the local group segment frame start pointer. */
|
|
720 tree group_local_offset_temp
|
|
721 = create_tmp_var (uint32_type_node, "group_local_offset");
|
|
722 tree group_local_offset_arg
|
|
723 = build2 (MODIFY_EXPR, uint32_type_node,
|
|
724 group_local_offset_temp,
|
|
725 build_int_cst (uint32_type_node,
|
|
726 m_parent->m_module_group_variables.size()));
|
|
727
|
|
728 /* Emit a launcher depending whether we converted the kernel function to
|
|
729 a work group function or not. */
|
|
730 if (m_is_wg_function)
|
|
731 phsail_launch_kernel_call
|
|
732 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_WG_FUNC),
|
|
733 4, void_type_node,
|
131
|
734 ptr_type_node, kernel_func_ptr, restrict_void_ptr,
|
|
735 context_arg, restrict_char_ptr, group_base_addr_arg,
|
111
|
736 uint32_type_node, group_local_offset_arg);
|
|
737 else
|
|
738 phsail_launch_kernel_call
|
|
739 = call_builtin (builtin_decl_explicit (BUILT_IN_HSAIL_LAUNCH_KERNEL),
|
|
740 4, void_type_node,
|
131
|
741 ptr_type_node, kernel_func_ptr, restrict_void_ptr,
|
|
742 context_arg, restrict_char_ptr, group_base_addr_arg,
|
111
|
743 uint32_type_node, group_local_offset_arg);
|
|
744
|
|
745 append_to_statement_list_force (phsail_launch_kernel_call, &stmt_list);
|
|
746
|
|
747 emit_metadata (stmt_list);
|
|
748
|
131
|
749 set_externally_visible (launcher);
|
|
750
|
111
|
751 return launcher;
|
|
752 }
|
|
753
|
|
754 tree
|
|
755 brig_function::append_statement (tree stmt)
|
|
756 {
|
|
757 gcc_assert (m_func_decl != NULL);
|
|
758
|
|
759 tree bind_expr = m_current_bind_expr;
|
|
760 tree stmts = BIND_EXPR_BODY (bind_expr);
|
|
761
|
|
762 append_to_statement_list_force (stmt, &stmts);
|
|
763 return stmt;
|
|
764 }
|
|
765
|
|
766 /* Creates a new "alloca frame" for the current function by
|
|
767 injecting an alloca frame push in the beginning of the function
|
|
768 and an alloca frame pop before all function exit points. */
|
|
769
|
|
770 void
|
|
771 brig_function::create_alloca_frame ()
|
|
772 {
|
|
773 tree_stmt_iterator entry;
|
|
774
|
|
775 /* Adds the alloca push only after the ids have been initialized
|
|
776 in case of a kernel function. */
|
|
777 if (m_is_kernel)
|
|
778 entry = m_kernel_entry;
|
|
779 else
|
|
780 {
|
|
781 tree bind_expr = m_current_bind_expr;
|
|
782 tree stmts = BIND_EXPR_BODY (bind_expr);
|
|
783 entry = tsi_start (stmts);
|
|
784 }
|
|
785
|
|
786 tree push_frame_builtin = builtin_decl_explicit (BUILT_IN_HSAIL_PUSH_FRAME);
|
|
787 tree push_frame_call
|
|
788 = call_builtin (push_frame_builtin, 1, void_type_node, ptr_type_node,
|
|
789 m_context_arg);
|
|
790
|
|
791 tsi_link_before (&entry, push_frame_call, TSI_NEW_STMT);
|
|
792
|
|
793 tree pop_frame_builtin = builtin_decl_explicit (BUILT_IN_HSAIL_POP_FRAME);
|
|
794
|
|
795 do
|
|
796 {
|
|
797 tree stmt = tsi_stmt (entry);
|
|
798 if (TREE_CODE (stmt) == RETURN_EXPR)
|
|
799 {
|
|
800 tree pop_frame_call
|
|
801 = call_builtin (pop_frame_builtin, 1, void_type_node,
|
|
802 ptr_type_node, m_context_arg);
|
|
803
|
|
804 tsi_link_before (&entry, pop_frame_call, TSI_SAME_STMT);
|
|
805 }
|
|
806 tsi_next (&entry);
|
|
807 }
|
|
808 while (!tsi_end_p (entry));
|
|
809 }
|
|
810
|
|
/* Finishes the currently built function.  After calling this, no new
   statements should be appended to the function.

   Makes sure the function ends with a return statement and, if the
   function uses allocas, wraps it in an alloca frame push/pop pair.  */
void
brig_function::finish ()
{
  append_return_stmt ();

  /* Currently assume single alloca frame per WG.  */
  if (m_has_allocas)
    create_alloca_frame ();
}
|
|
822
|
|
/* Finishes the currently built kernel function: appends the common exit
   label, attempts the work-group function conversion, makes sure the
   kernel ends with a return statement and, if the kernel uses allocas,
   creates the alloca frame.  */

void
brig_function::finish_kernel ()
{
  /* Kernel functions should have a single exit point.
     Let's create one.  The return instructions should have
     been converted to branches to this label.  */
  append_statement (build_stmt (LABEL_EXPR, m_exit_label));
  /* Attempt to convert the kernel to a work-group function that
     executes all work-items of the WG using a loop.  The result is not
     checked here; a successful conversion is recorded in
     m_is_wg_function.  */
  convert_to_wg_function ();

  append_return_stmt ();

  /* Currently assume single alloca frame per WG.  */
  if (m_has_allocas)
    create_alloca_frame ();
}
|
|
840
|
|
841 void
|
|
842 brig_function::append_return_stmt ()
|
|
843 {
|
|
844 gcc_assert (m_current_bind_expr != NULL_TREE);
|
|
845 tree stmts = BIND_EXPR_BODY (m_current_bind_expr);
|
|
846
|
|
847 if (STATEMENT_LIST_TAIL (stmts) == NULL)
|
|
848 return; /* Empty function. */
|
|
849
|
|
850 tree last_stmt = tsi_stmt (tsi_last (stmts));
|
|
851
|
|
852 if (TREE_CODE (last_stmt) == RETURN_EXPR)
|
|
853 return;
|
|
854
|
|
855 if (m_ret_value != NULL_TREE)
|
|
856 {
|
|
857 tree result_assign
|
|
858 = build2 (MODIFY_EXPR, TREE_TYPE (m_ret_value), m_ret_value,
|
|
859 m_ret_temp);
|
|
860
|
|
861 tree return_expr
|
|
862 = build1 (RETURN_EXPR, TREE_TYPE (result_assign), result_assign);
|
|
863 append_to_statement_list_force (return_expr, &stmts);
|
|
864 }
|
|
865 else
|
|
866 {
|
|
867 tree return_stmt = build_stmt (RETURN_EXPR, NULL);
|
|
868 append_to_statement_list_force (return_stmt, &stmts);
|
|
869 }
|
|
870 }
|
|
871
|
|
872 bool
|
|
873 brig_function::has_function_scope_var (const BrigBase* var) const
|
|
874 {
|
|
875 return m_function_scope_vars.find (var) != m_function_scope_vars.end ();
|
|
876 }
|
|
877
|
|
878 size_t
|
|
879 brig_function::group_variable_segment_offset (const std::string &name) const
|
|
880 {
|
|
881 if (m_local_group_variables.has_variable (name))
|
|
882 return m_local_group_variables.segment_offset (name);
|
|
883
|
|
884 gcc_assert (m_parent->m_module_group_variables.has_variable (name));
|
|
885 return m_parent->m_module_group_variables.segment_offset (name);
|
|
886 }
|
131
|
887
|
|
888 /* Try to expand the given builtin call to reuse a previously generated
|
|
889 variable, if possible. If not, just call the given builtin.
|
|
890 BRIG_OPCODE and BRIG_TYPE identify the builtin's BRIG opcode/type,
|
|
891 ARITH_TYPE its GENERIC type, and OPERANDS contains the builtin's
|
|
892 input operands. */
|
|
893
|
|
894 tree
|
|
895 brig_function::expand_or_call_builtin (BrigOpcode16_t brig_opcode,
|
|
896 BrigType16_t brig_type,
|
|
897 tree arith_type,
|
|
898 tree_stl_vec &operands)
|
|
899 {
|
|
900 if (needs_workitem_context_data (brig_opcode))
|
|
901 m_has_unexpanded_dp_builtins = true;
|
|
902
|
|
903 if (can_expand_builtin (brig_opcode))
|
|
904 return expand_builtin (brig_opcode, operands);
|
|
905
|
|
906 tree built_in
|
|
907 = get_builtin_for_hsa_opcode (arith_type, brig_opcode, brig_type);
|
|
908
|
|
909 if (!VECTOR_TYPE_P (TREE_TYPE (TREE_TYPE (built_in)))
|
|
910 && arith_type != NULL_TREE && VECTOR_TYPE_P (arith_type)
|
|
911 && brig_opcode != BRIG_OPCODE_LERP
|
|
912 && brig_opcode != BRIG_OPCODE_PACKCVT
|
|
913 && brig_opcode != BRIG_OPCODE_SAD
|
|
914 && brig_opcode != BRIG_OPCODE_SADHI)
|
|
915 {
|
|
916 /* Call the scalar built-in for all elements in the vector. */
|
|
917 tree_stl_vec operand0_elements;
|
|
918 if (operands.size () > 0)
|
|
919 unpack (operands[0], operand0_elements);
|
|
920
|
|
921 tree_stl_vec operand1_elements;
|
|
922 if (operands.size () > 1)
|
|
923 unpack (operands[1], operand1_elements);
|
|
924
|
|
925 tree_stl_vec result_elements;
|
|
926
|
|
927 size_t element_count = gccbrig_type_vector_subparts (arith_type);
|
|
928 for (size_t i = 0; i < element_count; ++i)
|
|
929 {
|
|
930 tree_stl_vec call_operands;
|
|
931 if (operand0_elements.size () > 0)
|
|
932 call_operands.push_back (operand0_elements.at (i));
|
|
933
|
|
934 if (operand1_elements.size () > 0)
|
|
935 call_operands.push_back (operand1_elements.at (i));
|
|
936
|
|
937 result_elements.push_back
|
|
938 (expand_or_call_builtin (brig_opcode, brig_type,
|
|
939 TREE_TYPE (arith_type),
|
|
940 call_operands));
|
|
941 }
|
|
942 return pack (result_elements);
|
|
943 }
|
|
944
|
|
945 tree_stl_vec call_operands;
|
|
946 tree_stl_vec operand_types;
|
|
947
|
|
948 tree arg_type_chain = TYPE_ARG_TYPES (TREE_TYPE (built_in));
|
|
949
|
|
950 for (size_t i = 0; i < operands.size (); ++i)
|
|
951 {
|
|
952 tree operand_type = TREE_VALUE (arg_type_chain);
|
|
953 call_operands.push_back (convert (operand_type, operands[i]));
|
|
954 operand_types.push_back (operand_type);
|
|
955 arg_type_chain = TREE_CHAIN (arg_type_chain);
|
|
956 }
|
|
957
|
|
958 if (needs_workitem_context_data (brig_opcode))
|
|
959 {
|
|
960 call_operands.push_back (m_context_arg);
|
|
961 operand_types.push_back (ptr_type_node);
|
|
962 }
|
|
963
|
|
964 size_t operand_count = call_operands.size ();
|
|
965
|
|
966 call_operands.resize (4, NULL_TREE);
|
|
967 operand_types.resize (4, NULL_TREE);
|
|
968 for (size_t i = 0; i < operand_count; ++i)
|
|
969 call_operands.at (i) = build_resize_convert_view (operand_types.at (i),
|
|
970 call_operands.at (i));
|
|
971
|
|
972 tree fnptr = build_fold_addr_expr (built_in);
|
|
973 return build_call_array (TREE_TYPE (TREE_TYPE (built_in)), fnptr,
|
|
974 operand_count, &call_operands[0]);
|
|
975 }
|
|
976
|
|
977 /* Instead of calling a built-in function, use a more efficient mechanism
|
|
978 such as reuse a previously returned value known to be still valid, or
|
|
979 access the work-item context struct directly. This is beneficial especially
|
|
980 for the work-item identification related builtins as not having them as
|
|
981 unanalyzable black box calls can lead to more easily vectorizable parallel
|
|
982 loops for multi work-item work-groups. BRIG_OPCODE identifies the builtin
|
|
983 and OPERANDS store the operands. */
|
|
984
|
|
985 tree
|
|
986 brig_function::expand_builtin (BrigOpcode16_t brig_opcode,
|
|
987 tree_stl_vec &operands)
|
|
988 {
|
|
989 tree_stl_vec uint32_0 = tree_stl_vec (1, build_int_cst (uint32_type_node, 0));
|
|
990
|
|
991 tree_stl_vec uint32_1 = tree_stl_vec (1, build_int_cst (uint32_type_node, 1));
|
|
992
|
|
993 tree_stl_vec uint32_2 = tree_stl_vec (1, build_int_cst (uint32_type_node, 2));
|
|
994
|
|
995 if (brig_opcode == BRIG_OPCODE_WORKITEMFLATABSID)
|
|
996 {
|
|
997 tree id0 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_0);
|
|
998 id0 = convert (uint64_type_node, id0);
|
|
999
|
|
1000 tree id1 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_1);
|
|
1001 id1 = convert (uint64_type_node, id1);
|
|
1002
|
|
1003 tree id2 = expand_builtin (BRIG_OPCODE_WORKITEMABSID, uint32_2);
|
|
1004 id2 = convert (uint64_type_node, id2);
|
|
1005
|
|
1006 tree max0 = convert (uint64_type_node, m_grid_size_vars[0]);
|
|
1007 tree max1 = convert (uint64_type_node, m_grid_size_vars[1]);
|
|
1008
|
|
1009 tree id2_x_max0_x_max1 = build2 (MULT_EXPR, uint64_type_node, id2, max0);
|
|
1010 id2_x_max0_x_max1
|
|
1011 = build2 (MULT_EXPR, uint64_type_node, id2_x_max0_x_max1, max1);
|
|
1012
|
|
1013 tree id1_x_max0 = build2 (MULT_EXPR, uint64_type_node, id1, max0);
|
|
1014
|
|
1015 tree sum = build2 (PLUS_EXPR, uint64_type_node, id0, id1_x_max0);
|
|
1016 sum = build2 (PLUS_EXPR, uint64_type_node, sum, id2_x_max0_x_max1);
|
|
1017
|
|
1018 return add_temp_var ("workitemflatabsid", sum);
|
|
1019 }
|
|
1020 else if (brig_opcode == BRIG_OPCODE_WORKITEMABSID)
|
|
1021 {
|
|
1022 HOST_WIDE_INT dim = int_constant_value (operands[0]);
|
|
1023 return m_abs_id_vars[dim];
|
|
1024 }
|
|
1025 else if (brig_opcode == BRIG_OPCODE_WORKITEMFLATID)
|
|
1026 {
|
|
1027
|
|
1028 tree wg_size_x = expand_builtin (BRIG_OPCODE_WORKGROUPSIZE, uint32_0);
|
|
1029 tree wg_size_y = expand_builtin (BRIG_OPCODE_WORKGROUPSIZE, uint32_1);
|
|
1030 tree z_x_wgsx_wgsy
|
|
1031 = build2 (MULT_EXPR, uint32_type_node,
|
|
1032 convert (uint32_type_node,
|
|
1033 expand_builtin (BRIG_OPCODE_WORKITEMID, uint32_2)),
|
|
1034 wg_size_x);
|
|
1035 z_x_wgsx_wgsy = build2 (MULT_EXPR, uint32_type_node, z_x_wgsx_wgsy,
|
|
1036 wg_size_y);
|
|
1037
|
|
1038 tree y_x_wgsx
|
|
1039 = build2 (MULT_EXPR, uint32_type_node,
|
|
1040 convert (uint32_type_node,
|
|
1041 expand_builtin (BRIG_OPCODE_WORKITEMID, uint32_1)),
|
|
1042 wg_size_x);
|
|
1043
|
|
1044 tree sum = build2 (PLUS_EXPR, uint32_type_node, y_x_wgsx, z_x_wgsx_wgsy);
|
|
1045 sum = build2 (PLUS_EXPR, uint32_type_node,
|
|
1046 convert (uint32_type_node,
|
|
1047 expand_builtin (BRIG_OPCODE_WORKITEMID, uint32_0)),
|
|
1048 sum);
|
|
1049 return add_temp_var ("workitemflatid", sum);
|
|
1050 }
|
|
1051 else if (brig_opcode == BRIG_OPCODE_WORKGROUPSIZE)
|
|
1052 {
|
|
1053 HOST_WIDE_INT dim = int_constant_value (operands[0]);
|
|
1054 if (flag_assume_phsa)
|
|
1055 {
|
|
1056 tree ptr_type = build_pointer_type (uint32_type_node);
|
|
1057 tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
|
|
1058 build_int_cst (ptr_type,
|
|
1059 PHSA_CONTEXT_WG_SIZES
|
|
1060 + dim * 4));
|
|
1061 std::string name ("wgsize_x");
|
|
1062 name [name.length() - 1] += dim;
|
|
1063 return add_temp_var (name.c_str(), ctx);
|
|
1064 }
|
|
1065 else if (m_is_kernel)
|
|
1066 {
|
|
1067 /* For kernels without phsa we generate certain temps before
|
|
1068 the WI loop, which means we don't need to rely on LICM to get
|
|
1069 them moved out. */
|
|
1070 return m_wg_size_vars[dim];
|
|
1071 }
|
|
1072 else
|
|
1073 gcc_unreachable ();
|
|
1074 }
|
|
1075 else if (brig_opcode == BRIG_OPCODE_WORKITEMID)
|
|
1076 {
|
|
1077 HOST_WIDE_INT dim = int_constant_value (operands[0]);
|
|
1078 if (m_is_kernel)
|
|
1079 {
|
|
1080 return m_local_id_vars [dim];
|
|
1081 }
|
|
1082 else if (flag_assume_phsa)
|
|
1083 {
|
|
1084 tree ptr_type = build_pointer_type (uint32_type_node);
|
|
1085 tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
|
|
1086 build_int_cst (ptr_type,
|
|
1087 PHSA_CONTEXT_OFFS_WI_IDS
|
|
1088 + dim * 4));
|
|
1089 std::string name ("wiid_x");
|
|
1090 name [name.length() - 1] += dim;
|
|
1091 return add_temp_var (name.c_str(), ctx);
|
|
1092 }
|
|
1093 else
|
|
1094 gcc_unreachable ();
|
|
1095 }
|
|
1096 else if (brig_opcode == BRIG_OPCODE_WORKGROUPID)
|
|
1097 {
|
|
1098 HOST_WIDE_INT dim = int_constant_value (operands[0]);
|
|
1099 if (flag_assume_phsa)
|
|
1100 {
|
|
1101 tree ptr_type = build_pointer_type (uint32_type_node);
|
|
1102 tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
|
|
1103 build_int_cst (ptr_type,
|
|
1104 PHSA_CONTEXT_OFFS_WG_IDS
|
|
1105 + dim * 4));
|
|
1106 std::string name ("wgid_x");
|
|
1107 name [name.length() - 1] += dim;
|
|
1108 return add_temp_var (name.c_str(), ctx);
|
|
1109 } else if (m_is_kernel)
|
|
1110 return m_wg_id_vars [dim];
|
|
1111 else
|
|
1112 gcc_unreachable ();
|
|
1113 }
|
|
1114 else if (brig_opcode == BRIG_OPCODE_CURRENTWORKGROUPSIZE)
|
|
1115 {
|
|
1116 HOST_WIDE_INT dim = int_constant_value (operands[0]);
|
|
1117 if (flag_assume_phsa)
|
|
1118 {
|
|
1119 tree ptr_type = build_pointer_type (uint32_type_node);
|
|
1120 tree ctx = build2 (MEM_REF, uint32_type_node, m_context_arg,
|
|
1121 build_int_cst (ptr_type,
|
|
1122 PHSA_CONTEXT_CURRENT_WG_SIZES
|
|
1123 + dim * 4));
|
|
1124 std::string name ("curwgsize_x");
|
|
1125 name [name.length() - 1] += dim;
|
|
1126 return add_temp_var (name.c_str(), ctx);
|
|
1127 } else if (m_is_kernel)
|
|
1128 return m_cur_wg_size_vars[dim];
|
|
1129 else
|
|
1130 gcc_unreachable ();
|
|
1131 }
|
|
1132 else
|
|
1133 gcc_unreachable ();
|
|
1134
|
|
1135 return NULL_TREE;
|
|
1136 }
|
|
1137
|
|
1138 /* Returns true in case the given opcode that would normally be generated
|
|
1139 as a builtin call can be expanded to tree nodes. */
|
|
1140
|
|
1141 bool
|
|
1142 brig_function::can_expand_builtin (BrigOpcode16_t brig_opcode) const
|
|
1143 {
|
|
1144 switch (brig_opcode)
|
|
1145 {
|
|
1146 case BRIG_OPCODE_CURRENTWORKGROUPSIZE:
|
|
1147 case BRIG_OPCODE_WORKITEMFLATID:
|
|
1148 case BRIG_OPCODE_WORKITEMID:
|
|
1149 case BRIG_OPCODE_WORKGROUPID:
|
|
1150 case BRIG_OPCODE_WORKGROUPSIZE:
|
|
1151 return m_is_kernel || flag_assume_phsa;
|
|
1152 case BRIG_OPCODE_WORKITEMFLATABSID:
|
|
1153 case BRIG_OPCODE_WORKITEMABSID:
|
|
1154 return m_is_kernel;
|
|
1155 default:
|
|
1156 return false;
|
|
1157 };
|
|
1158 }
|
|
1159
|
|
1160 /* In case the HSA instruction must be implemented using a builtin,
|
|
1161 this function is called to get the correct builtin function.
|
|
1162 TYPE is the instruction tree type, BRIG_OPCODE the opcode of the
|
|
1163 brig instruction and BRIG_TYPE the brig instruction's type. */
|
|
1164
|
|
1165 tree
|
|
1166 brig_function::get_builtin_for_hsa_opcode
|
|
1167 (tree type, BrigOpcode16_t brig_opcode, BrigType16_t brig_type) const
|
|
1168 {
|
|
1169 tree builtin = NULL_TREE;
|
|
1170 tree builtin_type = type;
|
|
1171
|
|
1172 /* For vector types, first find the scalar version of the builtin. */
|
|
1173 if (type != NULL_TREE && VECTOR_TYPE_P (type))
|
|
1174 builtin_type = TREE_TYPE (type);
|
|
1175 BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK;
|
|
1176
|
|
1177 /* Some BRIG opcodes can use the same builtins for unsigned and
|
|
1178 signed types. Force these cases to unsigned types. */
|
|
1179
|
|
1180 if (brig_opcode == BRIG_OPCODE_BORROW
|
|
1181 || brig_opcode == BRIG_OPCODE_CARRY
|
|
1182 || brig_opcode == BRIG_OPCODE_LASTBIT
|
|
1183 || brig_opcode == BRIG_OPCODE_BITINSERT)
|
|
1184 {
|
|
1185 if (brig_type == BRIG_TYPE_S32)
|
|
1186 brig_type = BRIG_TYPE_U32;
|
|
1187 else if (brig_type == BRIG_TYPE_S64)
|
|
1188 brig_type = BRIG_TYPE_U64;
|
|
1189 }
|
|
1190
|
|
1191 switch (brig_opcode)
|
|
1192 {
|
|
1193 case BRIG_OPCODE_FLOOR:
|
|
1194 builtin = mathfn_built_in (builtin_type, BUILT_IN_FLOOR);
|
|
1195 break;
|
|
1196 case BRIG_OPCODE_CEIL:
|
|
1197 builtin = mathfn_built_in (builtin_type, BUILT_IN_CEIL);
|
|
1198 break;
|
|
1199 case BRIG_OPCODE_SQRT:
|
|
1200 case BRIG_OPCODE_NSQRT:
|
|
1201 builtin = mathfn_built_in (builtin_type, BUILT_IN_SQRT);
|
|
1202 break;
|
|
1203 case BRIG_OPCODE_RINT:
|
|
1204 builtin = mathfn_built_in (builtin_type, BUILT_IN_RINT);
|
|
1205 break;
|
|
1206 case BRIG_OPCODE_TRUNC:
|
|
1207 builtin = mathfn_built_in (builtin_type, BUILT_IN_TRUNC);
|
|
1208 break;
|
|
1209 case BRIG_OPCODE_COPYSIGN:
|
|
1210 builtin = mathfn_built_in (builtin_type, BUILT_IN_COPYSIGN);
|
|
1211 break;
|
|
1212 case BRIG_OPCODE_NSIN:
|
|
1213 builtin = mathfn_built_in (builtin_type, BUILT_IN_SIN);
|
|
1214 break;
|
|
1215 case BRIG_OPCODE_NLOG2:
|
|
1216 builtin = mathfn_built_in (builtin_type, BUILT_IN_LOG2);
|
|
1217 break;
|
|
1218 case BRIG_OPCODE_NEXP2:
|
|
1219 builtin = mathfn_built_in (builtin_type, BUILT_IN_EXP2);
|
|
1220 break;
|
|
1221 case BRIG_OPCODE_FMA:
|
|
1222 case BRIG_OPCODE_NFMA:
|
|
1223 builtin = mathfn_built_in (builtin_type, BUILT_IN_FMA);
|
|
1224 break;
|
|
1225 case BRIG_OPCODE_NCOS:
|
|
1226 builtin = mathfn_built_in (builtin_type, BUILT_IN_COS);
|
|
1227 break;
|
|
1228 case BRIG_OPCODE_POPCOUNT:
|
|
1229 /* Popcount should be typed by its argument type (the return value
|
|
1230 is always u32). Let's use a b64 version for also for b32 for now. */
|
|
1231 return builtin_decl_explicit (BUILT_IN_POPCOUNTL);
|
|
1232 case BRIG_OPCODE_BORROW:
|
|
1233 /* Borrow uses the same builtin for unsigned and signed types. */
|
|
1234 if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32)
|
|
1235 return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U32);
|
|
1236 else
|
|
1237 return builtin_decl_explicit (BUILT_IN_HSAIL_BORROW_U64);
|
|
1238 case BRIG_OPCODE_CARRY:
|
|
1239 /* Carry also uses the same builtin for unsigned and signed types. */
|
|
1240 if (brig_type == BRIG_TYPE_S32 || brig_type == BRIG_TYPE_U32)
|
|
1241 return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U32);
|
|
1242 else
|
|
1243 return builtin_decl_explicit (BUILT_IN_HSAIL_CARRY_U64);
|
|
1244 default:
|
|
1245
|
|
1246 /* Use our builtin index for finding a proper builtin for the BRIG
|
|
1247 opcode and BRIG type. This takes care most of the builtin cases,
|
|
1248 the special cases are handled in the separate 'case' statements
|
|
1249 above. */
|
|
1250 builtin_map::const_iterator i
|
|
1251 = s_custom_builtins.find (std::make_pair (brig_opcode, brig_type));
|
|
1252 if (i != s_custom_builtins.end ())
|
|
1253 return (*i).second;
|
|
1254
|
|
1255 if (brig_inner_type != brig_type)
|
|
1256 {
|
|
1257 /* Try to find a scalar built-in we could use. */
|
|
1258 i = s_custom_builtins.find
|
|
1259 (std::make_pair (brig_opcode, brig_inner_type));
|
|
1260 if (i != s_custom_builtins.end ())
|
|
1261 return (*i).second;
|
|
1262 }
|
|
1263
|
|
1264 /* In case this is an fp16 operation that is promoted to fp32,
|
|
1265 try to find a fp32 scalar built-in. */
|
|
1266 if (brig_inner_type == BRIG_TYPE_F16)
|
|
1267 {
|
|
1268 i = s_custom_builtins.find
|
|
1269 (std::make_pair (brig_opcode, BRIG_TYPE_F32));
|
|
1270 if (i != s_custom_builtins.end ())
|
|
1271 return (*i).second;
|
|
1272 }
|
|
1273 gcc_unreachable ();
|
|
1274 }
|
|
1275
|
|
1276 if (VECTOR_TYPE_P (type) && builtin != NULL_TREE)
|
|
1277 {
|
|
1278 /* Try to find a vectorized version of the built-in.
|
|
1279 TODO: properly assert that builtin is a mathfn builtin? */
|
|
1280 tree vec_builtin
|
|
1281 = targetm.vectorize.builtin_vectorized_function
|
|
1282 (builtin_mathfn_code (builtin), type, type);
|
|
1283 if (vec_builtin != NULL_TREE)
|
|
1284 return vec_builtin;
|
|
1285 else
|
|
1286 return builtin;
|
|
1287 }
|
|
1288 if (builtin == NULL_TREE)
|
|
1289 gcc_unreachable ();
|
|
1290 return builtin;
|
|
1291 }
|
|
1292
|
|
1293 /* Unpacks the elements of the vector in VALUE to scalars (bit field
|
|
1294 references) in ELEMENTS. */
|
|
1295
|
|
1296 void
|
|
1297 brig_function::unpack (tree value, tree_stl_vec &elements)
|
|
1298 {
|
|
1299 size_t vec_size = int_size_in_bytes (TREE_TYPE (value));
|
|
1300 size_t element_size
|
|
1301 = int_size_in_bytes (TREE_TYPE (TREE_TYPE (value))) * BITS_PER_UNIT;
|
|
1302 size_t element_count
|
|
1303 = vec_size * BITS_PER_UNIT / element_size;
|
|
1304
|
|
1305 tree input_element_type = TREE_TYPE (TREE_TYPE (value));
|
|
1306
|
|
1307 value = add_temp_var ("unpack_input", value);
|
|
1308
|
|
1309 for (size_t i = 0; i < element_count; ++i)
|
|
1310 {
|
|
1311 tree element
|
|
1312 = build3 (BIT_FIELD_REF, input_element_type, value,
|
|
1313 TYPE_SIZE (input_element_type),
|
|
1314 bitsize_int(i * element_size));
|
|
1315
|
|
1316 element = add_temp_var ("scalar", element);
|
|
1317 elements.push_back (element);
|
|
1318 }
|
|
1319 }
|
|
1320
|
|
1321 /* Pack the elements of the scalars in ELEMENTS to the returned vector. */
|
|
1322
|
|
1323 tree
|
|
1324 brig_function::pack (tree_stl_vec &elements)
|
|
1325 {
|
|
1326 size_t element_count = elements.size ();
|
|
1327
|
|
1328 gcc_assert (element_count > 1);
|
|
1329
|
|
1330 tree output_element_type = TREE_TYPE (elements.at (0));
|
|
1331
|
|
1332 vec<constructor_elt, va_gc> *constructor_vals = NULL;
|
|
1333 for (size_t i = 0; i < element_count; ++i)
|
|
1334 CONSTRUCTOR_APPEND_ELT (constructor_vals, NULL_TREE, elements.at (i));
|
|
1335
|
|
1336 tree vec_type = build_vector_type (output_element_type, element_count);
|
|
1337
|
|
1338 /* build_constructor creates a vector type which is not a vector_cst
|
|
1339 that requires compile time constant elements. */
|
|
1340 tree vec = build_constructor (vec_type, constructor_vals);
|
|
1341
|
|
1342 /* Add a temp variable for readability. */
|
|
1343 tree tmp_var = create_tmp_var (vec_type, "vec_out");
|
|
1344 tree vec_tmp_assign = build2 (MODIFY_EXPR, TREE_TYPE (tmp_var), tmp_var, vec);
|
|
1345 append_statement (vec_tmp_assign);
|
|
1346 return tmp_var;
|
|
1347 }
|
|
1348
|
|
1349 /* Returns true in case the given opcode needs to know about work-item context
|
|
1350 data. In such case the context data is passed as a pointer to a work-item
|
|
1351 context object, as the last argument in the builtin call. */
|
|
1352
|
|
1353 bool
|
|
1354 brig_function::needs_workitem_context_data
|
|
1355 (BrigOpcode16_t brig_opcode)
|
|
1356 {
|
|
1357 switch (brig_opcode)
|
|
1358 {
|
|
1359 case BRIG_OPCODE_WORKITEMABSID:
|
|
1360 case BRIG_OPCODE_WORKITEMFLATABSID:
|
|
1361 case BRIG_OPCODE_WORKITEMFLATID:
|
|
1362 case BRIG_OPCODE_CURRENTWORKITEMFLATID:
|
|
1363 case BRIG_OPCODE_WORKITEMID:
|
|
1364 case BRIG_OPCODE_WORKGROUPID:
|
|
1365 case BRIG_OPCODE_WORKGROUPSIZE:
|
|
1366 case BRIG_OPCODE_CURRENTWORKGROUPSIZE:
|
|
1367 case BRIG_OPCODE_GRIDGROUPS:
|
|
1368 case BRIG_OPCODE_GRIDSIZE:
|
|
1369 case BRIG_OPCODE_DIM:
|
|
1370 case BRIG_OPCODE_PACKETID:
|
|
1371 case BRIG_OPCODE_PACKETCOMPLETIONSIG:
|
|
1372 case BRIG_OPCODE_BARRIER:
|
|
1373 case BRIG_OPCODE_WAVEBARRIER:
|
|
1374 case BRIG_OPCODE_ARRIVEFBAR:
|
|
1375 case BRIG_OPCODE_INITFBAR:
|
|
1376 case BRIG_OPCODE_JOINFBAR:
|
|
1377 case BRIG_OPCODE_LEAVEFBAR:
|
|
1378 case BRIG_OPCODE_RELEASEFBAR:
|
|
1379 case BRIG_OPCODE_WAITFBAR:
|
|
1380 case BRIG_OPCODE_CUID:
|
|
1381 case BRIG_OPCODE_MAXCUID:
|
|
1382 case BRIG_OPCODE_DEBUGTRAP:
|
|
1383 case BRIG_OPCODE_GROUPBASEPTR:
|
|
1384 case BRIG_OPCODE_KERNARGBASEPTR:
|
|
1385 case BRIG_OPCODE_ALLOCA:
|
|
1386 return true;
|
|
1387 default:
|
|
1388 return false;
|
|
1389 };
|
|
1390 }
|
|
1391
|
|
1392 /* Appends and returns a new temp variable and an accompanying assignment
|
|
1393 statement that stores the value of the given EXPR and has the given NAME. */
|
|
1394
|
|
1395 tree
|
|
1396 brig_function::add_temp_var (std::string name, tree expr)
|
|
1397 {
|
|
1398 tree temp_var = create_tmp_var (TREE_TYPE (expr), name.c_str ());
|
|
1399 tree assign = build2 (MODIFY_EXPR, TREE_TYPE (temp_var), temp_var, expr);
|
|
1400 append_statement (assign);
|
|
1401 return temp_var;
|
|
1402 }
|
|
1403
|
|
1404 /* Returns the integer constant value of the given node.
|
|
1405 If it's a cast, looks into the source of the cast. */
|
|
1406
|
|
1407 HOST_WIDE_INT
|
|
1408 brig_function::int_constant_value (tree node)
|
|
1409 {
|
|
1410 tree n = node;
|
|
1411 if (TREE_CODE (n) == VIEW_CONVERT_EXPR)
|
|
1412 n = TREE_OPERAND (n, 0);
|
|
1413 return int_cst_value (n);
|
|
1414 }
|
|
1415
|
|
1416 /* Returns the tree code that should be used to implement the given
|
|
1417 HSA instruction opcode (BRIG_OPCODE) for the given type of instruction
|
|
1418 (BRIG_TYPE). In case the opcode cannot be mapped to a TREE node directly,
|
|
1419 returns TREE_LIST (if it can be emulated with a simple chain of tree
|
|
1420 nodes) or CALL_EXPR if the opcode should be implemented using a builtin
|
|
1421 call. */
|
|
1422
|
|
1423 tree_code
|
|
1424 brig_function::get_tree_code_for_hsa_opcode
|
|
1425 (BrigOpcode16_t brig_opcode, BrigType16_t brig_type)
|
|
1426 {
|
|
1427 BrigType16_t brig_inner_type = brig_type & BRIG_TYPE_BASE_MASK;
|
|
1428 switch (brig_opcode)
|
|
1429 {
|
|
1430 case BRIG_OPCODE_NOP:
|
|
1431 return NOP_EXPR;
|
|
1432 case BRIG_OPCODE_ADD:
|
|
1433 return PLUS_EXPR;
|
|
1434 case BRIG_OPCODE_CMOV:
|
|
1435 if (brig_inner_type == brig_type)
|
|
1436 return COND_EXPR;
|
|
1437 else
|
|
1438 return VEC_COND_EXPR;
|
|
1439 case BRIG_OPCODE_SUB:
|
|
1440 return MINUS_EXPR;
|
|
1441 case BRIG_OPCODE_MUL:
|
|
1442 case BRIG_OPCODE_MUL24:
|
|
1443 return MULT_EXPR;
|
|
1444 case BRIG_OPCODE_MULHI:
|
|
1445 case BRIG_OPCODE_MUL24HI:
|
|
1446 return MULT_HIGHPART_EXPR;
|
|
1447 case BRIG_OPCODE_DIV:
|
|
1448 if (gccbrig_is_float_type (brig_inner_type))
|
|
1449 return RDIV_EXPR;
|
|
1450 else
|
|
1451 return TRUNC_DIV_EXPR;
|
|
1452 case BRIG_OPCODE_NEG:
|
|
1453 return NEGATE_EXPR;
|
|
1454 case BRIG_OPCODE_MIN:
|
|
1455 if (gccbrig_is_float_type (brig_inner_type))
|
|
1456 return CALL_EXPR;
|
|
1457 else
|
|
1458 return MIN_EXPR;
|
|
1459 case BRIG_OPCODE_MAX:
|
|
1460 if (gccbrig_is_float_type (brig_inner_type))
|
|
1461 return CALL_EXPR;
|
|
1462 else
|
|
1463 return MAX_EXPR;
|
|
1464 case BRIG_OPCODE_ABS:
|
|
1465 return ABS_EXPR;
|
|
1466 case BRIG_OPCODE_SHL:
|
|
1467 return LSHIFT_EXPR;
|
|
1468 case BRIG_OPCODE_SHR:
|
|
1469 return RSHIFT_EXPR;
|
|
1470 case BRIG_OPCODE_OR:
|
|
1471 return BIT_IOR_EXPR;
|
|
1472 case BRIG_OPCODE_XOR:
|
|
1473 return BIT_XOR_EXPR;
|
|
1474 case BRIG_OPCODE_AND:
|
|
1475 return BIT_AND_EXPR;
|
|
1476 case BRIG_OPCODE_NOT:
|
|
1477 return BIT_NOT_EXPR;
|
|
1478 case BRIG_OPCODE_RET:
|
|
1479 return RETURN_EXPR;
|
|
1480 case BRIG_OPCODE_MOV:
|
|
1481 case BRIG_OPCODE_LDF:
|
|
1482 return MODIFY_EXPR;
|
|
1483 case BRIG_OPCODE_LD:
|
|
1484 case BRIG_OPCODE_ST:
|
|
1485 return MEM_REF;
|
|
1486 case BRIG_OPCODE_BR:
|
|
1487 return GOTO_EXPR;
|
|
1488 case BRIG_OPCODE_REM:
|
|
1489 if (brig_type == BRIG_TYPE_U64 || brig_type == BRIG_TYPE_U32)
|
|
1490 return TRUNC_MOD_EXPR;
|
|
1491 else
|
|
1492 return CALL_EXPR;
|
|
1493 case BRIG_OPCODE_NRCP:
|
|
1494 case BRIG_OPCODE_NRSQRT:
|
|
1495 /* Implement as 1/f (x). gcc should pattern detect that and
|
|
1496 use a native instruction, if available, for it. */
|
|
1497 return TREE_LIST;
|
|
1498 case BRIG_OPCODE_FMA:
|
|
1499 case BRIG_OPCODE_FLOOR:
|
|
1500 case BRIG_OPCODE_CEIL:
|
|
1501 case BRIG_OPCODE_SQRT:
|
|
1502 case BRIG_OPCODE_NSQRT:
|
|
1503 case BRIG_OPCODE_RINT:
|
|
1504 case BRIG_OPCODE_TRUNC:
|
|
1505 case BRIG_OPCODE_POPCOUNT:
|
|
1506 case BRIG_OPCODE_COPYSIGN:
|
|
1507 case BRIG_OPCODE_NCOS:
|
|
1508 case BRIG_OPCODE_NSIN:
|
|
1509 case BRIG_OPCODE_NLOG2:
|
|
1510 case BRIG_OPCODE_NEXP2:
|
|
1511 case BRIG_OPCODE_NFMA:
|
|
1512 /* Class has type B1 regardless of the float type, thus
|
|
1513 the below builtin map search cannot find it. */
|
|
1514 case BRIG_OPCODE_CLASS:
|
|
1515 case BRIG_OPCODE_WORKITEMABSID:
|
|
1516 return CALL_EXPR;
|
|
1517 default:
|
|
1518
|
|
1519 /* Some BRIG opcodes can use the same builtins for unsigned and
|
|
1520 signed types. Force these cases to unsigned types.
|
|
1521 */
|
|
1522
|
|
1523 if (brig_opcode == BRIG_OPCODE_BORROW
|
|
1524 || brig_opcode == BRIG_OPCODE_CARRY
|
|
1525 || brig_opcode == BRIG_OPCODE_LASTBIT
|
|
1526 || brig_opcode == BRIG_OPCODE_BITINSERT)
|
|
1527 {
|
|
1528 if (brig_type == BRIG_TYPE_S32)
|
|
1529 brig_type = BRIG_TYPE_U32;
|
|
1530 else if (brig_type == BRIG_TYPE_S64)
|
|
1531 brig_type = BRIG_TYPE_U64;
|
|
1532 }
|
|
1533
|
|
1534
|
|
1535 builtin_map::const_iterator i
|
|
1536 = s_custom_builtins.find (std::make_pair (brig_opcode, brig_type));
|
|
1537 if (i != s_custom_builtins.end ())
|
|
1538 return CALL_EXPR;
|
|
1539 else if (s_custom_builtins.find
|
|
1540 (std::make_pair (brig_opcode, brig_inner_type))
|
|
1541 != s_custom_builtins.end ())
|
|
1542 return CALL_EXPR;
|
|
1543 if (brig_inner_type == BRIG_TYPE_F16
|
|
1544 && s_custom_builtins.find
|
|
1545 (std::make_pair (brig_opcode, BRIG_TYPE_F32))
|
|
1546 != s_custom_builtins.end ())
|
|
1547 return CALL_EXPR;
|
|
1548 break;
|
|
1549 }
|
|
1550 return TREE_LIST; /* Emulate using a chain of nodes. */
|
|
1551 }
|
|
1552
|
|
1553 /* Inform of an update to the REG_VAR. */
|
|
1554
|
|
1555 void
|
|
1556 brig_function::add_reg_var_update (tree reg_var, tree var)
|
|
1557 {
|
|
1558 if (var == m_abs_id_vars[0] || var == m_abs_id_vars[1]
|
|
1559 || var == m_abs_id_vars[2] || var == m_local_id_vars[0]
|
|
1560 || var == m_local_id_vars[1] || var == m_local_id_vars[2])
|
|
1561 m_id_val_defs [reg_var] = var;
|
|
1562 else
|
|
1563 {
|
|
1564 /* Possible overwrite of an ID value. */
|
|
1565
|
|
1566 id_val_map::iterator i = m_id_val_defs.find (reg_var);
|
|
1567 if (i != m_id_val_defs.end())
|
|
1568 m_id_val_defs.erase (i);
|
|
1569 }
|
|
1570 }
|
|
1571
|
|
1572 /* If the REG_VAR is known to contain an ID value at this point in
|
|
1573 the basic block, return true. */
|
|
1574
|
|
1575 bool
|
|
1576 brig_function::is_id_val (tree reg_var)
|
|
1577 {
|
|
1578 id_val_map::iterator i = m_id_val_defs.find (reg_var);
|
|
1579 return i != m_id_val_defs.end();
|
|
1580 }
|
|
1581
|
|
1582 /* Return an ID value for the given REG_VAR if its known to contain
|
|
1583 one at this point in the BB, NULL_TREE otherwise. */
|
|
1584
|
|
1585 tree
|
|
1586 brig_function::id_val (tree reg_var)
|
|
1587 {
|
|
1588 id_val_map::iterator i = m_id_val_defs.find (reg_var);
|
|
1589 if (i != m_id_val_defs.end())
|
|
1590 return (*i).second;
|
|
1591 else
|
|
1592 return NULL_TREE;
|
|
1593 }
|
|
1594
|
|
1595 /* Informs of starting a new basic block. Called when generating
|
|
1596 a label, a call, a jump, or a return. */
|
|
1597
|
|
1598 void
|
|
1599 brig_function::start_new_bb ()
|
|
1600 {
|
|
1601 m_id_val_defs.clear ();
|
|
1602 }
|