111
|
1 /* brig2tree.cc -- brig to gcc generic/gimple tree conversion
|
131
|
2 Copyright (C) 2016-2018 Free Software Foundation, Inc.
|
111
|
3 Contributed by Pekka Jaaskelainen <pekka.jaaskelainen@parmance.com>
|
|
4 for General Processor Tech.
|
|
5
|
|
6 This file is part of GCC.
|
|
7
|
|
8 GCC is free software; you can redistribute it and/or modify it under
|
|
9 the terms of the GNU General Public License as published by the Free
|
|
10 Software Foundation; either version 3, or (at your option) any later
|
|
11 version.
|
|
12
|
|
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
16 for more details.
|
|
17
|
|
18 You should have received a copy of the GNU General Public License
|
|
19 along with GCC; see the file COPYING3. If not see
|
|
20 <http://www.gnu.org/licenses/>. */
|
|
21
|
|
22 #include <cassert>
|
|
23 #include <iostream>
|
|
24 #include <iomanip>
|
|
25 #include <sstream>
|
|
26
|
|
27 #include "config.h"
|
|
28 #include "system.h"
|
|
29 #include "coretypes.h"
|
|
30 #include "target.h"
|
|
31 #include "function.h"
|
|
32 #include "brig-to-generic.h"
|
|
33 #include "stringpool.h"
|
|
34 #include "tree-iterator.h"
|
|
35 #include "toplev.h"
|
|
36 #include "gimplify.h"
|
|
37 #include "gimple-expr.h"
|
|
38 #include "print-tree.h"
|
|
39 #include "hsa-brig-format.h"
|
|
40 #include "stor-layout.h"
|
|
41 #include "diagnostic-core.h"
|
|
42 #include "brig-code-entry-handler.h"
|
|
43 #include "brig-machine.h"
|
|
44 #include "brig-util.h"
|
|
45 #include "phsa.h"
|
|
46 #include "tree-pretty-print.h"
|
|
47 #include "dumpfile.h"
|
|
48 #include "profile-count.h"
|
|
49 #include "tree-cfg.h"
|
|
50 #include "errors.h"
|
|
51 #include "fold-const.h"
|
|
52 #include "cgraph.h"
|
|
53 #include "dumpfile.h"
|
|
54 #include "tree-pretty-print.h"
|
131
|
55 #include "attribs.h"
|
111
|
56
|
|
/* Verbosity flag defined by the front end driver.  */
extern int gccbrig_verbose;

/* Definitions of the static tree caches for the three HSA float
   storage formats (see the constructor for their initialization).  */
tree brig_to_generic::s_fp16_type;
tree brig_to_generic::s_fp32_type;
tree brig_to_generic::s_fp64_type;

|
|
/* Constructs the BRIG-to-GENERIC converter and initializes the tree
   type nodes shared by all converted functions.  */

brig_to_generic::brig_to_generic ()
  : m_cf (NULL), m_analyzing (true), m_total_group_segment_usage (0),
    m_brig (NULL), m_next_private_offset (0)
{
  m_globals = NULL_TREE;

  /* Initialize the basic REAL types.
     This doesn't work straight away because most of the targets
     do not support fp16 natively.  Let's by default convert
     to fp32 and back before and after each instruction (handle it as
     a storage format only), and later add an optimization pass
     that removes the extra converts (in case of multiple fp16 ops
     in a row).  */
  s_fp16_type = make_node (REAL_TYPE);
  TYPE_PRECISION (s_fp16_type) = 16;
  TYPE_SIZE (s_fp16_type) = bitsize_int (16);
  TYPE_SIZE_UNIT (s_fp16_type) = size_int (2);
  SET_TYPE_ALIGN (s_fp16_type, 16);
  layout_type (s_fp16_type);

  /* f32 and f64 map directly to the target's native REAL types.  */
  s_fp32_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_F32);
  s_fp64_type = gccbrig_tree_type_for_hsa_type (BRIG_TYPE_F64);

  /* TODO: (machine)query the preferred rounding mode that is set by
     the machine by default.  This can be redefined by each BRIG module
     header.  */
  m_default_float_rounding_mode = BRIG_ROUND_FLOAT_ZERO;

  /* Opens the -fdump-tree-original stream, if requested; used for
     dumping the produced functions.  */
  m_dump_file = dump_begin (TDI_original, &m_dump_flags);
}
|
|
93
|
|
/* Handler for BRIG code section entries that are not expected to be
   reached; aborts compilation if invoked.  */

class unimplemented_entry_handler : public brig_code_entry_handler
{
public:
  unimplemented_entry_handler (brig_to_generic &parent)
    : brig_code_entry_handler (parent)
  {
  }

  /* Aborts via gcc_unreachable; the return statement only satisfies the
     handler signature.  */
  size_t
  operator () (const BrigBase *base)
  {
    gcc_unreachable ();
    return base->byteCount;
  }
};
|
|
109
|
|
/* Handler for entries that can be (and are) safely skipped for the purposes
   of GENERIC generation.  */

class skipped_entry_handler : public brig_code_entry_handler
{
public:
  skipped_entry_handler (brig_to_generic &parent)
    : brig_code_entry_handler (parent)
  {
  }

  /* Consumes the entry without generating anything; returns its size so
     the caller can advance to the next entry.  */
  size_t
  operator () (const BrigBase *base)
  {
    return base->byteCount;
  }
};
|
|
127
|
131
|
/* Handler used in the analysis prepass to record register usage of
   instruction operands.  */

class brig_reg_use_analyzer : public brig_code_entry_handler
{
public:
  brig_reg_use_analyzer (brig_to_generic &parent)
    : brig_code_entry_handler (parent)
  {
  }

  /* Records operand register uses of the instruction and returns the
     entry's byte count to advance the scan.  */
  size_t
  operator () (const BrigBase *base)
  {
    const BrigInstBase *brig_inst = (const BrigInstBase *) base;
    analyze_operands (*brig_inst);
    return base->byteCount;
  }

};
|
|
145
|
111
|
/* Helper struct for pairing a BrigKind and a BrigCodeEntryHandler that
   should handle its data.  */

struct code_entry_handler_info
{
  /* The BRIG entry kind this handler is responsible for.  */
  BrigKind kind;
  /* Non-owning pointer to the handler (lives on the caller's stack).  */
  brig_code_entry_handler *handler;
};
|
|
154
|
|
155
|
|
/* Finds the BRIG file sections in the currently processed file.
   Populates m_data/m_code/m_operand (and their sizes) from m_brig;
   aborts if any expected section is missing or misnamed.  */

void
brig_to_generic::find_brig_sections ()
{
  m_data = m_code = m_operand = NULL;
  const BrigModuleHeader *mheader = (const BrigModuleHeader *) m_brig;

  /* Find the positions of the different sections.  */
  for (uint32_t sec = 0; sec < mheader->sectionCount; ++sec)
    {
      /* The section index is an array of byte offsets from the start
	 of the module.  */
      uint64_t offset
	= ((const uint64_t *) (m_brig + mheader->sectionIndex))[sec];

      const BrigSectionHeader *section_header
	= (const BrigSectionHeader *) (m_brig + offset);

      std::string name ((const char *) (&section_header->name),
			section_header->nameLength);

      if (sec == BRIG_SECTION_INDEX_DATA && name == "hsa_data")
	{
	  m_data = (const char *) section_header;
	  m_data_size = section_header->byteCount;
	}
      else if (sec == BRIG_SECTION_INDEX_CODE && name == "hsa_code")
	{
	  m_code = (const char *) section_header;
	  m_code_size = section_header->byteCount;
	}
      else if (sec == BRIG_SECTION_INDEX_OPERAND && name == "hsa_operand")
	{
	  m_operand = (const char *) section_header;
	  m_operand_size = section_header->byteCount;
	}
      else
	{
	  gcc_unreachable ();
	}
    }

  /* All three standard sections must be present.  */
  if (m_code == NULL)
    gcc_unreachable ();
  if (m_data == NULL)
    gcc_unreachable ();
  if (m_operand == NULL)
    gcc_unreachable ();

}
|
|
205
|
|
206 /* Does a first pass over the given BRIG to collect data needed for the
|
|
207 actual parsing. Currently this includes only collecting the
|
|
208 group segment variable usage to support the experimental HSA PRM feature
|
|
209 where group variables can be declared also in module and function scope
|
|
210 (in addition to kernel scope).
|
|
211 */
|
|
212
|
|
213 void
|
|
214 brig_to_generic::analyze (const char *brig_blob)
|
|
215 {
|
|
216 const BrigModuleHeader *mheader = (const BrigModuleHeader *) brig_blob;
|
|
217
|
|
218 if (strncmp (mheader->identification, "HSA BRIG", 8) != 0)
|
|
219 fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE
|
|
220 "Unrecognized file format.");
|
|
221 if (mheader->brigMajor != 1 || mheader->brigMinor != 0)
|
|
222 fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_INCOMPATIBLE_MODULE
|
|
223 "BRIG version not supported. BRIG 1.0 required.");
|
|
224
|
|
225 m_brig = brig_blob;
|
|
226
|
|
227 find_brig_sections ();
|
|
228
|
|
229 brig_directive_variable_handler var_handler (*this);
|
|
230 brig_directive_fbarrier_handler fbar_handler (*this);
|
|
231 brig_directive_function_handler func_handler (*this);
|
131
|
232 brig_reg_use_analyzer reg_use_analyzer (*this);
|
111
|
233
|
|
234 /* Need this for grabbing the module names for mangling the
|
|
235 group variable names. */
|
|
236 brig_directive_module_handler module_handler (*this);
|
|
237 skipped_entry_handler skipped_handler (*this);
|
|
238
|
|
239 const BrigSectionHeader *csection_header = (const BrigSectionHeader *) m_code;
|
|
240
|
|
241 code_entry_handler_info handlers[]
|
131
|
242 = {{BRIG_KIND_INST_BASIC, ®_use_analyzer},
|
|
243 {BRIG_KIND_INST_MOD, ®_use_analyzer},
|
|
244 {BRIG_KIND_INST_CMP, ®_use_analyzer},
|
|
245 {BRIG_KIND_INST_MEM, ®_use_analyzer},
|
|
246 {BRIG_KIND_INST_CVT, ®_use_analyzer},
|
|
247 {BRIG_KIND_INST_SEG_CVT, ®_use_analyzer},
|
|
248 {BRIG_KIND_INST_SEG, ®_use_analyzer},
|
|
249 {BRIG_KIND_INST_ADDR, ®_use_analyzer},
|
|
250 {BRIG_KIND_INST_SOURCE_TYPE, ®_use_analyzer},
|
|
251 {BRIG_KIND_INST_ATOMIC, ®_use_analyzer},
|
|
252 {BRIG_KIND_INST_SIGNAL, ®_use_analyzer},
|
|
253 {BRIG_KIND_INST_BR, ®_use_analyzer},
|
|
254 {BRIG_KIND_INST_LANE, ®_use_analyzer},
|
|
255 {BRIG_KIND_INST_QUEUE, ®_use_analyzer},
|
|
256 {BRIG_KIND_DIRECTIVE_VARIABLE, &var_handler},
|
111
|
257 {BRIG_KIND_DIRECTIVE_FBARRIER, &fbar_handler},
|
|
258 {BRIG_KIND_DIRECTIVE_KERNEL, &func_handler},
|
|
259 {BRIG_KIND_DIRECTIVE_MODULE, &module_handler},
|
|
260 {BRIG_KIND_DIRECTIVE_FUNCTION, &func_handler}};
|
|
261
|
|
262 m_analyzing = true;
|
|
263 for (size_t b = csection_header->headerByteCount; b < m_code_size;)
|
|
264 {
|
|
265 const BrigBase *entry = (const BrigBase *) (m_code + b);
|
|
266
|
|
267 brig_code_entry_handler *handler = &skipped_handler;
|
|
268
|
|
269 if (m_cf != NULL && b >= m_cf->m_brig_def->nextModuleEntry)
|
|
270 {
|
|
271 /* The function definition ended. We can just discard the place
|
|
272 holder function. */
|
|
273 m_total_group_segment_usage += m_cf->m_local_group_variables.size ();
|
|
274 delete m_cf;
|
|
275 m_cf = NULL;
|
|
276 }
|
|
277
|
|
278 /* Find a handler. */
|
|
279 for (size_t i = 0;
|
|
280 i < sizeof (handlers) / sizeof (code_entry_handler_info); ++i)
|
|
281 {
|
|
282 if (handlers[i].kind == entry->kind)
|
|
283 handler = handlers[i].handler;
|
|
284 }
|
|
285
|
|
286 int bytes_processed = (*handler) (entry);
|
|
287 if (bytes_processed == 0)
|
|
288 fatal_error (UNKNOWN_LOCATION, PHSA_ERROR_PREFIX_CORRUPTED_MODULE
|
|
289 "Element with 0 bytes.");
|
|
290 b += bytes_processed;
|
|
291 }
|
|
292
|
|
293 if (m_cf != NULL)
|
|
294 {
|
|
295 m_total_group_segment_usage += m_cf->m_local_group_variables.size ();
|
|
296 delete m_cf;
|
|
297 m_cf = NULL;
|
|
298 }
|
|
299
|
|
300 m_total_group_segment_usage += m_module_group_variables.size ();
|
|
301 m_analyzing = false;
|
|
302 }
|
|
303
|
|
304 /* Parses the given BRIG blob. */
|
|
305
|
|
306 void
|
|
307 brig_to_generic::parse (const char *brig_blob)
|
|
308 {
|
|
309 m_brig = brig_blob;
|
|
310 find_brig_sections ();
|
|
311
|
|
312 brig_basic_inst_handler inst_handler (*this);
|
|
313 brig_branch_inst_handler branch_inst_handler (*this);
|
|
314 brig_cvt_inst_handler cvt_inst_handler (*this);
|
|
315 brig_seg_inst_handler seg_inst_handler (*this);
|
|
316 brig_copy_move_inst_handler copy_move_inst_handler (*this);
|
|
317 brig_signal_inst_handler signal_inst_handler (*this);
|
|
318 brig_atomic_inst_handler atomic_inst_handler (*this);
|
|
319 brig_cmp_inst_handler cmp_inst_handler (*this);
|
|
320 brig_mem_inst_handler mem_inst_handler (*this);
|
|
321 brig_inst_mod_handler inst_mod_handler (*this);
|
|
322 brig_directive_label_handler label_handler (*this);
|
|
323 brig_directive_variable_handler var_handler (*this);
|
|
324 brig_directive_fbarrier_handler fbar_handler (*this);
|
|
325 brig_directive_comment_handler comment_handler (*this);
|
|
326 brig_directive_function_handler func_handler (*this);
|
|
327 brig_directive_control_handler control_handler (*this);
|
|
328 brig_directive_arg_block_handler arg_block_handler (*this);
|
|
329 brig_directive_module_handler module_handler (*this);
|
|
330 brig_lane_inst_handler lane_inst_handler (*this);
|
|
331 brig_queue_inst_handler queue_inst_handler (*this);
|
|
332 skipped_entry_handler skipped_handler (*this);
|
|
333 unimplemented_entry_handler unimplemented_handler (*this);
|
|
334
|
|
335 struct code_entry_handler_info
|
|
336 {
|
|
337 BrigKind kind;
|
|
338 brig_code_entry_handler *handler;
|
|
339 };
|
|
340
|
|
341 /* TODO: Convert to a hash table / map. For now, put the more common
|
|
342 entries to the top to keep the scan fast on average. */
|
|
343 code_entry_handler_info handlers[]
|
|
344 = {{BRIG_KIND_INST_BASIC, &inst_handler},
|
|
345 {BRIG_KIND_INST_CMP, &cmp_inst_handler},
|
|
346 {BRIG_KIND_INST_MEM, &mem_inst_handler},
|
|
347 {BRIG_KIND_INST_MOD, &inst_mod_handler},
|
|
348 {BRIG_KIND_INST_CVT, &cvt_inst_handler},
|
|
349 {BRIG_KIND_INST_SEG_CVT, &seg_inst_handler},
|
|
350 {BRIG_KIND_INST_SEG, &seg_inst_handler},
|
|
351 {BRIG_KIND_INST_ADDR, ©_move_inst_handler},
|
|
352 {BRIG_KIND_INST_SOURCE_TYPE, ©_move_inst_handler},
|
|
353 {BRIG_KIND_INST_ATOMIC, &atomic_inst_handler},
|
|
354 {BRIG_KIND_INST_SIGNAL, &signal_inst_handler},
|
|
355 {BRIG_KIND_INST_BR, &branch_inst_handler},
|
|
356 {BRIG_KIND_INST_LANE, &lane_inst_handler},
|
|
357 {BRIG_KIND_INST_QUEUE, &queue_inst_handler},
|
|
358 /* Assuming fences are not needed. FIXME: call builtins
|
|
359 when porting to a platform where they are. */
|
|
360 {BRIG_KIND_INST_MEM_FENCE, &skipped_handler},
|
|
361 {BRIG_KIND_DIRECTIVE_LABEL, &label_handler},
|
|
362 {BRIG_KIND_DIRECTIVE_VARIABLE, &var_handler},
|
|
363 {BRIG_KIND_DIRECTIVE_ARG_BLOCK_START, &arg_block_handler},
|
|
364 {BRIG_KIND_DIRECTIVE_ARG_BLOCK_END, &arg_block_handler},
|
|
365 {BRIG_KIND_DIRECTIVE_FBARRIER, &fbar_handler},
|
|
366 {BRIG_KIND_DIRECTIVE_COMMENT, &comment_handler},
|
|
367 {BRIG_KIND_DIRECTIVE_KERNEL, &func_handler},
|
|
368 {BRIG_KIND_DIRECTIVE_SIGNATURE, &func_handler},
|
|
369 {BRIG_KIND_DIRECTIVE_FUNCTION, &func_handler},
|
|
370 {BRIG_KIND_DIRECTIVE_INDIRECT_FUNCTION, &func_handler},
|
|
371 {BRIG_KIND_DIRECTIVE_MODULE, &module_handler},
|
|
372 /* Skipping debug locations for now as not needed for conformance. */
|
|
373 {BRIG_KIND_DIRECTIVE_LOC, &skipped_handler},
|
|
374 /* There are no supported pragmas at this moment. */
|
|
375 {BRIG_KIND_DIRECTIVE_PRAGMA, &skipped_handler},
|
|
376 {BRIG_KIND_DIRECTIVE_CONTROL, &control_handler},
|
|
377 {BRIG_KIND_DIRECTIVE_EXTENSION, &skipped_handler},
|
|
378 /* BRIG_KIND_NONE entries are valid anywhere. They can be used
|
|
379 for patching BRIGs before finalization. */
|
|
380 {BRIG_KIND_NONE, &skipped_handler}};
|
|
381
|
|
382 const BrigSectionHeader *csection_header = (const BrigSectionHeader *) m_code;
|
|
383
|
|
384 for (size_t b = csection_header->headerByteCount; b < m_code_size;)
|
|
385 {
|
|
386 const BrigBase *entry = (const BrigBase *) (m_code + b);
|
|
387
|
|
388 brig_code_entry_handler *handler = &unimplemented_handler;
|
|
389
|
|
390 if (m_cf != NULL && b >= m_cf->m_brig_def->nextModuleEntry)
|
|
391 finish_function (); /* The function definition ended. */
|
|
392
|
|
393 /* Find a handler. */
|
|
394 for (size_t i = 0;
|
|
395 i < sizeof (handlers) / sizeof (code_entry_handler_info); ++i)
|
|
396 {
|
|
397 if (handlers[i].kind == entry->kind)
|
|
398 handler = handlers[i].handler;
|
|
399 }
|
|
400 b += (*handler) (entry);
|
|
401 }
|
|
402
|
|
403 finish_function ();
|
|
404 }
|
|
405
|
|
/* Returns the data section entry at ENTRY_OFFSET (a byte offset into
   the hsa_data section).  */

const BrigData *
brig_to_generic::get_brig_data_entry (size_t entry_offset) const
{
  return (const BrigData *) (m_data + entry_offset);
}
|
|
411
|
|
/* Returns the operand section entry at ENTRY_OFFSET (a byte offset into
   the hsa_operand section).  */

const BrigBase *
brig_to_generic::get_brig_operand_entry (size_t entry_offset) const
{
  return (const BrigBase *) (m_operand + entry_offset);
}
|
|
417
|
|
/* Returns the code section entry at ENTRY_OFFSET (a byte offset into
   the hsa_code section).  */

const BrigBase *
brig_to_generic::get_brig_code_entry (size_t entry_offset) const
{
  return (const BrigBase *) (m_code + entry_offset);
}
|
|
423
|
|
424 void
|
|
425 brig_to_generic::append_global (tree g)
|
|
426 {
|
|
427 if (m_globals == NULL_TREE)
|
|
428 {
|
|
429 m_globals = g;
|
|
430 return;
|
|
431 }
|
|
432 else
|
|
433 {
|
|
434 tree last = tree_last (m_globals);
|
|
435 TREE_CHAIN (last) = g;
|
|
436 }
|
|
437 }
|
|
438
|
|
439 tree
|
|
440 brig_to_generic::global_variable (const std::string &name) const
|
|
441 {
|
|
442 label_index::const_iterator i = m_global_variables.find (name);
|
|
443 if (i == m_global_variables.end ())
|
|
444 return NULL_TREE;
|
|
445 else
|
|
446 return (*i).second;
|
|
447 }
|
|
448
|
|
/* Returns a function declaration with the given name.  Assumes it has been
   created previously via a DirectiveFunction or similar.  Returns
   NULL_TREE when the name has not been registered.  */

tree
brig_to_generic::function_decl (const std::string &name)
{
  label_index::const_iterator i = m_function_index.find (name);
  if (i == m_function_index.end ())
    return NULL_TREE;
  return (*i).second;
}
|
|
460
|
|
/* Registers FUNC_DECL under NAME so later code entries can refer to it
   via function_decl ().  An existing entry for NAME is overwritten.  */

void
brig_to_generic::add_function_decl (const std::string &name, tree func_decl)
{
  m_function_index[name] = func_decl;
}
|
|
466
|
|
/* Adds a GENERIC global variable VAR_DECL with the given NAME to the
   current module.  If we have generated a host def var ptr (a place holder
   for variables that are defined by the HSA host code) for this global
   variable definition (because there was a declaration earlier which looked
   like it might have been a host defined variable), we now have
   to assign its address and make it private to allow the references to
   point to the defined variable instead.  */

void
brig_to_generic::add_global_variable (const std::string &name, tree var_decl)
{
  append_global (var_decl);
  m_global_variables[name] = var_decl;

  /* Check whether a host-def place holder pointer was created earlier
     for this name.  */
  std::string host_def_var_name
    = std::string (PHSA_HOST_DEF_PTR_PREFIX) + name;
  tree host_def_var = global_variable (host_def_var_name);
  if (host_def_var == NULL_TREE)
    return;

  /* Point the place holder at the now-known definition.  */
  tree ptype = build_pointer_type (TREE_TYPE (var_decl));
  tree var_addr = build1 (ADDR_EXPR, ptype, var_decl);

  DECL_INITIAL (host_def_var) = var_addr;
  TREE_PUBLIC (host_def_var) = 1;

  set_externally_visible (host_def_var);
}
|
|
495
|
|
/* Adds an indirection pointer for a potential host-defined program scope
   variable declaration.  The pointer variable is named by prefixing NAME
   with PHSA_HOST_DEF_PTR_PREFIX and registered as a global.  */

void
brig_to_generic::add_host_def_var_ptr (const std::string &name, tree var_decl)
{
  std::string var_name = std::string (PHSA_HOST_DEF_PTR_PREFIX) + name;

  tree name_identifier = get_identifier (var_name.c_str ());

  /* The place holder is a pointer to VAR_DECL's type; the host runtime
     can later patch it to point to the host-side definition.  */
  tree ptr_var = build_decl (UNKNOWN_LOCATION, VAR_DECL, name_identifier,
			     build_pointer_type (TREE_TYPE (var_decl)));
  DECL_EXTERNAL (ptr_var) = 0;
  DECL_ARTIFICIAL (ptr_var) = 0;

  /* Public, used, addressable and static so the symbol survives to the
     produced binary and can be queried by the runtime.  */
  TREE_PUBLIC (ptr_var) = 1;
  TREE_USED (ptr_var) = 1;
  TREE_ADDRESSABLE (ptr_var) = 1;
  TREE_STATIC (ptr_var) = 1;

  set_externally_visible (ptr_var);

  append_global (ptr_var);
  m_global_variables[var_name] = ptr_var;
}
|
|
521
|
131
|
/* Records CALL (a CALL_EXPR to a function declaration) so write_globals
   can later retarget it to the function's definition.  */

void
brig_to_generic::add_decl_call (tree call)
{
  m_decl_call.push_back (call);
}
|
|
527
|
111
|
/* Produce a "mangled name" for the given brig function or kernel.
   The mangling is used to make unique global symbol name in case of
   module scope functions.  Program scope functions are not mangled
   (except for dropping the leading &), which makes the functions
   directly visible for linking using the original function name.  */

std::string
brig_to_generic::get_mangled_name
(const BrigDirectiveExecutable *func) const
{
  /* Strip the leading &.  */
  std::string func_name = get_string (func->name).substr (1);
  if (func->linkage == BRIG_LINKAGE_MODULE)
    {
      /* Mangle the module scope function names with the module name and
	 make them public so they can be queried by the HSA runtime from
	 the produced binary.  Assume it's the currently processed function
	 we are always referring to.  */
      func_name = "gccbrig." + m_module_name + "." + func_name;
    }
  return func_name;
}
|
|
550
|
|
/* Returns the string stored in the data section at ENTRY_OFFSET.
   The length is taken from the entry's byteCount, so embedded NULs
   are preserved.  */

std::string
brig_to_generic::get_string (size_t entry_offset) const
{
  const BrigData *data_item = get_brig_data_entry (entry_offset);
  return std::string ((const char *) &data_item->bytes, data_item->byteCount);
}
|
|
557
|
|
/* Adapted from c-semantics.c.  Builds a statement tree node of CODE with
   the operands passed as varargs; the number of operands is taken from
   TREE_CODE_LENGTH.  The node's type is void.  */

tree
build_stmt (enum tree_code code, ...)
{
  tree ret;
  int length, i;
  va_list p;
  bool side_effects;

  /* This function cannot be used to construct variably-sized nodes.  */
  gcc_assert (TREE_CODE_CLASS (code) != tcc_vl_exp);

  va_start (p, code);

  ret = make_node (code);
  TREE_TYPE (ret) = void_type_node;
  length = TREE_CODE_LENGTH (code);

  /* TREE_SIDE_EFFECTS will already be set for statements with
     implicit side effects.  Here we make sure it is set for other
     expressions by checking whether the parameters have side
     effects.  */

  side_effects = false;
  for (i = 0; i < length; i++)
    {
      tree t = va_arg (p, tree);
      /* Types passed as operands never contribute side effects.  */
      if (t && !TYPE_P (t))
	side_effects |= TREE_SIDE_EFFECTS (t);
      TREE_OPERAND (ret, i) = t;
    }

  TREE_SIDE_EFFECTS (ret) |= side_effects;

  va_end (p);
  return ret;
}
|
|
596
|
|
/* BRIG regs are untyped, but GENERIC is not.  We need to add implicit casts
   in case treating the operand with an instruction with a type different
   than the created reg var type in order to select correct instruction type
   later on.  This function creates the necessary reinterpret type cast from
   a source variable to the destination type.  In case no cast is needed to
   the same type, SOURCE is returned directly.

   In case of mismatched type sizes, casting:
   - to narrower type the upper bits are clipped and
   - to wider type the source value is zero extended.  */

tree
build_resize_convert_view (tree destination_type, tree source)
{

  gcc_assert (source && destination_type && TREE_TYPE (source) != NULL_TREE
	      && destination_type != NULL_TREE);

  tree source_type = TREE_TYPE (source);
  if (TREE_CODE (source) == CALL_EXPR)
    {
      /* For calls, use the callee's declared return type instead of the
	 CALL_EXPR's type.  */
      tree func_decl = TREE_OPERAND (TREE_OPERAND (source, 1), 0);
      source_type = TREE_TYPE (TREE_TYPE (func_decl));
    }

  if (destination_type == source_type)
    return source;

  size_t src_size = int_size_in_bytes (source_type);
  size_t dst_size = int_size_in_bytes (destination_type);
  if (src_size == dst_size)
    return build1 (VIEW_CONVERT_EXPR, destination_type, source);
  else /* src_size != dst_size  */
    {
      /* The src_size can be smaller at least with f16 scalars which are
	 stored to 32b register variables.  First convert to an equivalent
	 size unsigned type, then extend to an unsigned type of the
	 target width, after which VIEW_CONVERT_EXPR can be used to
	 force to the target type.  */
      tree resized = convert (get_scalar_unsigned_int_type (destination_type),
			      build_reinterpret_to_uint (source));
      gcc_assert ((size_t)int_size_in_bytes (TREE_TYPE (resized)) == dst_size);
      /* The recursion terminates here: RESIZED now has DST_SIZE, so the
	 recursive call takes the same-size VIEW_CONVERT_EXPR branch.  */
      return build_resize_convert_view (destination_type, resized);
    }
}
|
|
642
|
|
/* Reinterprets SOURCE as a scalar unsigned int with the size
   corresponding to the orignal.  If SOURCE is already an unsigned
   integer, it is returned unchanged.  */

tree build_reinterpret_to_uint (tree source)
{
  tree src_type = TREE_TYPE (source);
  if (INTEGRAL_TYPE_P (src_type) && TYPE_UNSIGNED (src_type))
    return source;
  tree dest_type = get_scalar_unsigned_int_type (src_type);
  return build1 (VIEW_CONVERT_EXPR, dest_type, source);
}
|
|
654
|
|
655 /* Returns the finished brig_function for the given generic FUNC_DECL,
|
|
656 or NULL, if not found. */
|
|
657
|
|
658 brig_function *
|
|
659 brig_to_generic::get_finished_function (tree func_decl)
|
|
660 {
|
|
661 std::string func_name
|
|
662 = identifier_to_locale (IDENTIFIER_POINTER (DECL_NAME (func_decl)));
|
|
663 std::map<std::string, brig_function *>::iterator i
|
|
664 = m_finished_functions.find (func_name);
|
|
665 if (i != m_finished_functions.end ())
|
|
666 return (*i).second;
|
|
667 else
|
|
668 return NULL;
|
|
669 }
|
|
670
|
|
/* Adds a group variable to a correct book keeping structure depending
   on its segment.  */

void
brig_to_generic::add_group_variable (const std::string &name, size_t size,
				     size_t alignment, bool function_scope)
{
  /* Module and function scope group region variables are an experimental
     feature.  We implement module scope group variables with a separate
     book keeping inside brig_to_generic which is populated in the 'analyze()'
     prepass.  This is to ensure we know the group segment offsets when
     processing the functions that might refer to them.  */
  if (!function_scope)
    {
      /* A re-declaration of an already known variable is ignored.  */
      if (!m_module_group_variables.has_variable (name))
	m_module_group_variables.add (name, size, alignment);
      return;
    }

  /* Function scope group variables are tracked per current function.  */
  if (!m_cf->m_local_group_variables.has_variable (name))
    m_cf->m_local_group_variables.add (name, size, alignment);
}
|
|
693
|
|
/* Finalizes the currently handled function.  Should be called before
   setting a new function.  */

void
brig_to_generic::finish_function ()
{
  if (m_cf == NULL || m_cf->m_func_decl == NULL_TREE)
    {
      /* It can be a finished func declaration fingerprint, in that case we
	 don't have m_func_decl.  */
      m_cf = NULL;
      return;
    }

  if (!m_cf->m_is_kernel)
    {
      /* Plain functions are finished and dumped immediately.  */
      tree bind_expr = m_cf->m_current_bind_expr;
      tree stmts = BIND_EXPR_BODY (bind_expr);
      m_cf->finish ();
      m_cf->emit_metadata (stmts);
      dump_function (m_dump_file, m_cf);
    }
  else
    /* Emit the kernel only at the very end so we can analyze the total
       group and private memory usage.  */
    m_kernels.push_back (m_cf);

  pop_cfun ();

  /* Keep the function (and its place holder data) around; write_globals
     and call-retargeting still need it.  */
  m_finished_functions[m_cf->m_name] = m_cf;
  m_cf = NULL;
}
|
|
726
|
|
/* Initializes a new currently handled function.  Pushes F's struct
   function as the current cfun, creating it if needed.  */

void
brig_to_generic::start_function (tree f)
{
  if (DECL_STRUCT_FUNCTION (f) == NULL)
    push_struct_function (f);
  else
    push_cfun (DECL_STRUCT_FUNCTION (f));

  m_cf->m_func_decl = f;
}
|
|
739
|
|
/* Appends a new variable to the current kernel's private segment.
   SIZE and ALIGNMENT are in bytes; the variable's offset and padded
   size are recorded for later per-work-item address computation.  */

void
brig_to_generic::append_private_variable (const std::string &name,
					  size_t size, size_t alignment)
{
  /* We need to take care of two cases of alignment with private
     variables because of the layout where the same variable for
     each work-item is laid out in successive addresses.

     1) Ensure the first work-item's variable is in an aligned
     offset:  */
  size_t align_padding = m_next_private_offset % alignment == 0 ?
    0 : (alignment - m_next_private_offset % alignment);

  /* 2) Each successive per-work-item copy should be aligned.
     If the variable has wider alignment than size then we need
     to add extra padding to ensure it.  The padding must be
     included in the size to allow per-work-item offset computation
     to find their own aligned copy.  */

  size_t per_var_padding = size % alignment == 0 ?
    0 : (alignment - size % alignment);
  m_private_data_sizes[name] = size + per_var_padding;

  /* Note the order: the recorded offset includes the leading alignment
     padding but not the trailing per-variable padding.  */
  m_next_private_offset += align_padding;
  m_private_offsets[name] = m_next_private_offset;
  m_next_private_offset += size + per_var_padding;
}
|
|
769
|
|
/* Returns the byte offset of the named private variable within the
   private segment.  The variable must have been registered via
   append_private_variable.  */

size_t
brig_to_generic::private_variable_segment_offset
  (const std::string &name) const
{
  var_offset_table::const_iterator i = m_private_offsets.find (name);
  gcc_assert (i != m_private_offsets.end ());
  return (*i).second;
}
|
|
778
|
|
779 bool
|
|
780 brig_to_generic::has_private_variable (const std::string &name) const
|
|
781 {
|
|
782 std::map<std::string, size_t>::const_iterator i
|
|
783 = m_private_data_sizes.find (name);
|
|
784 return i != m_private_data_sizes.end ();
|
|
785 }
|
|
786
|
|
/* Returns the padded byte size reserved for the named private variable.
   The variable must have been registered via append_private_variable.  */

size_t
brig_to_generic::private_variable_size (const std::string &name) const
{
  std::map<std::string, size_t>::const_iterator i
    = m_private_data_sizes.find (name);
  gcc_assert (i != m_private_data_sizes.end ());
  return (*i).second;
}
|
|
795
|
|
796
|
|
/* The size of private segment required by a single work-item executing
   the currently processed kernel.  This is simply the running end offset
   accumulated by append_private_variable.  */

size_t
brig_to_generic::private_segment_size () const
{
  return m_next_private_offset;
}
|
|
805
|
|
/* Cached builtins indexed by name.  */

typedef std::map<std::string, tree> builtin_index;
/* File-scope cache; populated lazily by builtin lookups.  */
builtin_index builtin_cache_;
|
|
810
|
|
/* Build a call to a builtin function.  PDECL is the builtin function to
   call.  NARGS is the number of input arguments, RETTYPE the built-in
   functions return value type, and ... is the list of arguments passed to
   the call with type first, then the value.  Returns error_mark_node if
   any type or argument is erroneous.  */

tree
call_builtin (tree pdecl, int nargs, tree rettype, ...)
{
  if (rettype == error_mark_node)
    return error_mark_node;

  tree *types = new tree[nargs];
  tree *args = new tree[nargs];

  va_list ap;
  va_start (ap, rettype);
  for (int i = 0; i < nargs; ++i)
    {
      /* Each argument is passed as a (type, value) pair; the value is
	 reinterpreted to the expected type.  */
      types[i] = va_arg (ap, tree);
      tree arg = va_arg (ap, tree);
      args[i] = build_resize_convert_view (types[i], arg);
      if (types[i] == error_mark_node || args[i] == error_mark_node)
	{
	  /* Clean up before the early error return.  */
	  delete[] types;
	  delete[] args;
	  va_end (ap);
	  return error_mark_node;
	}
    }
  va_end (ap);

  tree fnptr = build_fold_addr_expr (pdecl);

  tree ret = build_call_array (rettype, fnptr, nargs, args);

  delete[] types;
  delete[] args;

  return ret;
}
|
|
851
|
|
/* Generate all global declarations.  Should be called after the last
   BRIG has been fed in.  */

void
brig_to_generic::write_globals ()
{

  /* Replace calls to declarations with calls to definitions.  Otherwise
     inlining will fail to find the definition to inline from.  */

  for (size_t i = 0; i < m_decl_call.size(); ++i)
    {
      tree decl_call = m_decl_call.at(i);
      tree func_decl = get_callee_fndecl (decl_call);
      brig_function *brig_function = get_finished_function (func_decl);

      /* Only retarget the call when a real (non-external) definition was
	 produced and it is a different decl than the one the call
	 currently points at.  */
      if (brig_function && brig_function->m_func_decl
	  && DECL_EXTERNAL (brig_function->m_func_decl) == 0
	  && brig_function->m_func_decl != func_decl)
	{

	  /* Rewrite the callee operand of the CALL_EXPR in place:
	     CALL_EXPR_FN is an ADDR_EXPR (possibly behind NOPs) whose
	     operand is the FUNCTION_DECL to substitute.  */
	  decl_call = CALL_EXPR_FN (decl_call);
	  STRIP_NOPS (decl_call);
	  if (TREE_CODE (decl_call) == ADDR_EXPR
	      && TREE_CODE (TREE_OPERAND (decl_call, 0)) == FUNCTION_DECL)
	    TREE_OPERAND (decl_call, 0) = brig_function->m_func_decl;
	}
    }

  /* Gimplify and hand all non-kernel functions to the cgraph first.  */
  for (std::map<std::string, brig_function *>::iterator i
	 = m_finished_functions.begin(), e = m_finished_functions.end();
       i != e; ++i)
    {
      brig_function *brig_f = (*i).second;
      if (brig_f->m_is_kernel)
	continue;

      /* Finalize only at this point to allow the cgraph analysis to
	 see definitions to calls to later functions.  */
      gimplify_function_tree (brig_f->m_func_decl);
      cgraph_node::finalize_function (brig_f->m_func_decl, true);
    }

  /* Now that the whole BRIG module has been processed, build a launcher
     and a metadata section for each built kernel.  */
  for (size_t i = 0; i < m_kernels.size (); ++i)
    {
      brig_function *f = m_kernels[i];

      /* Finish kernels now that we know the call graphs and their barrier
	 usage.  */
      f->finish_kernel ();

      dump_function (m_dump_file, f);
      gimplify_function_tree (f->m_func_decl);
      cgraph_node::finalize_function (f->m_func_decl, true);

      /* Fill in the phsa kernel descriptor consumed by the runtime.  */
      f->m_descriptor.is_kernel = 1;
      /* TODO: analyze the kernel's actual private and group segment usage
	 using call graph.  Now the mem size is overly
	 pessimistic in case of multiple kernels in the same module.
      */
      f->m_descriptor.group_segment_size = m_total_group_segment_usage;
      f->m_descriptor.private_segment_size = private_segment_size ();

      /* The kernarg size is rounded up to a multiple of 16 according to
	 the PRM specs.  */
      f->m_descriptor.kernarg_segment_size = f->m_next_kernarg_offset;
      if (f->m_descriptor.kernarg_segment_size % 16 > 0)
	f->m_descriptor.kernarg_segment_size
	  += 16 - f->m_next_kernarg_offset % 16;
      f->m_descriptor.kernarg_max_align = f->m_kernarg_max_align;

      tree launcher = f->emit_launcher_and_metadata ();

      append_global (launcher);

      /* Mirror the -fdump-tree-original style dump for the generated
	 launcher so it shows up next to the kernel's own dump.  */
      if (m_dump_file)
	{
	  /* Drop the leading character of the BRIG name (presumably the
	     '&' symbol prefix — TODO confirm) for the dump header.  */
	  std::string kern_name = f->m_name.substr (1);
	  fprintf (m_dump_file, "\n;; Function %s", kern_name.c_str());
	  fprintf (m_dump_file, "\n;; enabled by -%s\n\n",
		   dump_flag_name (TDI_original));
	  print_generic_decl (m_dump_file, launcher, TDF_NONE);
	  print_generic_expr (m_dump_file, DECL_SAVED_TREE (launcher),
			      TDF_NONE);
	  fprintf (m_dump_file, "\n");
	}

      gimplify_function_tree (launcher);
      cgraph_node::finalize_function (launcher, true);
      /* NOTE(review): pairs with a push_cfun done while building the
	 launcher inside emit_launcher_and_metadata — confirm.  */
      pop_cfun ();
    }

  /* Finally, emit the accumulated chain of global declarations.
     wrapup_global_declarations expects a flat array, so linearize the
     TREE_CHAIN list first.  */
  int no_globals = list_length (m_globals);
  tree *vec = new tree[no_globals];

  int i = 0;
  tree global = m_globals;
  while (global)
    {
      vec[i] = global;
      ++i;
      global = TREE_CHAIN (global);
    }

  wrapup_global_declarations (vec, no_globals);

  delete[] vec;

}
|
|
963
|
|
964 /* Returns an type with unsigned int elements corresponding to the
|
|
965 size and element count of ORIGINAL_TYPE. */
|
|
966
|
|
967 tree
|
|
968 get_unsigned_int_type (tree original_type)
|
|
969 {
|
|
970 if (VECTOR_TYPE_P (original_type))
|
|
971 {
|
|
972 size_t esize
|
|
973 = int_size_in_bytes (TREE_TYPE (original_type)) * BITS_PER_UNIT;
|
131
|
974 poly_uint64 ecount = TYPE_VECTOR_SUBPARTS (original_type);
|
111
|
975 return build_vector_type (build_nonstandard_integer_type (esize, true),
|
|
976 ecount);
|
|
977 }
|
|
978 else
|
|
979 return build_nonstandard_integer_type (int_size_in_bytes (original_type)
|
|
980 * BITS_PER_UNIT,
|
|
981 true);
|
|
982 }
|
|
983
|
131
|
984 /* Returns a type with unsigned int corresponding to the size
|
|
985 ORIGINAL_TYPE. */
|
|
986
|
|
987 tree
|
|
988 get_scalar_unsigned_int_type (tree original_type)
|
|
989 {
|
|
990 return build_nonstandard_integer_type (int_size_in_bytes (original_type)
|
|
991 * BITS_PER_UNIT, true);
|
|
992 }
|
|
993
|
|
994 /* Set the declaration externally visible so it won't get removed by
|
|
995 whole program optimizations. */
|
|
996
|
|
997 void
|
|
998 set_externally_visible (tree decl)
|
|
999 {
|
|
1000 if (!lookup_attribute ("externally_visible", DECL_ATTRIBUTES (decl)))
|
|
1001 DECL_ATTRIBUTES (decl) = tree_cons (get_identifier ("externally_visible"),
|
|
1002 NULL, DECL_ATTRIBUTES (decl));
|
|
1003 }
|
|
1004
|
|
1005 void
|
|
1006 set_inline (tree decl)
|
|
1007 {
|
|
1008 if (!lookup_attribute ("inline", DECL_ATTRIBUTES (decl)))
|
|
1009 DECL_ATTRIBUTES (decl) = tree_cons (get_identifier ("inline"),
|
|
1010 NULL, DECL_ATTRIBUTES (decl));
|
|
1011 }
|
|
1012
|
111
|
1013 void
|
|
1014 dump_function (FILE *dump_file, brig_function *f)
|
|
1015 {
|
|
1016 /* Dump the BRIG-specific tree IR. */
|
|
1017 if (dump_file)
|
|
1018 {
|
|
1019 fprintf (dump_file, "\n;; Function %s", f->m_name.c_str ());
|
|
1020 fprintf (dump_file, "\n;; enabled by -%s\n\n",
|
|
1021 dump_flag_name (TDI_original));
|
131
|
1022 print_generic_decl (dump_file, f->m_func_decl, TDF_NONE);
|
|
1023 print_generic_expr (dump_file, f->m_current_bind_expr, TDF_NONE);
|
111
|
1024 fprintf (dump_file, "\n");
|
|
1025 }
|
|
1026 }
|
131
|
1027
|
|
1028 /* Records use of the BRIG_REG as a TYPE in the current function. */
|
|
1029
|
|
1030 void
|
|
1031 brig_to_generic::add_reg_used_as_type (const BrigOperandRegister &brig_reg,
|
|
1032 tree type)
|
|
1033 {
|
|
1034 gcc_assert (m_cf);
|
|
1035 reg_use_info &info
|
|
1036 = m_fn_regs_use_index[m_cf->m_name][gccbrig_hsa_reg_id (brig_reg)];
|
|
1037
|
|
1038 if (info.m_type_refs_lookup.count (type))
|
|
1039 info.m_type_refs[info.m_type_refs_lookup[type]].second++;
|
|
1040 else
|
|
1041 {
|
|
1042 info.m_type_refs.push_back (std::make_pair (type, 1));
|
|
1043 info.m_type_refs_lookup[type] = info.m_type_refs.size () - 1;
|
|
1044 }
|
|
1045 }
|