/* HSAIL IL Register allocation and out-of-SSA.
   Copyright (C) 2013-2020 Free Software Foundation, Inc.
   Contributed by Michael Matz <matz@suse.de>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
|
|
21 #include "config.h"
|
|
22 #include "system.h"
|
|
23 #include "coretypes.h"
|
|
24 #include "tm.h"
|
|
25 #include "is-a.h"
|
|
26 #include "vec.h"
|
|
27 #include "tree.h"
|
|
28 #include "dominance.h"
|
|
29 #include "basic-block.h"
|
131
|
30 #include "function.h"
|
111
|
31 #include "cfganal.h"
|
131
|
32 #include "cfg.h"
|
111
|
33 #include "bitmap.h"
|
|
34 #include "dumpfile.h"
|
|
35 #include "cgraph.h"
|
|
36 #include "print-tree.h"
|
|
37 #include "cfghooks.h"
|
145
|
38 #include "alloc-pool.h"
|
111
|
39 #include "symbol-summary.h"
|
|
40 #include "hsa-common.h"
|
|
41
|
|
42
|
|
43 /* Process a PHI node PHI of basic block BB as a part of naive out-f-ssa. */
|
|
44
|
|
45 static void
|
|
46 naive_process_phi (hsa_insn_phi *phi, const vec<edge> &predecessors)
|
|
47 {
|
|
48 unsigned count = phi->operand_count ();
|
|
49 for (unsigned i = 0; i < count; i++)
|
|
50 {
|
|
51 gcc_checking_assert (phi->get_op (i));
|
|
52 hsa_op_base *op = phi->get_op (i);
|
|
53 hsa_bb *hbb;
|
|
54 edge e;
|
|
55
|
|
56 if (!op)
|
|
57 break;
|
|
58
|
|
59 e = predecessors[i];
|
|
60 if (single_succ_p (e->src))
|
|
61 hbb = hsa_bb_for_bb (e->src);
|
|
62 else
|
|
63 {
|
|
64 basic_block old_dest = e->dest;
|
|
65 hbb = hsa_init_new_bb (split_edge (e));
|
|
66
|
|
67 /* If switch insn used this edge, fix jump table. */
|
|
68 hsa_bb *source = hsa_bb_for_bb (e->src);
|
|
69 hsa_insn_sbr *sbr;
|
|
70 if (source->m_last_insn
|
|
71 && (sbr = dyn_cast <hsa_insn_sbr *> (source->m_last_insn)))
|
|
72 sbr->replace_all_labels (old_dest, hbb->m_bb);
|
|
73 }
|
|
74
|
|
75 hsa_build_append_simple_mov (phi->m_dest, op, hbb);
|
|
76 }
|
|
77 }
|
|
78
|
|
/* Naive out-of SSA: replace every PHI node by explicit register copies
   placed on the corresponding incoming edges.  */

static void
naive_outof_ssa (void)
{
  basic_block bb;

  hsa_cfun->m_in_ssa = false;

  FOR_ALL_BB_FN (bb, cfun)
    {
      hsa_bb *hbb = hsa_bb_for_bb (bb);
      hsa_insn_phi *phi;

      /* naive_process_phi can call split_edge on an incoming edge, which can
	 change the order of the incoming edges to the basic block and thus
	 make it inconsistent with the ordering of PHI arguments, so we
	 collect them in advance.  */
      auto_vec<edge, 8> predecessors;
      unsigned pred_count = EDGE_COUNT (bb->preds);
      for (unsigned i = 0; i < pred_count; i++)
	predecessors.safe_push (EDGE_PRED (bb, i));

      for (phi = hbb->m_first_phi;
	   phi;
	   phi = phi->m_next ? as_a <hsa_insn_phi *> (phi->m_next) : NULL)
	naive_process_phi (phi, predecessors);

      /* Zap PHI nodes, they will be deallocated when everything else will.  */
      hbb->m_first_phi = NULL;
      hbb->m_last_phi = NULL;
    }
}
|
|
111
|
|
/* Return register class number for the given HSA TYPE.  0 means the 'c' one
   bit register class, 1 means 's' 32 bit class, 2 stands for 'd' 64 bit class
   and 3 for 'q' 128 bit class.  */

static int
m_reg_class_for_type (BrigType16_t type)
{
  switch (type)
    {
    /* One-bit booleans get the 'c' control register class.  */
    case BRIG_TYPE_B1:
      return 0;

    /* Scalars and packed vectors up to 32 bits wide fit the 's' class.  */
    case BRIG_TYPE_U8:
    case BRIG_TYPE_U16:
    case BRIG_TYPE_U32:
    case BRIG_TYPE_S8:
    case BRIG_TYPE_S16:
    case BRIG_TYPE_S32:
    case BRIG_TYPE_F16:
    case BRIG_TYPE_F32:
    case BRIG_TYPE_B8:
    case BRIG_TYPE_B16:
    case BRIG_TYPE_B32:
    case BRIG_TYPE_U8X4:
    case BRIG_TYPE_S8X4:
    case BRIG_TYPE_U16X2:
    case BRIG_TYPE_S16X2:
    case BRIG_TYPE_F16X2:
      return 1;

    /* 64-bit scalars and packed vectors go into the 'd' class.  */
    case BRIG_TYPE_U64:
    case BRIG_TYPE_S64:
    case BRIG_TYPE_F64:
    case BRIG_TYPE_B64:
    case BRIG_TYPE_U8X8:
    case BRIG_TYPE_S8X8:
    case BRIG_TYPE_U16X4:
    case BRIG_TYPE_S16X4:
    case BRIG_TYPE_F16X4:
    case BRIG_TYPE_U32X2:
    case BRIG_TYPE_S32X2:
    case BRIG_TYPE_F32X2:
      return 2;

    /* 128-bit data uses the 'q' class.  */
    case BRIG_TYPE_B128:
    case BRIG_TYPE_U8X16:
    case BRIG_TYPE_S8X16:
    case BRIG_TYPE_U16X8:
    case BRIG_TYPE_S16X8:
    case BRIG_TYPE_F16X8:
    case BRIG_TYPE_U32X4:
    case BRIG_TYPE_U64X2:
    case BRIG_TYPE_S32X4:
    case BRIG_TYPE_S64X2:
    case BRIG_TYPE_F32X4:
    case BRIG_TYPE_F64X2:
      return 3;

    default:
      /* Any other type (e.g. array or opaque types) must never reach
	 register allocation.  */
      gcc_unreachable ();
    }
}
|
|
174
|
|
175 /* If the Ith operands of INSN is or contains a register (in an address),
|
|
176 return the address of that register operand. If not return NULL. */
|
|
177
|
|
178 static hsa_op_reg **
|
|
179 insn_reg_addr (hsa_insn_basic *insn, int i)
|
|
180 {
|
|
181 hsa_op_base *op = insn->get_op (i);
|
|
182 if (!op)
|
|
183 return NULL;
|
|
184 hsa_op_reg *reg = dyn_cast <hsa_op_reg *> (op);
|
|
185 if (reg)
|
|
186 return (hsa_op_reg **) insn->get_op_addr (i);
|
|
187 hsa_op_address *addr = dyn_cast <hsa_op_address *> (op);
|
|
188 if (addr && addr->m_reg)
|
|
189 return &addr->m_reg;
|
|
190 return NULL;
|
|
191 }
|
|
192
|
|
/* State of one HSA register class ('c', 's', 'd' or 'q') during register
   allocation.  */

struct m_reg_class_desc
{
  /* Next hardware register number to hand out and the total number of
     hardware registers available in this class.  */
  unsigned next_avail, max_num;
  /* Number of hardware registers currently in use, and the maximum number
     that were ever in use at the same time.  */
  unsigned used_num, max_used;
  /* Bitmap of hardware registers currently in use (up to 128 bits).  */
  uint64_t used[2];
  /* Letter denoting this class in textual HSAIL register names.  */
  char cl_char;
};
|
|
200
|
|
/* Rewrite the instructions in BB to observe spilled live ranges.
   CLASSES is the global register class state.  */

static void
rewrite_code_bb (basic_block bb, struct m_reg_class_desc *classes)
{
  hsa_bb *hbb = hsa_bb_for_bb (bb);
  hsa_insn_basic *insn, *next_insn;

  for (insn = hbb->m_first_insn; insn; insn = next_insn)
    {
      /* Remember the successor now; spilling inserts new insns around
	 INSN and would otherwise disturb the walk.  */
      next_insn = insn->m_next;
      unsigned count = insn->operand_count ();
      for (unsigned i = 0; i < count; i++)
	{
	  gcc_checking_assert (insn->get_op (i));
	  hsa_op_reg **regaddr = insn_reg_addr (insn, i);

	  if (regaddr)
	    {
	      hsa_op_reg *reg = *regaddr;
	      /* A register with a class assigned got a hardware register;
		 only spilled registers (class 0) need rewriting.  */
	      if (reg->m_reg_class)
		continue;
	      gcc_assert (reg->m_spill_sym);

	      int cl = m_reg_class_for_type (reg->m_type);
	      hsa_op_reg *tmp, *tmp2;
	      /* Emit a store after INSN for an output operand, a load
		 before it for an input; TMP is the temporary register
		 carrying the value.  */
	      if (insn->op_output_p (i))
		tmp = hsa_spill_out (insn, reg, &tmp2);
	      else
		tmp = hsa_spill_in (insn, reg, &tmp2);

	      *regaddr = tmp;

	      /* Give the temporaries fixed hardware numbers just above
		 MAX_NUM of their class; presumably these are the slots
		 held back by the 128 - 5 budget in try_alloc_reg --
		 confirm.  */
	      tmp->m_reg_class = classes[cl].cl_char;
	      tmp->m_hard_num = (char) (classes[cl].max_num + i);
	      if (tmp2)
		{
		  /* TMP2 only appears for the one-bit 'c' class (cl == 0);
		     it is an 's'-class helper, presumably used to convert
		     the bit to/from a storable value -- confirm against
		     hsa_spill_in/out.  */
		  gcc_assert (cl == 0);
		  tmp2->m_reg_class = classes[1].cl_char;
		  tmp2->m_hard_num = (char) (classes[1].max_num + i);
		}
	    }
	}
    }
}
|
|
247
|
|
248 /* Dump current function to dump file F, with info specific
|
|
249 to register allocation. */
|
|
250
|
|
251 void
|
|
252 dump_hsa_cfun_regalloc (FILE *f)
|
|
253 {
|
|
254 basic_block bb;
|
|
255
|
|
256 fprintf (f, "\nHSAIL IL for %s\n", hsa_cfun->m_name);
|
|
257
|
|
258 FOR_ALL_BB_FN (bb, cfun)
|
|
259 {
|
145
|
260 hsa_bb *hbb = (class hsa_bb *) bb->aux;
|
111
|
261 bitmap_print (dump_file, hbb->m_livein, "m_livein ", "\n");
|
|
262 dump_hsa_bb (f, hbb);
|
|
263 bitmap_print (dump_file, hbb->m_liveout, "m_liveout ", "\n");
|
|
264 }
|
|
265 }
|
|
266
|
|
/* Given the global register allocation state CLASSES and a
   register REG, try to give it a hardware register.  If successful,
   store that hardreg in REG and return it, otherwise return -1.
   Also changes CLASSES to accommodate for the allocated register.  */

static int
try_alloc_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
{
  int cl = m_reg_class_for_type (reg->m_type);
  int ret = -1;
  /* The 's', 'd' and 'q' classes share a common budget, with 'd' counting
     twice and 'q' four times an 's' register; 5 slots are held back,
     presumably for the spill temporaries handed out in rewrite_code_bb --
     confirm.  */
  if (classes[1].used_num + classes[2].used_num * 2 + classes[3].used_num * 4
      >= 128 - 5)
    return -1;
  if (classes[cl].used_num < classes[cl].max_num)
    {
      unsigned int i;
      classes[cl].used_num++;
      if (classes[cl].used_num > classes[cl].max_used)
	classes[cl].max_used = classes[cl].used_num;
      /* Find the first free bit.  Before the increment only USED_NUM - 1
	 registers were taken, so one of the first USED_NUM bits must be
	 clear.  */
      for (i = 0; i < classes[cl].used_num; i++)
	if (! (classes[cl].used[i / 64] & (((uint64_t)1) << (i & 63))))
	  break;
      ret = i;
      classes[cl].used[i / 64] |= (((uint64_t)1) << (i & 63));
      reg->m_reg_class = classes[cl].cl_char;
      reg->m_hard_num = i;
    }
  return ret;
}
|
|
296
|
|
297 /* Free up hardregs used by REG, into allocation state CLASSES. */
|
|
298
|
|
299 static void
|
|
300 free_reg (struct m_reg_class_desc *classes, hsa_op_reg *reg)
|
|
301 {
|
|
302 int cl = m_reg_class_for_type (reg->m_type);
|
|
303 int ret = reg->m_hard_num;
|
|
304 gcc_assert (reg->m_reg_class == classes[cl].cl_char);
|
|
305 classes[cl].used_num--;
|
|
306 classes[cl].used[ret / 64] &= ~(((uint64_t)1) << (ret & 63));
|
|
307 }
|
|
308
|
|
309 /* Note that the live range for REG ends at least at END. */
|
|
310
|
|
311 static void
|
|
312 note_lr_end (hsa_op_reg *reg, int end)
|
|
313 {
|
|
314 if (reg->m_lr_end < end)
|
|
315 reg->m_lr_end = end;
|
|
316 }
|
|
317
|
|
318 /* Note that the live range for REG starts at least at BEGIN. */
|
|
319
|
|
320 static void
|
|
321 note_lr_begin (hsa_op_reg *reg, int begin)
|
|
322 {
|
|
323 if (reg->m_lr_begin > begin)
|
|
324 reg->m_lr_begin = begin;
|
|
325 }
|
|
326
|
|
327 /* Given two registers A and B, return -1, 0 or 1 if A's live range
|
|
328 starts before, at or after B's live range. */
|
|
329
|
|
330 static int
|
|
331 cmp_begin (const void *a, const void *b)
|
|
332 {
|
|
333 const hsa_op_reg * const *rega = (const hsa_op_reg * const *)a;
|
|
334 const hsa_op_reg * const *regb = (const hsa_op_reg * const *)b;
|
|
335 int ret;
|
|
336 if (rega == regb)
|
|
337 return 0;
|
|
338 ret = (*rega)->m_lr_begin - (*regb)->m_lr_begin;
|
|
339 if (ret)
|
|
340 return ret;
|
|
341 return ((*rega)->m_order - (*regb)->m_order);
|
|
342 }
|
|
343
|
|
344 /* Given two registers REGA and REGB, return true if REGA's
|
|
345 live range ends after REGB's. This results in a sorting order
|
|
346 with earlier end points at the end. */
|
|
347
|
|
348 static bool
|
|
349 cmp_end (hsa_op_reg * const ®a, hsa_op_reg * const ®b)
|
|
350 {
|
|
351 int ret;
|
|
352 if (rega == regb)
|
|
353 return false;
|
|
354 ret = (regb)->m_lr_end - (rega)->m_lr_end;
|
|
355 if (ret)
|
|
356 return ret < 0;
|
|
357 return (((regb)->m_order - (rega)->m_order)) < 0;
|
|
358 }
|
|
359
|
|
360 /* Expire all old intervals in ACTIVE (a per-regclass vector),
|
|
361 that is, those that end before the interval REG starts. Give
|
|
362 back resources freed so into the state CLASSES. */
|
|
363
|
|
364 static void
|
|
365 expire_old_intervals (hsa_op_reg *reg, vec<hsa_op_reg*> *active,
|
|
366 struct m_reg_class_desc *classes)
|
|
367 {
|
|
368 for (int i = 0; i < 4; i++)
|
|
369 while (!active[i].is_empty ())
|
|
370 {
|
|
371 hsa_op_reg *a = active[i].pop ();
|
|
372 if (a->m_lr_end > reg->m_lr_begin)
|
|
373 {
|
|
374 active[i].quick_push (a);
|
|
375 break;
|
|
376 }
|
|
377 free_reg (classes, a);
|
|
378 }
|
|
379 }
|
|
380
|
|
/* The interval REG didn't get a hardreg.  Spill it or one of those
   from ACTIVE (if the latter, then REG will become allocated to the
   hardreg that formerly was used by it).  */

static void
spill_at_interval (hsa_op_reg *reg, vec<hsa_op_reg*> *active)
{
  int cl = m_reg_class_for_type (reg->m_type);
  gcc_assert (!active[cl].is_empty ());
  /* ACTIVE is sorted with earlier end points at the end, so element 0
     is the active interval that lives the longest.  */
  hsa_op_reg *cand = active[cl][0];
  if (cand->m_lr_end > reg->m_lr_end)
    {
      /* CAND outlives REG: steal CAND's hardware register for REG and
	 spill CAND instead, keeping ACTIVE sorted by end point.  */
      reg->m_reg_class = cand->m_reg_class;
      reg->m_hard_num = cand->m_hard_num;
      active[cl].ordered_remove (0);
      unsigned place = active[cl].lower_bound (reg, cmp_end);
      active[cl].quick_insert (place, reg);
    }
  else
    cand = reg;

  /* Attach a spill symbol to whichever register lost.  */
  gcc_assert (!cand->m_spill_sym);
  BrigType16_t type = cand->m_type;
  /* One-bit values are spilled as bytes, presumably because B1 has no
     direct memory representation -- confirm.  */
  if (type == BRIG_TYPE_B1)
    type = BRIG_TYPE_U8;
  cand->m_reg_class = 0;
  cand->m_spill_sym = hsa_get_spill_symbol (type);
  cand->m_spill_sym->m_name_number = cand->m_order;
}
|
|
410
|
|
/* Given the global register state CLASSES allocate all HSA virtual
   registers either to hardregs or to a spill symbol.  Runs classic
   iterative liveness analysis, derives one live interval per virtual
   register, then performs linear scan allocation over the intervals.  */

static void
linear_scan_regalloc (struct m_reg_class_desc *classes)
{
  /* Compute liveness.  */
  bool changed;
  int i, n;
  int insn_order;
  int *bbs = XNEWVEC (int, n_basic_blocks_for_fn (cfun));
  bitmap work = BITMAP_ALLOC (NULL);
  /* Mapping from register order number to the register itself.  */
  vec<hsa_op_reg*> ind2reg = vNULL;
  /* Per-class vectors of currently live intervals, each sorted with
     earlier end points at the end (see cmp_end).  */
  vec<hsa_op_reg*> active[4] = {vNULL, vNULL, vNULL, vNULL};
  /* NOTE(review): local variable named with an m_ member prefix even
     though it is not a member.  */
  hsa_insn_basic *m_last_insn;

  /* We will need the reverse post order for linearization,
     and the post order for liveness analysis, which is the same
     backward.  */
  n = pre_and_rev_post_order_compute (NULL, bbs, true);
  ind2reg.safe_grow_cleared (hsa_cfun->m_reg_count);

  /* Give all instructions a linearized number, at the same time
     build a mapping from register index to register.  */
  insn_order = 1;
  for (i = 0; i < n; i++)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
      hsa_bb *hbb = hsa_bb_for_bb (bb);
      hsa_insn_basic *insn;
      for (insn = hbb->m_first_insn; insn; insn = insn->m_next)
	{
	  unsigned opi;
	  insn->m_number = insn_order++;
	  for (opi = 0; opi < insn->operand_count (); opi++)
	    {
	      gcc_checking_assert (insn->get_op (opi));
	      hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
	      if (regaddr)
		ind2reg[(*regaddr)->m_order] = *regaddr;
	    }
	}
    }

  /* Initialize all live ranges to [after-end, 0).  */
  for (i = 0; i < hsa_cfun->m_reg_count; i++)
    if (ind2reg[i])
      ind2reg[i]->m_lr_begin = insn_order, ind2reg[i]->m_lr_end = 0;

  /* Classic liveness analysis, as long as something changes:
       m_liveout is union (m_livein of successors)
       m_livein is m_liveout minus defs plus uses.  */
  do
    {
      changed = false;
      /* Walk BBS backward, i.e. in post order, which converges fastest
	 for a backward dataflow problem.  */
      for (i = n - 1; i >= 0; i--)
	{
	  edge e;
	  edge_iterator ei;
	  basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
	  hsa_bb *hbb = hsa_bb_for_bb (bb);

	  /* Union of successors m_livein (or empty if none).  */
	  bool first = true;
	  FOR_EACH_EDGE (e, ei, bb->succs)
	    if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
	      {
		hsa_bb *succ = hsa_bb_for_bb (e->dest);
		if (first)
		  {
		    bitmap_copy (work, succ->m_livein);
		    first = false;
		  }
		else
		  bitmap_ior_into (work, succ->m_livein);
	      }
	  if (first)
	    bitmap_clear (work);

	  bitmap_copy (hbb->m_liveout, work);

	  /* Remove defs, include uses in a backward insn walk.
	     NOTE(review): input_count () is used as the number of
	     definition operands (which come first); the name suggests
	     otherwise -- confirm against the hsa_insn_basic API.  */
	  hsa_insn_basic *insn;
	  for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
	    {
	      unsigned opi;
	      unsigned ndefs = insn->input_count ();
	      for (opi = 0; opi < ndefs && insn->get_op (opi); opi++)
		{
		  gcc_checking_assert (insn->get_op (opi));
		  hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
		  if (regaddr)
		    bitmap_clear_bit (work, (*regaddr)->m_order);
		}
	      for (; opi < insn->operand_count (); opi++)
		{
		  gcc_checking_assert (insn->get_op (opi));
		  hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
		  if (regaddr)
		    bitmap_set_bit (work, (*regaddr)->m_order);
		}
	    }

	  /* Note if that changed something.  */
	  if (bitmap_ior_into (hbb->m_livein, work))
	    changed = true;
	}
    }
  while (changed);

  /* Make one pass through all instructions in linear order,
     noting and merging possible live range start and end points.  */
  m_last_insn = NULL;
  for (i = n - 1; i >= 0; i--)
    {
      basic_block bb = BASIC_BLOCK_FOR_FN (cfun, bbs[i]);
      hsa_bb *hbb = hsa_bb_for_bb (bb);
      hsa_insn_basic *insn;
      int after_end_number;
      unsigned bit;
      bitmap_iterator bi;

      /* M_LAST_INSN is the first insn of the block processed in the
	 previous iteration, i.e. the first insn following this block in
	 linear order.  */
      if (m_last_insn)
	after_end_number = m_last_insn->m_number;
      else
	after_end_number = insn_order;
      /* Everything live-out in this BB has at least an end point
	 after us.  */
      EXECUTE_IF_SET_IN_BITMAP (hbb->m_liveout, 0, bit, bi)
	note_lr_end (ind2reg[bit], after_end_number);

      for (insn = hbb->m_last_insn; insn; insn = insn->m_prev)
	{
	  unsigned opi;
	  unsigned ndefs = insn->input_count ();
	  for (opi = 0; opi < insn->operand_count (); opi++)
	    {
	      gcc_checking_assert (insn->get_op (opi));
	      hsa_op_reg **regaddr = insn_reg_addr (insn, opi);
	      if (regaddr)
		{
		  hsa_op_reg *reg = *regaddr;
		  /* Definitions extend the start, uses the end.  */
		  if (opi < ndefs)
		    note_lr_begin (reg, insn->m_number);
		  else
		    note_lr_end (reg, insn->m_number);
		}
	    }
	}

      /* Everything live-in in this BB has a start point before
	 our first insn.  */
      int before_start_number;
      if (hbb->m_first_insn)
	before_start_number = hbb->m_first_insn->m_number;
      else
	before_start_number = after_end_number;
      before_start_number--;
      EXECUTE_IF_SET_IN_BITMAP (hbb->m_livein, 0, bit, bi)
	note_lr_begin (ind2reg[bit], before_start_number);

      if (hbb->m_first_insn)
	m_last_insn = hbb->m_first_insn;
    }

  for (i = 0; i < hsa_cfun->m_reg_count; i++)
    if (ind2reg[i])
      {
	/* All regs that have still their start at after all code actually
	   are defined at the start of the routine (prologue).  */
	if (ind2reg[i]->m_lr_begin == insn_order)
	  ind2reg[i]->m_lr_begin = 0;
	/* All regs that have no use but a def will have lr_end == 0,
	   they are actually live from def until after the insn they are
	   defined in.  */
	if (ind2reg[i]->m_lr_end == 0)
	  ind2reg[i]->m_lr_end = ind2reg[i]->m_lr_begin + 1;
      }

  /* Sort all intervals by increasing start point.  */
  gcc_assert (ind2reg.length () == (size_t) hsa_cfun->m_reg_count);

  if (flag_checking)
    for (unsigned i = 0; i < ind2reg.length (); i++)
      gcc_assert (ind2reg[i]);

  ind2reg.qsort (cmp_begin);
  /* Reserve space up front so quick_insert below never reallocates.  */
  for (i = 0; i < 4; i++)
    active[i].reserve_exact (hsa_cfun->m_reg_count);

  /* Now comes the linear scan allocation.  */
  for (i = 0; i < hsa_cfun->m_reg_count; i++)
    {
      hsa_op_reg *reg = ind2reg[i];
      if (!reg)
	continue;
      expire_old_intervals (reg, active, classes);
      int cl = m_reg_class_for_type (reg->m_type);
      if (try_alloc_reg (classes, reg) >= 0)
	{
	  /* Keep ACTIVE sorted by decreasing end point (cmp_end).  */
	  unsigned place = active[cl].lower_bound (reg, cmp_end);
	  active[cl].quick_insert (place, reg);
	}
      else
	spill_at_interval (reg, active);

      /* Some interesting dumping as we go.  */
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  reg%d: [%5d, %5d)->",
		   reg->m_order, reg->m_lr_begin, reg->m_lr_end);
	  if (reg->m_reg_class)
	    fprintf (dump_file, "$%c%i", reg->m_reg_class, reg->m_hard_num);
	  else
	    fprintf (dump_file, "[%%__%s_%i]",
		     hsa_seg_name (reg->m_spill_sym->m_segment),
		     reg->m_spill_sym->m_name_number);
	  for (int cl = 0; cl < 4; cl++)
	    {
	      bool first = true;
	      hsa_op_reg *r;
	      fprintf (dump_file, " {");
	      for (int j = 0; active[cl].iterate (j, &r); j++)
		if (first)
		  {
		    fprintf (dump_file, "%d", r->m_order);
		    first = false;
		  }
		else
		  fprintf (dump_file, ", %d", r->m_order);
	      fprintf (dump_file, "}");
	    }
	  fprintf (dump_file, "\n");
	}
    }

  BITMAP_FREE (work);
  free (bbs);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "------- After liveness: -------\n");
      dump_hsa_cfun_regalloc (dump_file);
      fprintf (dump_file, "  ----- Intervals:\n");
      for (i = 0; i < hsa_cfun->m_reg_count; i++)
	{
	  hsa_op_reg *reg = ind2reg[i];
	  if (!reg)
	    continue;
	  fprintf (dump_file, "  reg%d: [%5d, %5d)->", reg->m_order,
		   reg->m_lr_begin, reg->m_lr_end);
	  if (reg->m_reg_class)
	    fprintf (dump_file, "$%c%i\n", reg->m_reg_class, reg->m_hard_num);
	  else
	    fprintf (dump_file, "[%%__%s_%i]\n",
		     hsa_seg_name (reg->m_spill_sym->m_segment),
		     reg->m_spill_sym->m_name_number);
	}
    }

  for (i = 0; i < 4; i++)
    active[i].release ();
  ind2reg.release ();
}
|
|
675
|
|
676 /* Entry point for register allocation. */
|
|
677
|
|
678 static void
|
|
679 regalloc (void)
|
|
680 {
|
|
681 basic_block bb;
|
|
682 m_reg_class_desc classes[4];
|
|
683
|
|
684 /* If there are no registers used in the function, exit right away. */
|
|
685 if (hsa_cfun->m_reg_count == 0)
|
|
686 return;
|
|
687
|
|
688 memset (classes, 0, sizeof (classes));
|
|
689 classes[0].next_avail = 0;
|
|
690 classes[0].max_num = 7;
|
|
691 classes[0].cl_char = 'c';
|
|
692 classes[1].cl_char = 's';
|
|
693 classes[2].cl_char = 'd';
|
|
694 classes[3].cl_char = 'q';
|
|
695
|
|
696 for (int i = 1; i < 4; i++)
|
|
697 {
|
|
698 classes[i].next_avail = 0;
|
|
699 classes[i].max_num = 20;
|
|
700 }
|
|
701
|
|
702 linear_scan_regalloc (classes);
|
|
703
|
|
704 FOR_ALL_BB_FN (bb, cfun)
|
|
705 rewrite_code_bb (bb, classes);
|
|
706 }
|
|
707
|
|
/* Out of SSA and register allocation on HSAIL IL.  Public entry point:
   converts the current HSA function out of SSA form and assigns all
   virtual registers to hardware registers or spill symbols, dumping the
   IL after each phase when detailed dumping is enabled.  */

void
hsa_regalloc (void)
{
  /* Out-of-SSA requires up-to-date dominance information.  */
  hsa_cfun->update_dominance ();
  naive_outof_ssa ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "------- After out-of-SSA: -------\n");
      dump_hsa_cfun (dump_file);
    }

  regalloc ();

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "------- After register allocation: -------\n");
      dump_hsa_cfun (dump_file);
    }
}
|