comparison gcc/config/aarch64/falkor-tag-collision-avoidance.c @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents
children 1830386684a0
comparison
equal deleted inserted replaced
111:04ced10e8804 131:84e7813d76e9
1 /* Tag Collision Avoidance pass for Falkor.
2 Copyright (C) 2018 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
9 any later version.
10
11 GCC is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 #define IN_TARGET_CODE 1
21
22 #include "config.h"
23 #define INCLUDE_LIST
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "tree-pass.h"
31 #include "aarch64-protos.h"
32 #include "hash-map.h"
33 #include "cfgloop.h"
34 #include "cfgrtl.h"
35 #include "rtl-iter.h"
36 #include "df.h"
37 #include "memmodel.h"
38 #include "optabs.h"
39 #include "regs.h"
40 #include "recog.h"
41 #include "regrename.h"
42 #include "print-rtl.h"
43
44 /* The Falkor hardware prefetching system uses the encoding of the registers
45 and offsets of loads to decide which of the multiple hardware prefetchers to
46 assign the load to. This has the positive effect of accelerating prefetches
47 when all related loads with uniform strides are assigned to the same
48 prefetcher unit. The down side is that because of the way the assignment
49 works, multiple unrelated loads may end up on the same prefetch unit, thus
50 causing the unit to bounce between different sets of addresses and never
51 train correctly. The point of this pass is to avoid such collisions so that
52 unrelated loads are spread out to different prefetchers. It also makes a
53 rudimentary attempt to ensure that related loads with the same tags don't
54 get moved out unnecessarily.
55
56 Perhaps a future enhancement would be to make a more concerted attempt to
57 get related loads under the same tag. See the memcpy/memset implementation
58 for falkor in glibc to understand the kind of impact this can have on
59 falkor.
60
61 The assignment of loads is based on a tag that is computed from the encoding
62 of the first destination register (the only destination in case of LDR), the
63 base register and the offset (either the register or the immediate value, as
64 encoded in the instruction). This is what the 14 bit tag looks like:
65
66 |<- 6 bits ->|<- 4b ->|<- 4b ->|
67 --------------------------------
68 | OFFSET | SRC | DST |
69 --------------------------------
70
71 For all cases, the SRC and DST are the 4 LSB of the encoding of the register
72 in the instruction. Offset computation is more involved and is as follows:
73
74 - For register offset addressing: 4 LSB of the offset register with the MSB
75 of the 6 bits set to 1.
76
77 - For immediate offset: 4 LSB of the encoded immediate offset. The encoding
78 depends on the width of the load and is expressed as multiples of the
79 width.
80
81 - For loads with update: 4 LSB of the offset. The encoding here is the
82 exact number by which the base is offset and incremented.
83
84 Based on the above it is clear that registers 0 and 16 will result in
85 collisions, 1 and 17 and so on. This pass detects such collisions within a
86 def/use chain of the source register in a loop and tries to resolve the
87 collision by renaming one of the destination registers. */
88
/* Get the destination part of the tag.  */
#define TAG_GET_DEST(__tag) ((__tag) & 0xf)

/* Get the tag with the destination part updated.  Both arguments are
   fully parenthesized so that passing a compound expression as __DEST
   cannot change the operator grouping.  */
#define TAG_UPDATE_DEST(__tag, __dest) (((__tag) & ~0xf) | ((__dest) & 0xf))

/* Strides at or beyond this value are considered too long for the hardware
   prefetcher to train on.  */
#define MAX_PREFETCH_STRIDE 2048
96
/* The instruction information structure.  This is used to cache information
   about the INSN that we derive when traversing through all of the insns in
   loops.  */
class tag_insn_info
{
public:
  /* The load instruction itself.  */
  rtx_insn *insn;
  /* The first (or only) destination register of the load.  */
  rtx dest;
  /* The base address register.  */
  rtx base;
  /* The offset: a register, a CONST_INT, or NULL_RTX.  */
  rtx offset;
  /* True for pre/post-increment (writeback) addressing.  */
  bool writeback;
  /* True if the insn is a load pair.  */
  bool ldp;

  tag_insn_info (rtx_insn *i, rtx d, rtx b, rtx o, bool w, bool p)
    : insn (i), dest (d), base (b), offset (o), writeback (w), ldp (p)
  {}

  /* Compute the tag based on BASE, DEST and OFFSET of the load: DEST in
     bits [3:0], BASE in bits [7:4], the 6-bit offset encoding in
     bits [13:8] (see the layout diagram at the top of the file).  */
  unsigned tag ()
    {
      unsigned int_offset = 0;
      rtx offset = this->offset;
      unsigned dest = REGNO (this->dest);
      unsigned base = REGNO (this->base);
      machine_mode dest_mode = GET_MODE (this->dest);

      /* Falkor does not support SVE; GET_LOAD_INFO ensures that the
	 destination mode is constant here.  */
      unsigned dest_mode_size = GET_MODE_SIZE (dest_mode).to_constant ();

      /* For loads of larger than 16 bytes, the DEST part of the tag is 0.
	 The shift doubles the effective width for a load pair.  */
      if ((dest_mode_size << this->ldp) > 16)
	dest = 0;

      /* Register offset: low 4 bits of the offset register, with bit 5 set
	 to mark register (rather than immediate) addressing.  */
      if (offset && REG_P (offset))
	int_offset = (1 << 5) | REGNO (offset);
      else if (offset && CONST_INT_P (offset))
	{
	  /* Immediate offsets are encoded as multiples of the access width;
	     non-writeback offsets are scaled down by a further 4.
	     NOTE(review): presumably this mirrors the hardware's immediate
	     encoding — confirm against the Falkor documentation.  */
	  int_offset = INTVAL (offset);
	  int_offset /= dest_mode_size;
	  if (!this->writeback)
	    int_offset >>= 2;
	}
      return ((dest & 0xf)
	      | ((base & 0xf) << 4)
	      | ((int_offset & 0x3f) << 8));
    }
};
145
/* Hash map from a tag value (boxed as a CONST_INT rtx) to the list of load
   insns that hash to that tag; used to traverse and process instructions
   with colliding tags.  */
typedef hash_map <rtx, auto_vec <tag_insn_info *> > tag_map_t;

/* Vector of instructions with colliding tags.  */
typedef auto_vec <tag_insn_info *> insn_info_list_t;

/* Pair of instruction information and unavailable register set to pass to
   CHECK_COLLIDING_TAGS.  */
typedef std::pair <tag_insn_info *, HARD_REG_SET *> arg_pair_t;
155
156
157 /* Callback to free all tag_insn_info objects. */
158 bool
159 free_insn_info (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v,
160 void *arg ATTRIBUTE_UNUSED)
161 {
162 while (v->length () > 0)
163 delete v->pop ();
164
165 return true;
166 }
167
168
169 /* Add all aliases of the register to the unavailable register set. REG is the
170 smallest register number that can then be used to reference its aliases.
171 UNAVAILABLE is the hard register set to add the ignored register numbers to
172 and MODE is the mode in which the registers would have been used. */
173 static void
174 ignore_all_aliases (HARD_REG_SET *unavailable, machine_mode mode, unsigned reg)
175 {
176 add_to_hard_reg_set (unavailable, mode, reg);
177 add_to_hard_reg_set (unavailable, mode, reg + 16);
178 add_to_hard_reg_set (unavailable, mode, reg + 32);
179 add_to_hard_reg_set (unavailable, mode, reg + 48);
180 }
181
182
183 /* Callback to check which destination registers are unavailable to us for
184 renaming because of the base and offset colliding. This is a callback that
185 gets called for every name value pair (T, V) in the TAG_MAP. The ARG is an
186 std::pair of the tag_insn_info of the original insn and the hard register
187 set UNAVAILABLE that is used to record hard register numbers that cannot be
188 used for the renaming. This always returns true since we want to traverse
189 through the entire TAG_MAP. */
190 bool
191 check_colliding_tags (const rtx &t, const insn_info_list_t &v, arg_pair_t *arg)
192 {
193 HARD_REG_SET *unavailable = arg->second;
194 unsigned orig_tag = arg->first->tag ();
195 unsigned tag = INTVAL (t);
196 machine_mode mode = GET_MODE (arg->first->dest);
197
198 /* Can't collide with emptiness. */
199 if (v.length () == 0)
200 return true;
201
202 /* Drop all aliased destination registers that result in the same
203 tag. It is not necessary to drop all of them but we do anyway
204 because it is quicker than checking ranges. */
205 if (TAG_UPDATE_DEST (tag, 0) == TAG_UPDATE_DEST (orig_tag, 0))
206 ignore_all_aliases (unavailable, mode, TAG_GET_DEST (tag));
207
208 return true;
209 }
210
211
212 /* Initialize and build a set of hard register numbers UNAVAILABLE to avoid for
213 renaming. INSN_INFO is the original insn, TAG_MAP is the map of the list of
214 insns indexed by their tags, HEAD is the def/use chain head of the
215 destination register of the original insn. The routine returns the super
216 class of register classes that may be used during the renaming. */
217 static enum reg_class
218 init_unavailable (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head,
219 HARD_REG_SET *unavailable)
220 {
221 unsigned dest = head->regno;
222 enum reg_class super_class = NO_REGS;
223 machine_mode mode = GET_MODE (insn_info->dest);
224
225 CLEAR_HARD_REG_SET (*unavailable);
226
227 for (struct du_chain *tmp = head->first; tmp; tmp = tmp->next_use)
228 {
229 if (DEBUG_INSN_P (tmp->insn))
230 continue;
231
232 IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]);
233 super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
234 }
235
236 for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
237 if (fixed_regs[i] || global_regs[i])
238 add_to_hard_reg_set (unavailable, mode, i);
239
240 arg_pair_t arg = arg_pair_t (insn_info, unavailable);
241
242 /* Exclude all registers that would lead to collisions with other loads. */
243 tag_map.traverse <arg_pair_t *, check_colliding_tags> (&arg);
244
245 /* Finally, also ignore all aliases of the current reg. */
246 ignore_all_aliases (unavailable, mode, dest & 0xf);
247
248 return super_class;
249 }
250
251
252 /* Find a suitable and available register and rename the chain of occurrences
253 of the register defined in the def/use chain headed by HEAD in which INSN
254 exists. CUR_TAG, TAGS and TAG_MAP are used to determine which registers are
255 unavailable due to a potential collision due to the rename. The routine
256 returns the register number in case of a successful rename or -1 to indicate
257 failure. */
258 static int
259 rename_chain (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head)
260 {
261 unsigned dest_regno = head->regno;
262
263 if (head->cannot_rename || head->renamed)
264 return -1;
265
266 HARD_REG_SET unavailable;
267
268 enum reg_class super_class = init_unavailable (insn_info, tag_map, head,
269 &unavailable);
270
271 unsigned new_regno = find_rename_reg (head, super_class, &unavailable,
272 dest_regno, false);
273
274 /* Attempt to rename as long as regrename doesn't just throw the same
275 register at us. */
276 if (new_regno != dest_regno && regrename_do_replace (head, new_regno))
277 {
278 if (dump_file && (dump_flags & TDF_DETAILS))
279 fprintf (dump_file, "\tInsn %d: Renamed %d to %d\n",
280 INSN_UID (insn_info->insn), dest_regno, new_regno);
281
282 return new_regno;
283 }
284
285 return -1;
286 }
287
288
289 /* Return true if REGNO is not safe to rename. */
290 static bool
291 unsafe_rename_p (unsigned regno)
292 {
293 /* Avoid renaming registers used for argument passing and return value. In
294 future we could be a little less conservative and walk through the basic
295 blocks to see if there are any call or syscall sites. */
296 if (regno <= R8_REGNUM
297 || (regno >= V0_REGNUM && regno < V8_REGNUM))
298 return true;
299
300 /* Don't attempt to rename registers that may have specific meanings. */
301 switch (regno)
302 {
303 case LR_REGNUM:
304 case HARD_FRAME_POINTER_REGNUM:
305 case FRAME_POINTER_REGNUM:
306 case STACK_POINTER_REGNUM:
307 return true;
308 }
309
310 return false;
311 }
312
313
314 /* Go through the def/use chains for the register and find the chain for this
315 insn to rename. The function returns the hard register number in case of a
316 successful rename and -1 otherwise. */
317 static int
318 rename_dest (tag_insn_info *insn_info, tag_map_t &tag_map)
319 {
320 struct du_chain *chain = NULL;
321 du_head_p head = NULL;
322 int i;
323
324 unsigned dest_regno = REGNO (insn_info->dest);
325
326 if (unsafe_rename_p (dest_regno))
327 return -1;
328
329 /* Search the chain where this instruction is (one of) the root. */
330 rtx_insn *insn = insn_info->insn;
331 operand_rr_info *dest_op_info = insn_rr[INSN_UID (insn)].op_info;
332
333 for (i = 0; i < dest_op_info->n_chains; i++)
334 {
335 /* The register tracked by this chain does not match the
336 destination register of insn. */
337 if (dest_op_info->heads[i]->regno != dest_regno)
338 continue;
339
340 head = dest_op_info->heads[i];
341 /* The chain was merged in another, find the new head. */
342 if (!head->first)
343 head = regrename_chain_from_id (head->id);
344
345 for (chain = head->first; chain; chain = chain->next_use)
346 /* Found the insn in the chain, so try renaming the register in this
347 chain. */
348 if (chain->insn == insn)
349 return rename_chain (insn_info, tag_map, head);
350 }
351
352 return -1;
353 }
354
355
/* Flag to track if the map has changed during the current traversal.  */
static bool map_changed = false;

/* The actual reallocation logic.  For each vector of collisions V, try to
   resolve the collision by attempting to rename the destination register of
   all but one of the loads.  This is a callback that is invoked for each
   name-value pair (T, V) in TAG_MAP.  The function returns true whenever
   the map is left unchanged and false after a successful rename, which
   halts the traversal so that it can be restarted on the modified map.  */
bool
avoid_collisions_1 (const rtx &t, insn_info_list_t *v, tag_map_t *tag_map)
{
  /* We need at least two loads to cause a tag collision, return unchanged.  */
  if (v->length () < 2)
    return true;

  tag_insn_info *vec_start = v->pop ();
  tag_insn_info *insn_info = vec_start;

  /* Try to rename at least one register to reduce the collision.  If we
     iterate all the way through, we end up dropping one of the loads from the
     list.  This is fine because we want at most one element to ensure that a
     subsequent rename attempt does not end up worsening the collision.  */
  do
    {
      int new_regno;

      if ((new_regno = rename_dest (insn_info, *tag_map)) != -1)
	{
	  /* The rename changed the DEST field of the tag, so move the insn
	     to the bucket for its new tag.  */
	  rtx new_tag = GEN_INT (TAG_UPDATE_DEST (INTVAL (t), new_regno));

	  tag_map->get_or_insert (new_tag).safe_push (insn_info);
	  df_set_regs_ever_live (new_regno, true);
	  map_changed = true;
	  return false;
	}

      /* Rename failed for this insn; rotate the vector and try the next
	 one.  */
      v->safe_insert (0, insn_info);
      insn_info = v->pop ();
    }
  while (insn_info != vec_start);

  if (dump_file)
    fprintf (dump_file, "\t>> Failed to rename destination in insn %d\n\t>>",
	     INSN_UID (insn_info->insn));

  /* Drop the last element and move on to the next tag.  */
  delete insn_info;
  return true;
}
405
406
407 /* For each set of collisions, attempt to rename the registers or insert a move
408 to avoid the collision. We repeatedly traverse through TAG_MAP using
409 AVOID_COLLISIONS_1 trying to rename registers to avoid collisions until a
410 full traversal results in no change in the map. */
411 static void
412 avoid_collisions (tag_map_t &tag_map)
413 {
414 do
415 {
416 map_changed = false;
417 tag_map.traverse <tag_map_t *, avoid_collisions_1> (&tag_map);
418 }
419 while (map_changed);
420 }
421
422
423
/* Find the use def chain in which INSN exists and then see if there is a
   definition inside the loop and outside it.  We use this as a simple
   approximation to determine whether the base register is an IV.  The basic
   idea is to find INSN in the use-def chains for its base register and find
   all definitions that reach it.  Of all these definitions, there should be
   at least one definition that is a simple addition of a constant value,
   either as a binary operation or a pre or post update.

   The function returns true if the base register is estimated to be an IV.  */
static bool
iv_p (rtx_insn *insn, rtx reg, struct loop *loop)
{
  df_ref ause;
  unsigned regno = REGNO (reg);

  /* Ignore loads from the stack.  */
  if (regno == SP_REGNUM)
    return false;

  /* Walk the uses of REG looking for the one that belongs to INSN.  Note
     that only the first matching use is examined: the loop returns
     unconditionally once it has processed it.  */
  for (ause = DF_REG_USE_CHAIN (regno); ause; ause = DF_REF_NEXT_REG (ause))
    {
      if (!DF_REF_INSN_INFO (ause)
	  || !NONDEBUG_INSN_P (DF_REF_INSN (ause)))
	continue;

      if (insn != DF_REF_INSN (ause))
	continue;

      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      df_ref def_rec;

      /* NOTE(review): this iterates the defs recorded for INSN's own
	 df_insn_info — confirm this matches the reaching-definitions
	 intent described in the header comment above.  */
      FOR_EACH_INSN_INFO_DEF (def_rec, insn_info)
	{
	  /* This INSN shadows the parameter; from here on it refers to the
	     defining insn under inspection, not the load.  */
	  rtx_insn *insn = DF_REF_INSN (def_rec);
	  basic_block bb = BLOCK_FOR_INSN (insn);

	  /* Only consider definitions inside LOOP itself.  */
	  if (dominated_by_p (CDI_DOMINATORS, bb, loop->header)
	      && bb->loop_father == loop)
	    {
	      if (recog_memoized (insn) < 0)
		continue;

	      rtx pat = PATTERN (insn);

	      /* Prefetch or clobber; unlikely to be a constant stride.  The
		 falkor software prefetcher tuning is pretty conservative, so
		 its presence indicates that the access pattern is probably
		 strided but most likely with an unknown stride size or a
		 stride size that is quite large.  */
	      if (GET_CODE (pat) != SET)
		continue;

	      rtx x = SET_SRC (pat);
	      /* Look through extensions/extractions at the value being
		 assigned.  */
	      if (GET_CODE (x) == ZERO_EXTRACT
		  || GET_CODE (x) == ZERO_EXTEND
		  || GET_CODE (x) == SIGN_EXTEND)
		x = XEXP (x, 0);

	      /* Loading the value from memory; unlikely to be a constant
		 stride.  */
	      if (MEM_P (x))
		continue;

	      /* An increment or decrement by a constant MODE_SIZE amount or
		 the result of a binary expression is likely to be an IV.  */
	      if (GET_CODE (x) == POST_INC
		  || GET_CODE (x) == POST_DEC
		  || GET_CODE (x) == PRE_INC
		  || GET_CODE (x) == PRE_DEC)
		return true;
	      else if (BINARY_P (x)
		       && (CONST_INT_P (XEXP (x, 0))
			   || CONST_INT_P (XEXP (x, 1))))
		{
		  rtx stride = (CONST_INT_P (XEXP (x, 0))
				? XEXP (x, 0) : XEXP (x, 1));

		  /* Don't bother with very long strides because the
		     prefetcher is unable to train on them anyway.  Note a
		     negative stride always passes this test.  */
		  if (INTVAL (stride) < MAX_PREFETCH_STRIDE)
		    return true;
		}
	    }
	}
      return false;
    }
  return false;
}
512
513
/* Return true if SRC is a strided load in the LOOP, false otherwise.
   If it is a strided load, set the BASE and OFFSET.  Also, if this is
   a pre/post increment load, set PRE_POST to true.  */
static bool
valid_src_p (rtx src, rtx_insn *insn, struct loop *loop, bool *pre_post,
	     rtx *base, rtx *offset, bool load_pair)
{
  subrtx_var_iterator::array_type array;
  rtx x = NULL_RTX;

  /* Find the first MEM sub-rtx of SRC; that is the access we classify.  */
  FOR_EACH_SUBRTX_VAR (iter, array, src, NONCONST)
    if (MEM_P (*iter))
      {
	x = *iter;
	break;
      }

  if (!x)
    return false;

  struct aarch64_address_info addr;
  machine_mode mode = GET_MODE (x);

  if (!aarch64_classify_address (&addr, XEXP (x, 0), mode, true))
    return false;

  /* Never touch loads based on fixed or global registers.  */
  unsigned regno = REGNO (addr.base);
  if (global_regs[regno] || fixed_regs[regno])
    return false;

  if (addr.type == ADDRESS_REG_WB)
    {
      unsigned code = GET_CODE (XEXP (x, 0));

      *pre_post = true;
      *base = addr.base;

      if (code == PRE_MODIFY || code == POST_MODIFY)
	*offset = addr.offset;
      else
	{
	  /* Plain pre/post inc/dec: the offset is implied by the access
	     width.  Writeback is only supported for fixed-width modes.  */
	  unsigned int_offset = GET_MODE_SIZE (mode).to_constant ();

	  /* For post-incremented load pairs we would increment the base twice
	     over, so make that adjustment.  */
	  if (load_pair && (code == POST_INC || code == POST_DEC))
	    int_offset *= 2;

	  *offset = GEN_INT (int_offset);
	}
      return true;
    }
  else if (addr.type == ADDRESS_REG_IMM || addr.type == ADDRESS_REG_REG)
    {
      /* Check if the load is strided, i.e. its base looks like an IV.  */
      if (!iv_p (insn, addr.base, loop))
	return false;

      *base = addr.base;
      *offset = addr.offset;
      return true;
    }

  return false;
}
580
581
/* Return true if INSN is a strided load in LOOP.  If it is a strided load,
   set the DEST, BASE and OFFSET.  Also, if this is a pre/post increment
   load, set PRE_POST to true and set LDP for a load pair.

   The routine does checks on the destination of the insn and depends on
   VALID_SRC_P to check the source and fill in the BASE and OFFSET.  */
static bool
get_load_info (rtx_insn *insn, struct loop *loop, rtx *dest, rtx *base,
	       rtx *offset, bool *pre_post, bool *ldp)
{
  if (!INSN_P (insn) || recog_memoized (insn) < 0)
    return false;

  rtx pat = PATTERN (insn);
  unsigned code = GET_CODE (pat);
  bool load_pair = (code == PARALLEL);

  /* For a load pair we need only the first base and destination
     registers.  We however need to ensure that our pre/post increment
     offset is doubled; we do that in VALID_SRC_P.  */
  if (load_pair)
    {
      pat = XVECEXP (pat, 0, 0);
      code = GET_CODE (pat);
    }

  if (code != SET)
    return false;

  rtx dest_rtx = SET_DEST (pat);

  if (!REG_P (dest_rtx))
    return false;

  unsigned regno = REGNO (dest_rtx);
  machine_mode mode = GET_MODE (dest_rtx);
  machine_mode inner_mode = GET_MODE_INNER (mode);

  /* Falkor does not support SVE vectors (variable-size modes).  */
  if (!GET_MODE_SIZE (mode).is_constant ())
    return false;

  /* Ignore vector struct or lane loads.  */
  if (GET_MODE_SIZE (mode).to_constant ()
      != GET_MODE_SIZE (inner_mode).to_constant ())
    return false;

  /* The largest width we want to bother with is a load of a pair of
     quad-words.  */
  if ((GET_MODE_SIZE (mode).to_constant () << load_pair)
      > GET_MODE_SIZE (OImode))
    return false;

  /* Ignore loads into the stack pointer because it is unlikely to be a
     stream.  */
  if (regno == SP_REGNUM)
    return false;

  if (valid_src_p (SET_SRC (pat), insn, loop, pre_post, base, offset,
		   load_pair))
    {
      *dest = dest_rtx;
      *ldp = load_pair;

      return true;
    }

  return false;
}
651
652
/* Return whether INSN and CAND both appear as definitions of REGNO in the
   same def/use chain.  */
static bool
in_same_chain (rtx_insn *insn, rtx_insn *cand, unsigned regno)
{
  struct du_chain *chain = NULL;
  du_head_p head = NULL;
  int i;

  /* Search the chain where this instruction is (one of) the root.  */
  operand_rr_info *op_info = insn_rr[INSN_UID (insn)].op_info;

  for (i = 0; i < op_info->n_chains; i++)
    {
      /* The register tracked by this chain does not match the
	 dest register of insn.  */
      if (op_info->heads[i]->regno != regno)
	continue;

      head = op_info->heads[i];
      /* The chain was merged in another, find the new head.  */
      if (!head->first)
	head = regrename_chain_from_id (head->id);

      bool found_insn = false, found_cand = false;

      for (chain = head->first; chain; chain = chain->next_use)
	{
	  /* Only consider chain entries that refer to the SET destination,
	     i.e. definitions rather than uses.  */
	  rtx *loc = &SET_DEST (PATTERN (chain->insn));

	  if (chain->loc != loc)
	    continue;

	  if (chain->insn == insn)
	    found_insn = true;

	  if (chain->insn == cand)
	    found_cand = true;

	  /* Both insns define REGNO in this chain.  */
	  if (found_insn && found_cand)
	    return true;
	}
    }

  return false;
}
698
699
/* Callback function to traverse the tag map and drop loads that have the
   same destination and are in the same chain of occurrence.  Routine always
   returns true to allow traversal through all of TAG_MAP.  */
bool
single_dest_per_chain (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v,
		       void *arg ATTRIBUTE_UNUSED)
{
  for (int i = v->length () - 1; i>= 1; i--)
    {
      tag_insn_info *insn_info = (*v)[i];

      for (int j = v->length () - 2; j >= 0; j--)
	{
	  /* Filter out destinations in the same chain.  */
	  if (in_same_chain (insn_info->insn, (*v)[j]->insn,
			     REGNO (insn_info->dest)))
	    {
	      /* Removing element J shifts the vector; reset I so that the
		 outer loop restarts from the (new) end.  */
	      v->ordered_remove (j);
	      i = v->length ();
	      break;
	    }
	}
    }

  return true;
}
726
727
728 /* Callback invoked for each name-value pair (T, INSN_INFO) to dump the insn
729 list INSN_INFO for tag T. */
730 bool
731 dump_insn_list (const rtx &t, const insn_info_list_t &insn_info,
732 void *unused ATTRIBUTE_UNUSED)
733 {
734 gcc_assert (dump_file);
735 fprintf (dump_file, "Tag 0x%lx ::\n", INTVAL (t));
736
737 for (unsigned i = 0; i < insn_info.length (); i++)
738 dump_insn_slim (dump_file, insn_info[i]->insn);
739
740 fprintf (dump_file, "\n");
741
742 return true;
743 }
744
745
746 /* Record all loads in LOOP into TAG_MAP indexed by the falkor hardware
747 prefetcher memory tags. */
748 static void
749 record_loads (tag_map_t &tag_map, struct loop *loop)
750 {
751 rtx_insn *insn;
752 basic_block *body, bb;
753
754 body = get_loop_body (loop);
755
756 for (unsigned i = 0; i < loop->num_nodes; i++)
757 {
758 bb = body[i];
759 FOR_BB_INSNS (bb, insn)
760 {
761 rtx base = NULL_RTX;
762 rtx dest = NULL_RTX;
763 rtx offset = NULL_RTX;
764 bool writeback = false;
765 bool ldp = false;
766
767 if (!INSN_P (insn) || DEBUG_INSN_P (insn))
768 continue;
769
770 if (get_load_info (insn, loop, &dest, &base, &offset, &writeback,
771 &ldp))
772 {
773 tag_insn_info *i = new tag_insn_info (insn, dest, base, offset,
774 writeback, ldp);
775 rtx tag = GEN_INT (i->tag ());
776 tag_map.get_or_insert (tag).safe_push (i);
777 }
778 }
779 }
780
781 if (dump_file)
782 {
783 fprintf (dump_file, "Loop %d: Tag map generated.\n", loop->num);
784 tag_map.traverse <void *, dump_insn_list> (NULL);
785 }
786
787 /* Try to reduce the dataset before launching into the rename attempt. Drop
788 destinations in the same collision chain that appear in the same def/use
789 chain, all as defs. These chains will move together in a rename so
790 there's no point in keeping both in there. */
791 tag_map.traverse <void *, single_dest_per_chain> (NULL);
792 }
793
794
/* Tag collision avoidance pass for Falkor.  The pass runs in two phases for
   each loop; the first phase collects all loads that we consider as
   interesting for renaming into a tag-indexed map of lists.  The second phase
   renames the destination register of the loads in an attempt to spread out
   the loads into different tags.  */
void
execute_tag_collision_avoidance ()
{
  struct loop *loop;

  /* Set up def/use chains and renaming state; the order of these calls
     matters (df must be analyzed before regrename_analyze).  */
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_UD_CHAIN);
  df_compute_regs_ever_live (true);
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  regrename_init (true);
  regrename_analyze (NULL);

  /* Dominators and loop structure are needed by IV_P and the loop walk.  */
  compute_bb_for_insn ();
  calculate_dominance_info (CDI_DOMINATORS);
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      tag_map_t tag_map (512);

      /* Phase 1: collect the interesting loads; phase 2: rename.  */
      record_loads (tag_map, loop);
      avoid_collisions (tag_map);
      if (dump_file)
	{
	  fprintf (dump_file, "Loop %d: Completed rename.\n", loop->num);
	  tag_map.traverse <void *, dump_insn_list> (NULL);
	}
      /* Release the tag_insn_info objects owned by this loop's map.  */
      tag_map.traverse <void *, free_insn_info> (NULL);
    }

  loop_optimizer_finalize ();
  free_dominance_info (CDI_DOMINATORS);
  regrename_finish ();
}
836
837
/* Pass registration metadata for the Falkor tag collision avoidance pass;
   runs as an RTL pass with no timevar and requests a df cleanup on
   completion.  */
const pass_data pass_data_tag_collision_avoidance =
{
  RTL_PASS, /* type */
  "tag_collision_avoidance", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
850
851
852 class pass_tag_collision_avoidance : public rtl_opt_pass
853 {
854 public:
855 pass_tag_collision_avoidance (gcc::context *ctxt)
856 : rtl_opt_pass (pass_data_tag_collision_avoidance, ctxt)
857 {}
858
859 /* opt_pass methods: */
860 virtual bool gate (function *)
861 {
862 return ((aarch64_tune_params.extra_tuning_flags
863 & AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS)
864 && optimize >= 2);
865 }
866
867 virtual unsigned int execute (function *)
868 {
869 execute_tag_collision_avoidance ();
870 return 0;
871 }
872
873 }; // class pass_tag_collision_avoidance
874
875
876 /* Create a new pass instance. */
877 rtl_opt_pass *
878 make_pass_tag_collision_avoidance (gcc::context *ctxt)
879 {
880 return new pass_tag_collision_avoidance (ctxt);
881 }