Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/aarch64/falkor-tag-collision-avoidance.c @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | |
children | 1830386684a0 |
comparison
equal
deleted
inserted
replaced
111:04ced10e8804 | 131:84e7813d76e9 |
---|---|
1 /* Tag Collision Avoidance pass for Falkor. | |
2 Copyright (C) 2018 Free Software Foundation, Inc. | |
3 | |
4 This file is part of GCC. | |
5 | |
6 GCC is free software; you can redistribute it and/or modify it | |
7 under the terms of the GNU General Public License as published by | |
8 the Free Software Foundation; either version 3, or (at your option) | |
9 any later version. | |
10 | |
11 GCC is distributed in the hope that it will be useful, but | |
12 WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 General Public License for more details. | |
15 | |
16 You should have received a copy of the GNU General Public License | |
17 along with GCC; see the file COPYING3. If not see | |
18 <http://www.gnu.org/licenses/>. */ | |
19 | |
20 #define IN_TARGET_CODE 1 | |
21 | |
22 #include "config.h" | |
23 #define INCLUDE_LIST | |
24 #include "system.h" | |
25 #include "coretypes.h" | |
26 #include "backend.h" | |
27 #include "target.h" | |
28 #include "rtl.h" | |
29 #include "tree.h" | |
30 #include "tree-pass.h" | |
31 #include "aarch64-protos.h" | |
32 #include "hash-map.h" | |
33 #include "cfgloop.h" | |
34 #include "cfgrtl.h" | |
35 #include "rtl-iter.h" | |
36 #include "df.h" | |
37 #include "memmodel.h" | |
38 #include "optabs.h" | |
39 #include "regs.h" | |
40 #include "recog.h" | |
41 #include "regrename.h" | |
42 #include "print-rtl.h" | |
43 | |
44 /* The Falkor hardware prefetching system uses the encoding of the registers | |
45 and offsets of loads to decide which of the multiple hardware prefetchers to | |
46 assign the load to. This has the positive effect of accelerating prefetches | |
47 when all related loads with uniform strides are assigned to the same | |
48 prefetcher unit. The down side is that because of the way the assignment | |
49 works, multiple unrelated loads may end up on the same prefetch unit, thus | |
50 causing the unit to bounce between different sets of addresses and never | |
51 train correctly. The point of this pass is to avoid such collisions so that | |
52 unrelated loads are spread out to different prefetchers. It also makes a | |
53 rudimentary attempt to ensure that related loads with the same tags don't | |
54 get moved out unnecessarily. | |
55 | |
56 Perhaps a future enhancement would be to make a more concerted attempt to | |
57 get related loads under the same tag. See the memcpy/memset implementation | |
58 for falkor in glibc to understand the kind of impact this can have on | |
59 falkor. | |
60 | |
61 The assignment of loads is based on a tag that is computed from the encoding | |
62 of the first destination register (the only destination in case of LDR), the | |
63 base register and the offset (either the register or the immediate value, as | |
64 encoded in the instruction). This is what the 14 bit tag looks like: | |
65 | |
66 |<- 6 bits ->|<- 4b ->|<- 4b ->| | |
67 -------------------------------- | |
68 | OFFSET | SRC | DST | | |
69 -------------------------------- | |
70 | |
71 For all cases, the SRC and DST are the 4 LSB of the encoding of the register | |
72 in the instruction. Offset computation is more involved and is as follows: | |
73 | |
74 - For register offset addressing: 4 LSB of the offset register with the MSB | |
75 of the 6 bits set to 1. | |
76 | |
77 - For immediate offset: 4 LSB of the encoded immediate offset. The encoding | |
78 depends on the width of the load and is expressed as multiples of the | |
79 width. | |
80 | |
81 - For loads with update: 4 LSB of the offset. The encoding here is the | |
82 exact number by which the base is offset and incremented. | |
83 | |
84 Based on the above it is clear that registers 0 and 16 will result in | |
85 collisions, 1 and 17 and so on. This pass detects such collisions within a | |
86 def/use chain of the source register in a loop and tries to resolve the | |
87 collision by renaming one of the destination registers. */ | |
88 | |
/* Get the destination part of the tag.  The destination is encoded in the
   4 least significant bits of the tag.  */
#define TAG_GET_DEST(__tag) ((__tag) & 0xf)

/* Get the tag with the destination part updated to __DEST.  Both arguments
   are fully parenthesized so that expression arguments (e.g. "a | b")
   expand correctly; without the parentheses around __DEST, "&" would bind
   tighter than a caller's "|".  */
#define TAG_UPDATE_DEST(__tag, __dest) (((__tag) & ~0xf) | ((__dest) & 0xf))

/* Strides larger than this are not expected to train the hardware
   prefetcher and are ignored when estimating induction variables.  */
#define MAX_PREFETCH_STRIDE 2048
96 | |
/* The instruction information structure.  This is used to cache information
   about the INSN that we derive when traversing through all of the insns in
   loops.  */
class tag_insn_info
{
public:
  rtx_insn *insn;	/* The load insn itself.  */
  rtx dest;		/* Destination register of the load.  */
  rtx base;		/* Base register of the address.  */
  rtx offset;		/* Offset: a register, a CONST_INT, or NULL.  */
  bool writeback;	/* True for pre/post-increment (writeback) loads.  */
  bool ldp;		/* True if this is a load pair.  */

  tag_insn_info (rtx_insn *i, rtx d, rtx b, rtx o, bool w, bool p)
    : insn (i), dest (d), base (b), offset (o), writeback (w), ldp (p)
  {}

  /* Compute the tag based on BASE, DEST and OFFSET of the load.  The tag
     layout is described in the comment at the top of this file: bits [0,3]
     destination, bits [4,7] base, bits [8,13] offset.  */
  unsigned tag ()
  {
    unsigned int_offset = 0;
    rtx offset = this->offset;
    unsigned dest = REGNO (this->dest);
    unsigned base = REGNO (this->base);
    machine_mode dest_mode = GET_MODE (this->dest);

    /* Falkor does not support SVE; GET_LOAD_INFO ensures that the
       destination mode is constant here.  */
    unsigned dest_mode_size = GET_MODE_SIZE (dest_mode).to_constant ();

    /* For loads of larger than 16 bytes, the DEST part of the tag is 0.
       A load pair transfers twice the mode size, hence the shift by
       this->ldp.  */
    if ((dest_mode_size << this->ldp) > 16)
      dest = 0;

    /* Register offset: 4 LSB of the offset register with the MSB of the
       6-bit offset field set, per the tag layout above.  */
    if (offset && REG_P (offset))
      int_offset = (1 << 5) | REGNO (offset);
    else if (offset && CONST_INT_P (offset))
      {
	/* Immediate offset: expressed as a multiple of the access width.
	   The extra >>= 2 for non-writeback loads presumably mirrors the
	   hardware's immediate encoding -- confirm against Falkor
	   documentation.  */
	int_offset = INTVAL (offset);
	int_offset /= dest_mode_size;
	if (!this->writeback)
	  int_offset >>= 2;
      }

    return ((dest & 0xf)
	    | ((base & 0xf) << 4)
	    | ((int_offset & 0x3f) << 8));
  }
};
145 | |
/* Hash map from a tag (stored as a CONST_INT rtx) to the list of load insns
   that hash to that tag; used to traverse and process instructions with
   colliding tags.  */
typedef hash_map <rtx, auto_vec <tag_insn_info *> > tag_map_t;

/* Vector of instructions with colliding tags.  */
typedef auto_vec <tag_insn_info *> insn_info_list_t;

/* Pair of instruction information and unavailable register set to pass to
   CHECK_COLLIDING_TAGS.  */
typedef std::pair <tag_insn_info *, HARD_REG_SET *> arg_pair_t;
155 | |
156 | |
157 /* Callback to free all tag_insn_info objects. */ | |
158 bool | |
159 free_insn_info (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v, | |
160 void *arg ATTRIBUTE_UNUSED) | |
161 { | |
162 while (v->length () > 0) | |
163 delete v->pop (); | |
164 | |
165 return true; | |
166 } | |
167 | |
168 | |
169 /* Add all aliases of the register to the unavailable register set. REG is the | |
170 smallest register number that can then be used to reference its aliases. | |
171 UNAVAILABLE is the hard register set to add the ignored register numbers to | |
172 and MODE is the mode in which the registers would have been used. */ | |
173 static void | |
174 ignore_all_aliases (HARD_REG_SET *unavailable, machine_mode mode, unsigned reg) | |
175 { | |
176 add_to_hard_reg_set (unavailable, mode, reg); | |
177 add_to_hard_reg_set (unavailable, mode, reg + 16); | |
178 add_to_hard_reg_set (unavailable, mode, reg + 32); | |
179 add_to_hard_reg_set (unavailable, mode, reg + 48); | |
180 } | |
181 | |
182 | |
/* Callback to check which destination registers are unavailable to us for
   renaming because of the base and offset colliding.  This is a callback
   that gets called for every name value pair (T, V) in the TAG_MAP.  The
   ARG is an std::pair of the tag_insn_info of the original insn and the
   hard register set UNAVAILABLE that is used to record hard register
   numbers that cannot be used for the renaming.  This always returns true
   since we want to traverse through the entire TAG_MAP.  */
bool
check_colliding_tags (const rtx &t, const insn_info_list_t &v, arg_pair_t *arg)
{
  HARD_REG_SET *unavailable = arg->second;
  unsigned orig_tag = arg->first->tag ();
  unsigned tag = INTVAL (t);
  machine_mode mode = GET_MODE (arg->first->dest);

  /* Can't collide with emptiness.  */
  if (v.length () == 0)
    return true;

  /* Drop all aliased destination registers that result in the same
     tag.  It is not necessary to drop all of them but we do anyway
     because it is quicker than checking ranges.  Two tags collide when
     they are equal with the DEST field masked out.  */
  if (TAG_UPDATE_DEST (tag, 0) == TAG_UPDATE_DEST (orig_tag, 0))
    ignore_all_aliases (unavailable, mode, TAG_GET_DEST (tag));

  return true;
}
210 | |
211 | |
/* Initialize and build a set of hard register numbers UNAVAILABLE to avoid
   for renaming.  INSN_INFO is the original insn, TAG_MAP is the map of the
   list of insns indexed by their tags, HEAD is the def/use chain head of
   the destination register of the original insn.  The routine returns the
   super class of register classes that may be used during the renaming.  */
static enum reg_class
init_unavailable (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head,
		  HARD_REG_SET *unavailable)
{
  unsigned dest = head->regno;
  enum reg_class super_class = NO_REGS;
  machine_mode mode = GET_MODE (insn_info->dest);

  CLEAR_HARD_REG_SET (*unavailable);

  /* Gather constraints from every non-debug use in the chain: registers
     outside a use's register class are unavailable, and the union of all
     the classes bounds the search for a replacement register.  */
  for (struct du_chain *tmp = head->first; tmp; tmp = tmp->next_use)
    {
      if (DEBUG_INSN_P (tmp->insn))
	continue;

      IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]);
      super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
    }

  /* Fixed and global registers may never be used as a rename target.  */
  for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (fixed_regs[i] || global_regs[i])
      add_to_hard_reg_set (unavailable, mode, i);

  arg_pair_t arg = arg_pair_t (insn_info, unavailable);

  /* Exclude all registers that would lead to collisions with other loads.  */
  tag_map.traverse <arg_pair_t *, check_colliding_tags> (&arg);

  /* Finally, also ignore all aliases of the current reg.  */
  ignore_all_aliases (unavailable, mode, dest & 0xf);

  return super_class;
}
250 | |
251 | |
/* Find a suitable and available register and rename the chain of
   occurrences of the register defined in the def/use chain headed by HEAD
   in which INSN_INFO's insn exists.  TAG_MAP is used to determine which
   registers are unavailable due to a potential collision after the rename.
   The routine returns the new register number in case of a successful
   rename or -1 to indicate failure.  */
static int
rename_chain (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head)
{
  unsigned dest_regno = head->regno;

  /* Bail out on chains regrename has marked unrenamable or has already
     renamed.  */
  if (head->cannot_rename || head->renamed)
    return -1;

  HARD_REG_SET unavailable;

  enum reg_class super_class = init_unavailable (insn_info, tag_map, head,
						 &unavailable);

  unsigned new_regno = find_rename_reg (head, super_class, &unavailable,
					dest_regno, false);

  /* Attempt to rename as long as regrename doesn't just throw the same
     register at us.  */
  if (new_regno != dest_regno && regrename_do_replace (head, new_regno))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "\tInsn %d: Renamed %d to %d\n",
		 INSN_UID (insn_info->insn), dest_regno, new_regno);

      return new_regno;
    }

  return -1;
}
287 | |
288 | |
289 /* Return true if REGNO is not safe to rename. */ | |
290 static bool | |
291 unsafe_rename_p (unsigned regno) | |
292 { | |
293 /* Avoid renaming registers used for argument passing and return value. In | |
294 future we could be a little less conservative and walk through the basic | |
295 blocks to see if there are any call or syscall sites. */ | |
296 if (regno <= R8_REGNUM | |
297 || (regno >= V0_REGNUM && regno < V8_REGNUM)) | |
298 return true; | |
299 | |
300 /* Don't attempt to rename registers that may have specific meanings. */ | |
301 switch (regno) | |
302 { | |
303 case LR_REGNUM: | |
304 case HARD_FRAME_POINTER_REGNUM: | |
305 case FRAME_POINTER_REGNUM: | |
306 case STACK_POINTER_REGNUM: | |
307 return true; | |
308 } | |
309 | |
310 return false; | |
311 } | |
312 | |
313 | |
/* Go through the def/use chains for the destination register of the load in
   INSN_INFO and find the chain for this insn to rename.  The function
   returns the hard register number in case of a successful rename and -1
   otherwise.  */
static int
rename_dest (tag_insn_info *insn_info, tag_map_t &tag_map)
{
  struct du_chain *chain = NULL;
  du_head_p head = NULL;
  int i;

  unsigned dest_regno = REGNO (insn_info->dest);

  /* Argument/return/frame registers etc. must never be renamed.  */
  if (unsafe_rename_p (dest_regno))
    return -1;

  /* Search the chain where this instruction is (one of) the root.  */
  rtx_insn *insn = insn_info->insn;
  operand_rr_info *dest_op_info = insn_rr[INSN_UID (insn)].op_info;

  for (i = 0; i < dest_op_info->n_chains; i++)
    {
      /* The register tracked by this chain does not match the
	 destination register of insn.  */
      if (dest_op_info->heads[i]->regno != dest_regno)
	continue;

      head = dest_op_info->heads[i];
      /* The chain was merged in another, find the new head.  */
      if (!head->first)
	head = regrename_chain_from_id (head->id);

      for (chain = head->first; chain; chain = chain->next_use)
	/* Found the insn in the chain, so try renaming the register in
	   this chain.  */
	if (chain->insn == insn)
	  return rename_chain (insn_info, tag_map, head);
    }

  return -1;
}
354 | |
355 | |
/* Flag to track if the tag map has changed during a traversal.  */
static bool map_changed = false;

/* The actual reallocation logic.  For each vector of collisions V, try to
   resolve the collision by attempting to rename the destination register of
   all but one of the loads.  This is a callback that is invoked for each
   name-value pair (T, V) in TAG_MAP.  The function returns false -- halting
   the traversal -- as soon as a rename succeeds and modifies TAG_MAP, and
   true otherwise so the traversal continues.  */
bool
avoid_collisions_1 (const rtx &t, insn_info_list_t *v, tag_map_t *tag_map)
{
  /* We need at least two loads to cause a tag collision, return
     unchanged.  */
  if (v->length () < 2)
    return true;

  tag_insn_info *vec_start = v->pop ();
  tag_insn_info *insn_info = vec_start;

  /* Try to rename at least one register to reduce the collision.  If we
     iterate all the way through, we end up dropping one of the loads from
     the list.  This is fine because we want at most one element to ensure
     that a subsequent rename attempt does not end up worsening the
     collision.  */
  do
    {
      int new_regno;

      if ((new_regno = rename_dest (insn_info, *tag_map)) != -1)
	{
	  /* Success: move the renamed load under its new tag and record
	     that the map changed so the caller traverses again.  */
	  rtx new_tag = GEN_INT (TAG_UPDATE_DEST (INTVAL (t), new_regno));

	  tag_map->get_or_insert (new_tag).safe_push (insn_info);
	  df_set_regs_ever_live (new_regno, true);
	  map_changed = true;
	  return false;
	}

      /* Rename failed; rotate the vector and try the next load until we
	 come back around to the first one.  */
      v->safe_insert (0, insn_info);
      insn_info = v->pop ();
    }
  while (insn_info != vec_start);

  if (dump_file)
    fprintf (dump_file, "\t>> Failed to rename destination in insn %d\n\t>>",
	     INSN_UID (insn_info->insn));

  /* Drop the last element and move on to the next tag.  */
  delete insn_info;
  return true;
}
405 | |
406 | |
407 /* For each set of collisions, attempt to rename the registers or insert a move | |
408 to avoid the collision. We repeatedly traverse through TAG_MAP using | |
409 AVOID_COLLISIONS_1 trying to rename registers to avoid collisions until a | |
410 full traversal results in no change in the map. */ | |
411 static void | |
412 avoid_collisions (tag_map_t &tag_map) | |
413 { | |
414 do | |
415 { | |
416 map_changed = false; | |
417 tag_map.traverse <tag_map_t *, avoid_collisions_1> (&tag_map); | |
418 } | |
419 while (map_changed); | |
420 } | |
421 | |
422 | |
423 | |
424 /* Find the use def chain in which INSN exists and then see if there is a | |
425 definition inside the loop and outside it. We use this as a simple | |
426 approximation to determine whether the base register is an IV. The basic | |
427 idea is to find INSN in the use-def chains for its base register and find | |
428 all definitions that reach it. Of all these definitions, there should be at | |
429 least one definition that is a simple addition of a constant value, either | |
430 as a binary operation or a pre or post update. | |
431 | |
432 The function returns true if the base register is estimated to be an IV. */ | |
433 static bool | |
434 iv_p (rtx_insn *insn, rtx reg, struct loop *loop) | |
435 { | |
436 df_ref ause; | |
437 unsigned regno = REGNO (reg); | |
438 | |
439 /* Ignore loads from the stack. */ | |
440 if (regno == SP_REGNUM) | |
441 return false; | |
442 | |
443 for (ause = DF_REG_USE_CHAIN (regno); ause; ause = DF_REF_NEXT_REG (ause)) | |
444 { | |
445 if (!DF_REF_INSN_INFO (ause) | |
446 || !NONDEBUG_INSN_P (DF_REF_INSN (ause))) | |
447 continue; | |
448 | |
449 if (insn != DF_REF_INSN (ause)) | |
450 continue; | |
451 | |
452 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
453 df_ref def_rec; | |
454 | |
455 FOR_EACH_INSN_INFO_DEF (def_rec, insn_info) | |
456 { | |
457 rtx_insn *insn = DF_REF_INSN (def_rec); | |
458 basic_block bb = BLOCK_FOR_INSN (insn); | |
459 | |
460 if (dominated_by_p (CDI_DOMINATORS, bb, loop->header) | |
461 && bb->loop_father == loop) | |
462 { | |
463 if (recog_memoized (insn) < 0) | |
464 continue; | |
465 | |
466 rtx pat = PATTERN (insn); | |
467 | |
468 /* Prefetch or clobber; unlikely to be a constant stride. The | |
469 falkor software prefetcher tuning is pretty conservative, so | |
470 its presence indicates that the access pattern is probably | |
471 strided but most likely with an unknown stride size or a | |
472 stride size that is quite large. */ | |
473 if (GET_CODE (pat) != SET) | |
474 continue; | |
475 | |
476 rtx x = SET_SRC (pat); | |
477 if (GET_CODE (x) == ZERO_EXTRACT | |
478 || GET_CODE (x) == ZERO_EXTEND | |
479 || GET_CODE (x) == SIGN_EXTEND) | |
480 x = XEXP (x, 0); | |
481 | |
482 /* Loading the value from memory; unlikely to be a constant | |
483 stride. */ | |
484 if (MEM_P (x)) | |
485 continue; | |
486 | |
487 /* An increment or decrement by a constant MODE_SIZE amount or | |
488 the result of a binary expression is likely to be an IV. */ | |
489 if (GET_CODE (x) == POST_INC | |
490 || GET_CODE (x) == POST_DEC | |
491 || GET_CODE (x) == PRE_INC | |
492 || GET_CODE (x) == PRE_DEC) | |
493 return true; | |
494 else if (BINARY_P (x) | |
495 && (CONST_INT_P (XEXP (x, 0)) | |
496 || CONST_INT_P (XEXP (x, 1)))) | |
497 { | |
498 rtx stride = (CONST_INT_P (XEXP (x, 0)) | |
499 ? XEXP (x, 0) : XEXP (x, 1)); | |
500 | |
501 /* Don't bother with very long strides because the prefetcher | |
502 is unable to train on them anyway. */ | |
503 if (INTVAL (stride) < MAX_PREFETCH_STRIDE) | |
504 return true; | |
505 } | |
506 } | |
507 } | |
508 return false; | |
509 } | |
510 return false; | |
511 } | |
512 | |
513 | |
/* Return true if SRC is a strided load in the LOOP, false otherwise.
   If it is a strided load, set the BASE and OFFSET.  Also, if this is
   a pre/post increment load, set PRE_POST to true.  LOAD_PAIR indicates
   that INSN loads a register pair, which doubles the effective writeback
   offset for post-increment addressing.  */
static bool
valid_src_p (rtx src, rtx_insn *insn, struct loop *loop, bool *pre_post,
	     rtx *base, rtx *offset, bool load_pair)
{
  subrtx_var_iterator::array_type array;
  rtx x = NULL_RTX;

  /* Find the (first) memory reference within SRC.  */
  FOR_EACH_SUBRTX_VAR (iter, array, src, NONCONST)
    if (MEM_P (*iter))
      {
	x = *iter;
	break;
      }

  if (!x)
    return false;

  struct aarch64_address_info addr;
  machine_mode mode = GET_MODE (x);

  if (!aarch64_classify_address (&addr, XEXP (x, 0), mode, true))
    return false;

  unsigned regno = REGNO (addr.base);
  if (global_regs[regno] || fixed_regs[regno])
    return false;

  if (addr.type == ADDRESS_REG_WB)
    {
      unsigned code = GET_CODE (XEXP (x, 0));

      *pre_post = true;
      *base = addr.base;

      if (code == PRE_MODIFY || code == POST_MODIFY)
	*offset = addr.offset;
      else
	{
	  /* Writeback is only supported for fixed-width modes.  */
	  unsigned int_offset = GET_MODE_SIZE (mode).to_constant ();

	  /* For post-incremented load pairs we would increment the base
	     twice over, so make that adjustment.  */
	  if (load_pair && (code == POST_INC || code == POST_DEC))
	    int_offset *= 2;

	  *offset = GEN_INT (int_offset);
	}
      return true;
    }
  else if (addr.type == ADDRESS_REG_IMM || addr.type == ADDRESS_REG_REG)
    {
      /* Check if the load is strided, i.e. the base register looks like an
	 induction variable in LOOP.  */
      if (!iv_p (insn, addr.base, loop))
	return false;

      *base = addr.base;
      *offset = addr.offset;
      return true;
    }

  return false;
}
580 | |
581 | |
/* Return true if INSN is a strided load in LOOP.  If it is a strided load,
   set the DEST, BASE and OFFSET.  Also, if this is a pre/post increment
   load, set PRE_POST to true, and set LDP if it is a load pair.

   The routine does checks on the destination of the insn and depends on
   VALID_SRC_P to check the source and fill in the BASE and OFFSET.  */
static bool
get_load_info (rtx_insn *insn, struct loop *loop, rtx *dest, rtx *base,
	       rtx *offset, bool *pre_post, bool *ldp)
{
  if (!INSN_P (insn) || recog_memoized (insn) < 0)
    return false;

  rtx pat = PATTERN (insn);
  unsigned code = GET_CODE (pat);
  bool load_pair = (code == PARALLEL);

  /* For a load pair we need only the first base and destination
     registers.  We however need to ensure that our pre/post increment
     offset is doubled; we do that in VALID_SRC_P.  */
  if (load_pair)
    {
      pat = XVECEXP (pat, 0, 0);
      code = GET_CODE (pat);
    }

  if (code != SET)
    return false;

  rtx dest_rtx = SET_DEST (pat);

  if (!REG_P (dest_rtx))
    return false;

  unsigned regno = REGNO (dest_rtx);
  machine_mode mode = GET_MODE (dest_rtx);
  machine_mode inner_mode = GET_MODE_INNER (mode);

  /* Falkor does not support SVE vectors.  */
  if (!GET_MODE_SIZE (mode).is_constant ())
    return false;

  /* Ignore vector struct or lane loads.  */
  if (GET_MODE_SIZE (mode).to_constant ()
      != GET_MODE_SIZE (inner_mode).to_constant ())
    return false;

  /* The largest width we want to bother with is a load of a pair of
     quad-words.  */
  if ((GET_MODE_SIZE (mode).to_constant () << load_pair)
      > GET_MODE_SIZE (OImode))
    return false;

  /* Ignore loads into the stack pointer because it is unlikely to be a
     stream.  */
  if (regno == SP_REGNUM)
    return false;

  if (valid_src_p (SET_SRC (pat), insn, loop, pre_post, base, offset,
		   load_pair))
    {
      *dest = dest_rtx;
      *ldp = load_pair;

      return true;
    }

  return false;
}
651 | |
652 | |
/* Return whether INSN and CAND are in the same def/use chain for register
   REGNO, both appearing as the destination (SET_DEST) of their
   patterns.  */
static bool
in_same_chain (rtx_insn *insn, rtx_insn *cand, unsigned regno)
{
  struct du_chain *chain = NULL;
  du_head_p head = NULL;
  int i;

  /* Search the chain where this instruction is (one of) the root.  */
  operand_rr_info *op_info = insn_rr[INSN_UID (insn)].op_info;

  for (i = 0; i < op_info->n_chains; i++)
    {
      /* The register tracked by this chain does not match the
	 dest register of insn.  */
      if (op_info->heads[i]->regno != regno)
	continue;

      head = op_info->heads[i];
      /* The chain was merged in another, find the new head.  */
      if (!head->first)
	head = regrename_chain_from_id (head->id);

      bool found_insn = false, found_cand = false;

      for (chain = head->first; chain; chain = chain->next_use)
	{
	  rtx *loc = &SET_DEST (PATTERN (chain->insn));

	  /* Only consider chain entries referring to the destination of
	     their insn.  */
	  if (chain->loc != loc)
	    continue;

	  if (chain->insn == insn)
	    found_insn = true;

	  if (chain->insn == cand)
	    found_cand = true;

	  /* Both insns define in this chain, so they move together.  */
	  if (found_insn && found_cand)
	    return true;
	}
    }

  return false;
}
698 | |
699 | |
/* Callback function to traverse the tag map and drop loads that have the
   same destination and are in the same chain of occurrence.  Routine always
   returns true to allow traversal through all of TAG_MAP.  */
bool
single_dest_per_chain (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v,
		       void *arg ATTRIBUTE_UNUSED)
{
  for (int i = v->length () - 1; i >= 1; i--)
    {
      tag_insn_info *insn_info = (*v)[i];

      for (int j = v->length () - 2; j >= 0; j--)
	{
	  /* Filter out destinations in the same chain.  */
	  if (in_same_chain (insn_info->insn, (*v)[j]->insn,
			     REGNO (insn_info->dest)))
	    {
	      /* Restart the outer scan: removing an element shifts the
		 indices of everything after it.  */
	      v->ordered_remove (j);
	      i = v->length ();
	      break;
	    }
	}
    }

  return true;
}
726 | |
727 | |
728 /* Callback invoked for each name-value pair (T, INSN_INFO) to dump the insn | |
729 list INSN_INFO for tag T. */ | |
730 bool | |
731 dump_insn_list (const rtx &t, const insn_info_list_t &insn_info, | |
732 void *unused ATTRIBUTE_UNUSED) | |
733 { | |
734 gcc_assert (dump_file); | |
735 fprintf (dump_file, "Tag 0x%lx ::\n", INTVAL (t)); | |
736 | |
737 for (unsigned i = 0; i < insn_info.length (); i++) | |
738 dump_insn_slim (dump_file, insn_info[i]->insn); | |
739 | |
740 fprintf (dump_file, "\n"); | |
741 | |
742 return true; | |
743 } | |
744 | |
745 | |
746 /* Record all loads in LOOP into TAG_MAP indexed by the falkor hardware | |
747 prefetcher memory tags. */ | |
748 static void | |
749 record_loads (tag_map_t &tag_map, struct loop *loop) | |
750 { | |
751 rtx_insn *insn; | |
752 basic_block *body, bb; | |
753 | |
754 body = get_loop_body (loop); | |
755 | |
756 for (unsigned i = 0; i < loop->num_nodes; i++) | |
757 { | |
758 bb = body[i]; | |
759 FOR_BB_INSNS (bb, insn) | |
760 { | |
761 rtx base = NULL_RTX; | |
762 rtx dest = NULL_RTX; | |
763 rtx offset = NULL_RTX; | |
764 bool writeback = false; | |
765 bool ldp = false; | |
766 | |
767 if (!INSN_P (insn) || DEBUG_INSN_P (insn)) | |
768 continue; | |
769 | |
770 if (get_load_info (insn, loop, &dest, &base, &offset, &writeback, | |
771 &ldp)) | |
772 { | |
773 tag_insn_info *i = new tag_insn_info (insn, dest, base, offset, | |
774 writeback, ldp); | |
775 rtx tag = GEN_INT (i->tag ()); | |
776 tag_map.get_or_insert (tag).safe_push (i); | |
777 } | |
778 } | |
779 } | |
780 | |
781 if (dump_file) | |
782 { | |
783 fprintf (dump_file, "Loop %d: Tag map generated.\n", loop->num); | |
784 tag_map.traverse <void *, dump_insn_list> (NULL); | |
785 } | |
786 | |
787 /* Try to reduce the dataset before launching into the rename attempt. Drop | |
788 destinations in the same collision chain that appear in the same def/use | |
789 chain, all as defs. These chains will move together in a rename so | |
790 there's no point in keeping both in there. */ | |
791 tag_map.traverse <void *, single_dest_per_chain> (NULL); | |
792 } | |
793 | |
794 | |
/* Tag collision avoidance pass for Falkor.  The pass runs in two phases for
   each loop; the first phase collects all loads that we consider as
   interesting for renaming into a tag-indexed map of lists.  The second
   phase renames the destination register of the loads in an attempt to
   spread out the loads into different tags.  */
void
execute_tag_collision_avoidance ()
{
  struct loop *loop;

  /* Set up the dataflow problems we rely on: use-def chains with dead
     defs pruned, plus up-to-date regs-ever-live information.  */
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_UD_CHAIN);
  df_compute_regs_ever_live (true);
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Build the def/use chains used to find and perform the renames.  */
  regrename_init (true);
  regrename_analyze (NULL);

  compute_bb_for_insn ();
  calculate_dominance_info (CDI_DOMINATORS);
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  /* Process loops from the innermost outward.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      tag_map_t tag_map (512);

      record_loads (tag_map, loop);
      avoid_collisions (tag_map);
      if (dump_file)
	{
	  fprintf (dump_file, "Loop %d: Completed rename.\n", loop->num);
	  tag_map.traverse <void *, dump_insn_list> (NULL);
	}
      /* Release the cached tag_insn_info objects for this loop.  */
      tag_map.traverse <void *, free_insn_info> (NULL);
    }

  loop_optimizer_finalize ();
  free_dominance_info (CDI_DOMINATORS);
  regrename_finish ();
}
836 | |
837 | |
/* Metadata for the tag collision avoidance RTL pass.  */
const pass_data pass_data_tag_collision_avoidance =
{
  RTL_PASS, /* type */
  "tag_collision_avoidance", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
850 | |
851 | |
/* The pass object.  */
class pass_tag_collision_avoidance : public rtl_opt_pass
{
public:
  pass_tag_collision_avoidance (gcc::context *ctxt)
    : rtl_opt_pass (pass_data_tag_collision_avoidance, ctxt)
  {}

  /* opt_pass methods: */

  /* Run only when tuning for a CPU that asks for load-register renaming
     (AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS) and at -O2 or higher.  */
  virtual bool gate (function *)
  {
    return ((aarch64_tune_params.extra_tuning_flags
	     & AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS)
	    && optimize >= 2);
  }

  virtual unsigned int execute (function *)
  {
    execute_tag_collision_avoidance ();
    return 0;
  }

}; // class pass_tag_collision_avoidance
874 | |
875 | |
876 /* Create a new pass instance. */ | |
877 rtl_opt_pass * | |
878 make_pass_tag_collision_avoidance (gcc::context *ctxt) | |
879 { | |
880 return new pass_tag_collision_avoidance (ctxt); | |
881 } |