131
|
1 /* Tag Collision Avoidance pass for Falkor.
|
|
2 Copyright (C) 2018 Free Software Foundation, Inc.
|
|
3
|
|
4 This file is part of GCC.
|
|
5
|
|
6 GCC is free software; you can redistribute it and/or modify it
|
|
7 under the terms of the GNU General Public License as published by
|
|
8 the Free Software Foundation; either version 3, or (at your option)
|
|
9 any later version.
|
|
10
|
|
11 GCC is distributed in the hope that it will be useful, but
|
|
12 WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
14 General Public License for more details.
|
|
15
|
|
16 You should have received a copy of the GNU General Public License
|
|
17 along with GCC; see the file COPYING3. If not see
|
|
18 <http://www.gnu.org/licenses/>. */
|
|
19
|
|
20 #define IN_TARGET_CODE 1
|
|
21
|
|
22 #include "config.h"
|
|
23 #define INCLUDE_LIST
|
|
24 #include "system.h"
|
|
25 #include "coretypes.h"
|
|
26 #include "backend.h"
|
|
27 #include "target.h"
|
|
28 #include "rtl.h"
|
|
29 #include "tree.h"
|
|
30 #include "tree-pass.h"
|
|
31 #include "aarch64-protos.h"
|
|
32 #include "hash-map.h"
|
|
33 #include "cfgloop.h"
|
|
34 #include "cfgrtl.h"
|
|
35 #include "rtl-iter.h"
|
|
36 #include "df.h"
|
|
37 #include "memmodel.h"
|
|
38 #include "optabs.h"
|
|
39 #include "regs.h"
|
|
40 #include "recog.h"
|
|
41 #include "regrename.h"
|
|
42 #include "print-rtl.h"
|
|
43
|
|
44 /* The Falkor hardware prefetching system uses the encoding of the registers
|
|
45 and offsets of loads to decide which of the multiple hardware prefetchers to
|
|
46 assign the load to. This has the positive effect of accelerating prefetches
|
|
47 when all related loads with uniform strides are assigned to the same
|
|
48 prefetcher unit. The down side is that because of the way the assignment
|
|
49 works, multiple unrelated loads may end up on the same prefetch unit, thus
|
|
50 causing the unit to bounce between different sets of addresses and never
|
|
51 train correctly. The point of this pass is to avoid such collisions so that
|
|
52 unrelated loads are spread out to different prefetchers. It also makes a
|
|
53 rudimentary attempt to ensure that related loads with the same tags don't
|
|
54 get moved out unnecessarily.
|
|
55
|
|
56 Perhaps a future enhancement would be to make a more concerted attempt to
|
|
57 get related loads under the same tag. See the memcpy/memset implementation
|
|
58 for falkor in glibc to understand the kind of impact this can have on
|
|
59 falkor.
|
|
60
|
|
61 The assignment of loads is based on a tag that is computed from the encoding
|
|
62 of the first destination register (the only destination in case of LDR), the
|
|
63 base register and the offset (either the register or the immediate value, as
|
|
64 encoded in the instruction). This is what the 14 bit tag looks like:
|
|
65
|
|
66 |<- 6 bits ->|<- 4b ->|<- 4b ->|
|
|
67 --------------------------------
|
|
68 | OFFSET | SRC | DST |
|
|
69 --------------------------------
|
|
70
|
|
71 For all cases, the SRC and DST are the 4 LSB of the encoding of the register
|
|
72 in the instruction. Offset computation is more involved and is as follows:
|
|
73
|
|
74 - For register offset addressing: 4 LSB of the offset register with the MSB
|
|
75 of the 6 bits set to 1.
|
|
76
|
|
77 - For immediate offset: 4 LSB of the encoded immediate offset. The encoding
|
|
78 depends on the width of the load and is expressed as multiples of the
|
|
79 width.
|
|
80
|
|
81 - For loads with update: 4 LSB of the offset. The encoding here is the
|
|
82 exact number by which the base is offset and incremented.
|
|
83
|
|
84 Based on the above it is clear that registers 0 and 16 will result in
|
|
85 collisions, 1 and 17 and so on. This pass detects such collisions within a
|
|
86 def/use chain of the source register in a loop and tries to resolve the
|
|
87 collision by renaming one of the destination registers. */
|
|
88
|
|
/* Get the destination part (low nibble) of the tag.  */
#define TAG_GET_DEST(__tag) ((__tag) & 0xf)

/* Get the tag with the destination part updated.  __DEST is fully
   parenthesized so that an expression argument (e.g. a ?: conditional)
   cannot change the intended precedence of the mask.  */
#define TAG_UPDATE_DEST(__tag, __dest) (((__tag) & ~0xf) | ((__dest) & 0xf))

/* Strides (in bytes) at or beyond this are considered too long for the
   hardware prefetcher to train on; see iv_p below.  */
#define MAX_PREFETCH_STRIDE 2048
|
|
96
|
|
/* The instruction information structure.  This is used to cache information
   about the INSN that we derive when traversing through all of the insns in
   loops.  */
class tag_insn_info
{
public:
  rtx_insn *insn;	/* The load insn itself.  */
  rtx dest;		/* Destination register (first one for a load pair).  */
  rtx base;		/* Base address register.  */
  rtx offset;		/* Offset: either a register or a CONST_INT.  */
  bool writeback;	/* True for pre/post increment addressing.  */
  bool ldp;		/* True if this is a load pair.  */

  tag_insn_info (rtx_insn *i, rtx d, rtx b, rtx o, bool w, bool p)
    : insn (i), dest (d), base (b), offset (o), writeback (w), ldp (p)
  {}

  /* Compute the tag based on BASE, DEST and OFFSET of the load.  */
  unsigned tag ()
    {
      unsigned int_offset = 0;
      rtx offset = this->offset;
      unsigned dest = REGNO (this->dest);
      unsigned base = REGNO (this->base);
      machine_mode dest_mode = GET_MODE (this->dest);

      /* Falkor does not support SVE; GET_LOAD_INFO ensures that the
	 destination mode is constant here.  */
      unsigned dest_mode_size = GET_MODE_SIZE (dest_mode).to_constant ();

      /* For loads of larger than 16 bytes, the DEST part of the tag is 0.
	 Note LDP counts as doubling the width, hence the shift by LDP.  */
      if ((dest_mode_size << this->ldp) > 16)
	dest = 0;

      /* Register offset: 4 LSB of the offset register, with bit 5 set to
	 mark register-offset addressing (see the file-header comment).  */
      if (offset && REG_P (offset))
	int_offset = (1 << 5) | REGNO (offset);
      else if (offset && CONST_INT_P (offset))
	{
	  /* Immediate offset: expressed as a multiple of the load width;
	     without writeback the hardware appears to see the offset
	     scaled down further — NOTE(review): the >> 2 matches the tag
	     description only approximately; confirm against Falkor docs.  */
	  int_offset = INTVAL (offset);
	  int_offset /= dest_mode_size;
	  if (!this->writeback)
	    int_offset >>= 2;
	}
      /* Pack DST | SRC | OFFSET as described in the file-header comment.  */
      return ((dest & 0xf)
	      | ((base & 0xf) << 4)
	      | ((int_offset & 0x3f) << 8));
    }
};
|
|
145
|
|
/* Hash map to traverse and process instructions with colliding tags.  The
   key is the tag wrapped as a CONST_INT rtx; the value is the list of loads
   that produce that tag.  */
typedef hash_map <rtx, auto_vec <tag_insn_info *> > tag_map_t;

/* Vector of instructions with colliding tags.  */
typedef auto_vec <tag_insn_info *> insn_info_list_t;

/* Pair of instruction information and unavailable register set to pass to
   CHECK_COLLIDING_TAGS.  */
typedef std::pair <tag_insn_info *, HARD_REG_SET *> arg_pair_t;
|
|
155
|
|
156
|
|
157 /* Callback to free all tag_insn_info objects. */
|
|
158 bool
|
|
159 free_insn_info (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v,
|
|
160 void *arg ATTRIBUTE_UNUSED)
|
|
161 {
|
|
162 while (v->length () > 0)
|
|
163 delete v->pop ();
|
|
164
|
|
165 return true;
|
|
166 }
|
|
167
|
|
168
|
|
169 /* Add all aliases of the register to the unavailable register set. REG is the
|
|
170 smallest register number that can then be used to reference its aliases.
|
|
171 UNAVAILABLE is the hard register set to add the ignored register numbers to
|
|
172 and MODE is the mode in which the registers would have been used. */
|
|
173 static void
|
|
174 ignore_all_aliases (HARD_REG_SET *unavailable, machine_mode mode, unsigned reg)
|
|
175 {
|
|
176 add_to_hard_reg_set (unavailable, mode, reg);
|
|
177 add_to_hard_reg_set (unavailable, mode, reg + 16);
|
|
178 add_to_hard_reg_set (unavailable, mode, reg + 32);
|
|
179 add_to_hard_reg_set (unavailable, mode, reg + 48);
|
|
180 }
|
|
181
|
|
182
|
|
183 /* Callback to check which destination registers are unavailable to us for
|
|
184 renaming because of the base and offset colliding. This is a callback that
|
|
185 gets called for every name value pair (T, V) in the TAG_MAP. The ARG is an
|
|
186 std::pair of the tag_insn_info of the original insn and the hard register
|
|
187 set UNAVAILABLE that is used to record hard register numbers that cannot be
|
|
188 used for the renaming. This always returns true since we want to traverse
|
|
189 through the entire TAG_MAP. */
|
|
190 bool
|
|
191 check_colliding_tags (const rtx &t, const insn_info_list_t &v, arg_pair_t *arg)
|
|
192 {
|
|
193 HARD_REG_SET *unavailable = arg->second;
|
|
194 unsigned orig_tag = arg->first->tag ();
|
|
195 unsigned tag = INTVAL (t);
|
|
196 machine_mode mode = GET_MODE (arg->first->dest);
|
|
197
|
|
198 /* Can't collide with emptiness. */
|
|
199 if (v.length () == 0)
|
|
200 return true;
|
|
201
|
|
202 /* Drop all aliased destination registers that result in the same
|
|
203 tag. It is not necessary to drop all of them but we do anyway
|
|
204 because it is quicker than checking ranges. */
|
|
205 if (TAG_UPDATE_DEST (tag, 0) == TAG_UPDATE_DEST (orig_tag, 0))
|
|
206 ignore_all_aliases (unavailable, mode, TAG_GET_DEST (tag));
|
|
207
|
|
208 return true;
|
|
209 }
|
|
210
|
|
211
|
|
/* Initialize and build a set of hard register numbers UNAVAILABLE to avoid for
   renaming.  INSN_INFO is the original insn, TAG_MAP is the map of the list of
   insns indexed by their tags, HEAD is the def/use chain head of the
   destination register of the original insn.  The routine returns the super
   class of register classes that may be used during the renaming.  */
static enum reg_class
init_unavailable (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head,
		  HARD_REG_SET *unavailable)
{
  unsigned dest = head->regno;
  enum reg_class super_class = NO_REGS;
  machine_mode mode = GET_MODE (insn_info->dest);

  CLEAR_HARD_REG_SET (*unavailable);

  /* Walk every non-debug use in the chain: any register outside the class
     required by a use is unavailable, and the union of the use classes
     gives the widest class valid at every use site.  */
  for (struct du_chain *tmp = head->first; tmp; tmp = tmp->next_use)
    {
      if (DEBUG_INSN_P (tmp->insn))
	continue;

      IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]);
      super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
    }

  /* Fixed and global registers can never be used as a rename target.  */
  for (unsigned i = 0; i < FIRST_PSEUDO_REGISTER; i++)
    if (fixed_regs[i] || global_regs[i])
      add_to_hard_reg_set (unavailable, mode, i);

  arg_pair_t arg = arg_pair_t (insn_info, unavailable);

  /* Exclude all registers that would lead to collisions with other loads.  */
  tag_map.traverse <arg_pair_t *, check_colliding_tags> (&arg);

  /* Finally, also ignore all aliases of the current reg.  */
  ignore_all_aliases (unavailable, mode, dest & 0xf);

  return super_class;
}
|
|
250
|
|
251
|
|
/* Find a suitable and available register and rename the chain of occurrences
   of the register defined in the def/use chain headed by HEAD in which INSN
   exists.  CUR_TAG, TAGS and TAG_MAP are used to determine which registers are
   unavailable due to a potential collision due to the rename.  The routine
   returns the register number in case of a successful rename or -1 to indicate
   failure.  */
static int
rename_chain (tag_insn_info *insn_info, tag_map_t &tag_map, du_head_p head)
{
  unsigned dest_regno = head->regno;

  /* Bail out if regrename has flagged the chain as unrenamable or if it
     has already been renamed once.  */
  if (head->cannot_rename || head->renamed)
    return -1;

  HARD_REG_SET unavailable;

  enum reg_class super_class = init_unavailable (insn_info, tag_map, head,
						 &unavailable);

  unsigned new_regno = find_rename_reg (head, super_class, &unavailable,
					dest_regno, false);

  /* Attempt to rename as long as regrename doesn't just throw the same
     register at us.  */
  if (new_regno != dest_regno && regrename_do_replace (head, new_regno))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "\tInsn %d: Renamed %d to %d\n",
		 INSN_UID (insn_info->insn), dest_regno, new_regno);

      return new_regno;
    }

  return -1;
}
|
|
287
|
|
288
|
|
289 /* Return true if REGNO is not safe to rename. */
|
|
290 static bool
|
|
291 unsafe_rename_p (unsigned regno)
|
|
292 {
|
|
293 /* Avoid renaming registers used for argument passing and return value. In
|
|
294 future we could be a little less conservative and walk through the basic
|
|
295 blocks to see if there are any call or syscall sites. */
|
|
296 if (regno <= R8_REGNUM
|
|
297 || (regno >= V0_REGNUM && regno < V8_REGNUM))
|
|
298 return true;
|
|
299
|
|
300 /* Don't attempt to rename registers that may have specific meanings. */
|
|
301 switch (regno)
|
|
302 {
|
|
303 case LR_REGNUM:
|
|
304 case HARD_FRAME_POINTER_REGNUM:
|
|
305 case FRAME_POINTER_REGNUM:
|
|
306 case STACK_POINTER_REGNUM:
|
|
307 return true;
|
|
308 }
|
|
309
|
|
310 return false;
|
|
311 }
|
|
312
|
|
313
|
|
/* Go through the def/use chains for the register and find the chain for this
   insn to rename.  The function returns the hard register number in case of a
   successful rename and -1 otherwise.  */
static int
rename_dest (tag_insn_info *insn_info, tag_map_t &tag_map)
{
  struct du_chain *chain = NULL;
  du_head_p head = NULL;
  int i;

  unsigned dest_regno = REGNO (insn_info->dest);

  /* Never touch registers with dedicated or ABI-mandated uses.  */
  if (unsafe_rename_p (dest_regno))
    return -1;

  /* Search the chain where this instruction is (one of) the root.  */
  rtx_insn *insn = insn_info->insn;
  operand_rr_info *dest_op_info = insn_rr[INSN_UID (insn)].op_info;

  for (i = 0; i < dest_op_info->n_chains; i++)
    {
      /* The register tracked by this chain does not match the
	 destination register of insn.  */
      if (dest_op_info->heads[i]->regno != dest_regno)
	continue;

      head = dest_op_info->heads[i];
      /* The chain was merged in another, find the new head.  */
      if (!head->first)
	head = regrename_chain_from_id (head->id);

      for (chain = head->first; chain; chain = chain->next_use)
	/* Found the insn in the chain, so try renaming the register in this
	   chain.  */
	if (chain->insn == insn)
	  return rename_chain (insn_info, tag_map, head);
    }

  return -1;
}
|
|
354
|
|
355
|
|
/* Flag to track if the map has changed.  Set by AVOID_COLLISIONS_1 after a
   successful rename; AVOID_COLLISIONS tests it to decide whether another
   full traversal of the map is needed.  */
static bool map_changed = false;
|
|
358
|
|
/* The actual reallocation logic.  For each vector of collisions V, try to
   resolve the collision by attempting to rename the destination register of
   all but one of the loads.  This is a callback that is invoked for each
   name-value pair (T, V) in TAG_MAP.  The function returns true whenever the
   map remains unchanged and false otherwise, which halts the traversal so
   that it can be restarted on the modified map.  */
bool
avoid_collisions_1 (const rtx &t, insn_info_list_t *v, tag_map_t *tag_map)
{
  /* We need at least two loads to cause a tag collision, return unchanged.  */
  if (v->length () < 2)
    return true;

  tag_insn_info *vec_start = v->pop ();
  tag_insn_info *insn_info = vec_start;

  /* Try to rename at least one register to reduce the collision.  If we
     iterate all the way through, we end up dropping one of the loads from the
     list.  This is fine because we want at most one element to ensure that a
     subsequent rename attempt does not end up worsening the collision.  */
  do
    {
      int new_regno;

      if ((new_regno = rename_dest (insn_info, *tag_map)) != -1)
	{
	  /* Success: the DEST nibble of the tag changed, so move the insn
	     into the bucket for its new tag and restart the traversal.  */
	  rtx new_tag = GEN_INT (TAG_UPDATE_DEST (INTVAL (t), new_regno));

	  tag_map->get_or_insert (new_tag).safe_push (insn_info);
	  df_set_regs_ever_live (new_regno, true);
	  map_changed = true;
	  return false;
	}

      /* Rename failed; rotate the vector and try the next load.  */
      v->safe_insert (0, insn_info);
      insn_info = v->pop ();
    }
  while (insn_info != vec_start);

  if (dump_file)
    fprintf (dump_file, "\t>> Failed to rename destination in insn %d\n\t>>",
	     INSN_UID (insn_info->insn));

  /* Drop the last element and move on to the next tag.  */
  delete insn_info;
  return true;
}
|
|
405
|
|
406
|
|
407 /* For each set of collisions, attempt to rename the registers or insert a move
|
|
408 to avoid the collision. We repeatedly traverse through TAG_MAP using
|
|
409 AVOID_COLLISIONS_1 trying to rename registers to avoid collisions until a
|
|
410 full traversal results in no change in the map. */
|
|
411 static void
|
|
412 avoid_collisions (tag_map_t &tag_map)
|
|
413 {
|
|
414 do
|
|
415 {
|
|
416 map_changed = false;
|
|
417 tag_map.traverse <tag_map_t *, avoid_collisions_1> (&tag_map);
|
|
418 }
|
|
419 while (map_changed);
|
|
420 }
|
|
421
|
|
422
|
|
423
|
|
/* Find the use def chain in which INSN exists and then see if there is a
   definition inside the loop and outside it.  We use this as a simple
   approximation to determine whether the base register is an IV.  The basic
   idea is to find INSN in the use-def chains for its base register and find
   all definitions that reach it.  Of all these definitions, there should be at
   least one definition that is a simple addition of a constant value, either
   as a binary operation or a pre or post update.

   The function returns true if the base register is estimated to be an IV.  */
static bool
iv_p (rtx_insn *insn, rtx reg, struct loop *loop)
{
  df_ref ause;
  unsigned regno = REGNO (reg);

  /* Ignore loads from the stack.  */
  if (regno == SP_REGNUM)
    return false;

  /* Walk the use chain of REG until we reach the use inside INSN itself.  */
  for (ause = DF_REG_USE_CHAIN (regno); ause; ause = DF_REF_NEXT_REG (ause))
    {
      if (!DF_REF_INSN_INFO (ause)
	  || !NONDEBUG_INSN_P (DF_REF_INSN (ause)))
	continue;

      if (insn != DF_REF_INSN (ause))
	continue;

      struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
      df_ref def_rec;

      FOR_EACH_INSN_INFO_DEF (def_rec, insn_info)
	{
	  /* NOTE: deliberately shadows the outer INSN with the insn of
	     the definition being examined.  */
	  rtx_insn *insn = DF_REF_INSN (def_rec);
	  basic_block bb = BLOCK_FOR_INSN (insn);

	  /* Only definitions inside LOOP itself are interesting.  */
	  if (dominated_by_p (CDI_DOMINATORS, bb, loop->header)
	      && bb->loop_father == loop)
	    {
	      if (recog_memoized (insn) < 0)
		continue;

	      rtx pat = PATTERN (insn);

	      /* Prefetch or clobber; unlikely to be a constant stride.  The
		 falkor software prefetcher tuning is pretty conservative, so
		 its presence indicates that the access pattern is probably
		 strided but most likely with an unknown stride size or a
		 stride size that is quite large.  */
	      if (GET_CODE (pat) != SET)
		continue;

	      rtx x = SET_SRC (pat);
	      /* Look through extension/extract wrappers at the value that
		 is actually computed.  */
	      if (GET_CODE (x) == ZERO_EXTRACT
		  || GET_CODE (x) == ZERO_EXTEND
		  || GET_CODE (x) == SIGN_EXTEND)
		x = XEXP (x, 0);

	      /* Loading the value from memory; unlikely to be a constant
		 stride.  */
	      if (MEM_P (x))
		continue;

	      /* An increment or decrement by a constant MODE_SIZE amount or
		 the result of a binary expression is likely to be an IV.  */
	      if (GET_CODE (x) == POST_INC
		  || GET_CODE (x) == POST_DEC
		  || GET_CODE (x) == PRE_INC
		  || GET_CODE (x) == PRE_DEC)
		return true;
	      else if (BINARY_P (x)
		       && (CONST_INT_P (XEXP (x, 0))
			   || CONST_INT_P (XEXP (x, 1))))
		{
		  rtx stride = (CONST_INT_P (XEXP (x, 0))
				? XEXP (x, 0) : XEXP (x, 1));

		  /* Don't bother with very long strides because the prefetcher
		     is unable to train on them anyway.  */
		  if (INTVAL (stride) < MAX_PREFETCH_STRIDE)
		    return true;
		}
	    }
	}
      /* Found INSN but no qualifying in-loop definition.  */
      return false;
    }
  return false;
}
|
|
512
|
|
513
|
|
/* Return true if SRC is a strided load in the LOOP, false otherwise.
   If it is a strided load, set the BASE and OFFSET.  Also, if this is
   a pre/post increment load, set PRE_POST to true.  LOAD_PAIR indicates
   that SRC comes from a load-pair insn, which doubles the effective
   post-increment step.  */
static bool
valid_src_p (rtx src, rtx_insn *insn, struct loop *loop, bool *pre_post,
	     rtx *base, rtx *offset, bool load_pair)
{
  subrtx_var_iterator::array_type array;
  rtx x = NULL_RTX;

  /* Dig out the first MEM sub-rtx of SRC — that is the actual load.  */
  FOR_EACH_SUBRTX_VAR (iter, array, src, NONCONST)
    if (MEM_P (*iter))
      {
	x = *iter;
	break;
      }

  if (!x)
    return false;

  struct aarch64_address_info addr;
  machine_mode mode = GET_MODE (x);

  if (!aarch64_classify_address (&addr, XEXP (x, 0), mode, true))
    return false;

  /* Fixed and global base registers are not candidates for renaming-based
     collision avoidance.  */
  unsigned regno = REGNO (addr.base);
  if (global_regs[regno] || fixed_regs[regno])
    return false;

  if (addr.type == ADDRESS_REG_WB)
    {
      unsigned code = GET_CODE (XEXP (x, 0));

      *pre_post = true;
      *base = addr.base;

      if (code == PRE_MODIFY || code == POST_MODIFY)
	*offset = addr.offset;
      else
	{
	  /* Writeback is only supported for fixed-width modes.  */
	  unsigned int_offset = GET_MODE_SIZE (mode).to_constant ();

	  /* For post-incremented load pairs we would increment the base twice
	     over, so make that adjustment.  */
	  if (load_pair && (code == POST_INC || code == POST_DEC))
	    int_offset *= 2;

	  *offset = GEN_INT (int_offset);
	}
      return true;
    }
  else if (addr.type == ADDRESS_REG_IMM || addr.type == ADDRESS_REG_REG)
    {
      /* Check if the load is strided, i.e. the base register behaves like
	 an induction variable of LOOP.  */
      if (!iv_p (insn, addr.base, loop))
	return false;

      *base = addr.base;
      *offset = addr.offset;
      return true;
    }

  return false;
}
|
|
580
|
|
581
|
|
/* Return true if INSN is a strided load in LOOP.  If it is a strided load, set
   the DEST, BASE and OFFSET.  Also, if this is a pre/post increment load, set
   PRE_POST to true, and set LDP if it is a load pair.

   The routine does checks on the destination of the insn and depends on
   VALID_SRC_P to check the source and fill in the BASE and OFFSET.  */
static bool
get_load_info (rtx_insn *insn, struct loop *loop, rtx *dest, rtx *base,
	       rtx *offset, bool *pre_post, bool *ldp)
{
  if (!INSN_P (insn) || recog_memoized (insn) < 0)
    return false;

  rtx pat = PATTERN (insn);
  unsigned code = GET_CODE (pat);
  bool load_pair = (code == PARALLEL);

  /* For a load pair we need only the first base and destination
     registers.  We however need to ensure that our pre/post increment
     offset is doubled; we do that in VALID_SRC_P.  */
  if (load_pair)
    {
      pat = XVECEXP (pat, 0, 0);
      code = GET_CODE (pat);
    }

  if (code != SET)
    return false;

  rtx dest_rtx = SET_DEST (pat);

  if (!REG_P (dest_rtx))
    return false;

  unsigned regno = REGNO (dest_rtx);
  machine_mode mode = GET_MODE (dest_rtx);
  machine_mode inner_mode = GET_MODE_INNER (mode);

  /* Falkor does not support SVE vectors.  */
  if (!GET_MODE_SIZE (mode).is_constant ())
    return false;

  /* Ignore vector struct or lane loads.  */
  if (GET_MODE_SIZE (mode).to_constant ()
      != GET_MODE_SIZE (inner_mode).to_constant ())
    return false;

  /* The largest width we want to bother with is a load of a pair of
     quad-words.  */
  if ((GET_MODE_SIZE (mode).to_constant () << load_pair)
      > GET_MODE_SIZE (OImode))
    return false;

  /* Ignore loads into the stack pointer because it is unlikely to be a
     stream.  */
  if (regno == SP_REGNUM)
    return false;

  if (valid_src_p (SET_SRC (pat), insn, loop, pre_post, base, offset,
		   load_pair))
    {
      *dest = dest_rtx;
      *ldp = load_pair;

      return true;
    }

  return false;
}
|
|
651
|
|
652
|
|
/* Return whether INSN and CAND appear as definitions of REGNO in the same
   def/use chain.  */
static bool
in_same_chain (rtx_insn *insn, rtx_insn *cand, unsigned regno)
{
  struct du_chain *chain = NULL;
  du_head_p head = NULL;
  int i;

  /* Search the chain where this instruction is (one of) the root.  */
  operand_rr_info *op_info = insn_rr[INSN_UID (insn)].op_info;

  for (i = 0; i < op_info->n_chains; i++)
    {
      /* The register tracked by this chain does not match the
	 dest register of insn.  */
      if (op_info->heads[i]->regno != regno)
	continue;

      head = op_info->heads[i];
      /* The chain was merged in another, find the new head.  */
      if (!head->first)
	head = regrename_chain_from_id (head->id);

      bool found_insn = false, found_cand = false;

      for (chain = head->first; chain; chain = chain->next_use)
	{
	  rtx *loc = &SET_DEST (PATTERN (chain->insn));

	  /* Only consider occurrences that are defs (i.e. the SET_DEST of
	     the insn), not uses of the register.  */
	  if (chain->loc != loc)
	    continue;

	  if (chain->insn == insn)
	    found_insn = true;

	  if (chain->insn == cand)
	    found_cand = true;

	  /* Both insns define REGNO within this one chain.  */
	  if (found_insn && found_cand)
	    return true;
	}
    }

  return false;
}
|
|
698
|
|
699
|
|
/* Callback function to traverse the tag map and drop loads that have the same
   destination and are in the same chain of occurrence.  Routine always returns
   true to allow traversal through all of TAG_MAP.  */
bool
single_dest_per_chain (const rtx &t ATTRIBUTE_UNUSED, insn_info_list_t *v,
		       void *arg ATTRIBUTE_UNUSED)
{
  for (int i = v->length () - 1; i>= 1; i--)
    {
      tag_insn_info *insn_info = (*v)[i];

      for (int j = v->length () - 2; j >= 0; j--)
	{
	  /* Filter out destinations in the same chain.  */
	  if (in_same_chain (insn_info->insn, (*v)[j]->insn,
			     REGNO (insn_info->dest)))
	    {
	      v->ordered_remove (j);
	      /* Indices shifted after the removal, so restart the outer
		 scan from the new end of the vector.  */
	      i = v->length ();
	      break;
	    }
	}
    }

  return true;
}
|
|
726
|
|
727
|
|
728 /* Callback invoked for each name-value pair (T, INSN_INFO) to dump the insn
|
|
729 list INSN_INFO for tag T. */
|
|
730 bool
|
|
731 dump_insn_list (const rtx &t, const insn_info_list_t &insn_info,
|
|
732 void *unused ATTRIBUTE_UNUSED)
|
|
733 {
|
|
734 gcc_assert (dump_file);
|
|
735 fprintf (dump_file, "Tag 0x%lx ::\n", INTVAL (t));
|
|
736
|
|
737 for (unsigned i = 0; i < insn_info.length (); i++)
|
|
738 dump_insn_slim (dump_file, insn_info[i]->insn);
|
|
739
|
|
740 fprintf (dump_file, "\n");
|
|
741
|
|
742 return true;
|
|
743 }
|
|
744
|
|
745
|
|
746 /* Record all loads in LOOP into TAG_MAP indexed by the falkor hardware
|
|
747 prefetcher memory tags. */
|
|
748 static void
|
|
749 record_loads (tag_map_t &tag_map, struct loop *loop)
|
|
750 {
|
|
751 rtx_insn *insn;
|
|
752 basic_block *body, bb;
|
|
753
|
|
754 body = get_loop_body (loop);
|
|
755
|
|
756 for (unsigned i = 0; i < loop->num_nodes; i++)
|
|
757 {
|
|
758 bb = body[i];
|
|
759 FOR_BB_INSNS (bb, insn)
|
|
760 {
|
|
761 rtx base = NULL_RTX;
|
|
762 rtx dest = NULL_RTX;
|
|
763 rtx offset = NULL_RTX;
|
|
764 bool writeback = false;
|
|
765 bool ldp = false;
|
|
766
|
|
767 if (!INSN_P (insn) || DEBUG_INSN_P (insn))
|
|
768 continue;
|
|
769
|
|
770 if (get_load_info (insn, loop, &dest, &base, &offset, &writeback,
|
|
771 &ldp))
|
|
772 {
|
|
773 tag_insn_info *i = new tag_insn_info (insn, dest, base, offset,
|
|
774 writeback, ldp);
|
|
775 rtx tag = GEN_INT (i->tag ());
|
|
776 tag_map.get_or_insert (tag).safe_push (i);
|
|
777 }
|
|
778 }
|
|
779 }
|
|
780
|
|
781 if (dump_file)
|
|
782 {
|
|
783 fprintf (dump_file, "Loop %d: Tag map generated.\n", loop->num);
|
|
784 tag_map.traverse <void *, dump_insn_list> (NULL);
|
|
785 }
|
|
786
|
|
787 /* Try to reduce the dataset before launching into the rename attempt. Drop
|
|
788 destinations in the same collision chain that appear in the same def/use
|
|
789 chain, all as defs. These chains will move together in a rename so
|
|
790 there's no point in keeping both in there. */
|
|
791 tag_map.traverse <void *, single_dest_per_chain> (NULL);
|
|
792 }
|
|
793
|
|
794
|
|
/* Tag collision avoidance pass for Falkor.  The pass runs in two phases for
   each loop; the first phase collects all loads that we consider as
   interesting for renaming into a tag-indexed map of lists.  The second phase
   renames the destination register of the loads in an attempt to spread out
   the loads into different tags.  */
void
execute_tag_collision_avoidance ()
{
  struct loop *loop;

  /* Build use-def chains so that base registers can be traced back to
     their definitions (see IV_P).  */
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_UD_CHAIN);
  df_compute_regs_ever_live (true);
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Set up regrename's def/use chain data, which drives the actual
     register replacement.  */
  regrename_init (true);
  regrename_analyze (NULL);

  /* Dominator and loop structures are needed by IV_P and the per-loop
     walk below.  */
  compute_bb_for_insn ();
  calculate_dominance_info (CDI_DOMINATORS);
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  /* Process loops from the innermost outward.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      tag_map_t tag_map (512);

      record_loads (tag_map, loop);
      avoid_collisions (tag_map);
      if (dump_file)
	{
	  fprintf (dump_file, "Loop %d: Completed rename.\n", loop->num);
	  tag_map.traverse <void *, dump_insn_list> (NULL);
	}
      /* Release all tag_insn_info objects before the map goes out of
	 scope.  */
      tag_map.traverse <void *, free_insn_info> (NULL);
    }

  loop_optimizer_finalize ();
  free_dominance_info (CDI_DOMINATORS);
  regrename_finish ();
}
|
|
836
|
|
837
|
|
/* Static metadata describing the tag collision avoidance RTL pass.  */
const pass_data pass_data_tag_collision_avoidance =
{
  RTL_PASS, /* type */
  "tag_collision_avoidance", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};
|
|
850
|
|
851
|
|
852 class pass_tag_collision_avoidance : public rtl_opt_pass
|
|
853 {
|
|
854 public:
|
|
855 pass_tag_collision_avoidance (gcc::context *ctxt)
|
|
856 : rtl_opt_pass (pass_data_tag_collision_avoidance, ctxt)
|
|
857 {}
|
|
858
|
|
859 /* opt_pass methods: */
|
|
860 virtual bool gate (function *)
|
|
861 {
|
|
862 return ((aarch64_tune_params.extra_tuning_flags
|
|
863 & AARCH64_EXTRA_TUNE_RENAME_LOAD_REGS)
|
|
864 && optimize >= 2);
|
|
865 }
|
|
866
|
|
867 virtual unsigned int execute (function *)
|
|
868 {
|
|
869 execute_tag_collision_avoidance ();
|
|
870 return 0;
|
|
871 }
|
|
872
|
|
873 }; // class pass_tag_collision_avoidance
|
|
874
|
|
875
|
|
/* Create a new pass instance.  Presumably referenced from the target's pass
   registration — confirm against the aarch64 passes definition.  */
rtl_opt_pass *
make_pass_tag_collision_avoidance (gcc::context *ctxt)
{
  return new pass_tag_collision_avoidance (ctxt);
}
|