Mercurial > hg > CbC > GCC_original
annotate gcc/loop-invariant.c @ 16:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | f6334be47118 |
children | 84e7813d76e9 |
rev | line source |
---|---|
0 | 1 /* RTL-level loop invariant motion. |
16 | 2 Copyright (C) 2004-2017 Free Software Foundation, Inc. |
0 | 3 |
4 This file is part of GCC. | |
5 | |
6 GCC is free software; you can redistribute it and/or modify it | |
7 under the terms of the GNU General Public License as published by the | |
8 Free Software Foundation; either version 3, or (at your option) any | |
9 later version. | |
10 | |
11 GCC is distributed in the hope that it will be useful, but WITHOUT | |
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 for more details. | |
15 | |
16 You should have received a copy of the GNU General Public License | |
17 along with GCC; see the file COPYING3. If not see | |
18 <http://www.gnu.org/licenses/>. */ | |
19 | |
20 /* This implements the loop invariant motion pass. It is very simple | |
21 (no calls, no loads/stores, etc.). This should be sufficient to cleanup | |
22 things like address arithmetics -- other more complicated invariants should | |
23 be eliminated on GIMPLE either in tree-ssa-loop-im.c or in tree-ssa-pre.c. | |
24 | |
25 We proceed loop by loop -- it is simpler than trying to handle things | |
26 globally and should not lose much. First we inspect all sets inside loop | |
27 and create a dependency graph on insns (saying "to move this insn, you must | |
28 also move the following insns"). | |
29 | |
30 We then need to determine what to move. We estimate the number of registers | |
31 used and move as many invariants as possible while we still have enough free | |
32 registers. We prefer the expensive invariants. | |
33 | |
34 Then we move the selected invariants out of the loop, creating a new | |
35 temporaries for them if necessary. */ | |
36 | |
37 #include "config.h" | |
38 #include "system.h" | |
39 #include "coretypes.h" | |
16 | 40 #include "backend.h" |
41 #include "target.h" | |
0 | 42 #include "rtl.h" |
16 | 43 #include "tree.h" |
44 #include "cfghooks.h" | |
45 #include "df.h" | |
46 #include "memmodel.h" | |
0 | 47 #include "tm_p.h" |
16 | 48 #include "insn-config.h" |
49 #include "regs.h" | |
50 #include "ira.h" | |
51 #include "recog.h" | |
52 #include "cfgrtl.h" | |
0 | 53 #include "cfgloop.h" |
54 #include "expr.h" | |
55 #include "params.h" | |
16 | 56 #include "rtl-iter.h" |
57 #include "dumpfile.h" | |
0 | 58 |
59 /* The data stored for the loop. */ | |
60 | |
61 struct loop_data | |
62 { | |
63 struct loop *outermost_exit; /* The outermost exit of the loop. */ | |
64 bool has_call; /* True if the loop contains a call. */ | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
65 /* Maximal register pressure inside loop for given register class |
16 | 66 (defined only for the pressure classes). */ |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
67 int max_reg_pressure[N_REG_CLASSES]; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
68 /* Loop regs referenced and live pseudo-registers. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
69 bitmap_head regs_ref; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
70 bitmap_head regs_live; |
0 | 71 }; |
72 | |
73 #define LOOP_DATA(LOOP) ((struct loop_data *) (LOOP)->aux) | |
74 | |
75 /* The description of an use. */ | |
76 | |
77 struct use | |
78 { | |
79 rtx *pos; /* Position of the use. */ | |
16 | 80 rtx_insn *insn; /* The insn in that the use occurs. */ |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
81 unsigned addr_use_p; /* Whether the use occurs in an address. */ |
0 | 82 struct use *next; /* Next use in the list. */ |
83 }; | |
84 | |
85 /* The description of a def. */ | |
86 | |
87 struct def | |
88 { | |
89 struct use *uses; /* The list of uses that are uniquely reached | |
90 by it. */ | |
91 unsigned n_uses; /* Number of such uses. */ | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
92 unsigned n_addr_uses; /* Number of uses in addresses. */ |
0 | 93 unsigned invno; /* The corresponding invariant. */ |
16 | 94 bool can_prop_to_addr_uses; /* True if the corresponding inv can be |
95 propagated into its address uses. */ | |
0 | 96 }; |
97 | |
98 /* The data stored for each invariant. */ | |
99 | |
100 struct invariant | |
101 { | |
102 /* The number of the invariant. */ | |
103 unsigned invno; | |
104 | |
105 /* The number of the invariant with the same value. */ | |
106 unsigned eqto; | |
107 | |
16 | 108 /* The number of invariants which eqto this. */ |
109 unsigned eqno; | |
0 | 110 |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
111 /* If we moved the invariant out of the loop, the original regno |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
112 that contained its value. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
113 int orig_regno; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
114 |
16 | 115 /* If we moved the invariant out of the loop, the register that contains its |
116 value. */ | |
117 rtx reg; | |
118 | |
0 | 119 /* The definition of the invariant. */ |
120 struct def *def; | |
121 | |
122 /* The insn in that it is defined. */ | |
16 | 123 rtx_insn *insn; |
0 | 124 |
125 /* Whether it is always executed. */ | |
126 bool always_executed; | |
127 | |
128 /* Whether to move the invariant. */ | |
129 bool move; | |
130 | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
131 /* Whether the invariant is cheap when used as an address. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
132 bool cheap_address; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
133 |
0 | 134 /* Cost of the invariant. */ |
135 unsigned cost; | |
136 | |
137 /* Used for detecting already visited invariants during determining | |
138 costs of movements. */ | |
139 unsigned stamp; | |
16 | 140 |
141 /* The invariants it depends on. */ | |
142 bitmap depends_on; | |
0 | 143 }; |
144 | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
145 /* Currently processed loop. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
146 static struct loop *curr_loop; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
147 |
0 | 148 /* Table of invariants indexed by the df_ref uid field. */ |
149 | |
150 static unsigned int invariant_table_size = 0; | |
151 static struct invariant ** invariant_table; | |
152 | |
153 /* Entry for hash table of invariant expressions. */ | |
154 | |
155 struct invariant_expr_entry | |
156 { | |
157 /* The invariant. */ | |
158 struct invariant *inv; | |
159 | |
160 /* Its value. */ | |
161 rtx expr; | |
162 | |
163 /* Its mode. */ | |
16 | 164 machine_mode mode; |
0 | 165 |
166 /* Its hash. */ | |
167 hashval_t hash; | |
168 }; | |
169 | |
170 /* The actual stamp for marking already visited invariants during determining | |
171 costs of movements. */ | |
172 | |
173 static unsigned actual_stamp; | |
174 | |
175 typedef struct invariant *invariant_p; | |
176 | |
177 | |
178 /* The invariants. */ | |
179 | |
16 | 180 static vec<invariant_p> invariants; |
0 | 181 |
182 /* Check the size of the invariant table and realloc if necessary. */ | |
183 | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
184 static void |
0 | 185 check_invariant_table_size (void) |
186 { | |
16 | 187 if (invariant_table_size < DF_DEFS_TABLE_SIZE ()) |
0 | 188 { |
189 unsigned int new_size = DF_DEFS_TABLE_SIZE () + (DF_DEFS_TABLE_SIZE () / 4); | |
190 invariant_table = XRESIZEVEC (struct invariant *, invariant_table, new_size); | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
191 memset (&invariant_table[invariant_table_size], 0, |
16 | 192 (new_size - invariant_table_size) * sizeof (struct invariant *)); |
0 | 193 invariant_table_size = new_size; |
194 } | |
195 } | |
196 | |
197 /* Test for possibility of invariantness of X. */ | |
198 | |
199 static bool | |
200 check_maybe_invariant (rtx x) | |
201 { | |
202 enum rtx_code code = GET_CODE (x); | |
203 int i, j; | |
204 const char *fmt; | |
205 | |
206 switch (code) | |
207 { | |
16 | 208 CASE_CONST_ANY: |
0 | 209 case SYMBOL_REF: |
210 case CONST: | |
211 case LABEL_REF: | |
212 return true; | |
213 | |
214 case PC: | |
215 case CC0: | |
216 case UNSPEC_VOLATILE: | |
217 case CALL: | |
218 return false; | |
219 | |
220 case REG: | |
221 return true; | |
222 | |
223 case MEM: | |
224 /* Load/store motion is done elsewhere. ??? Perhaps also add it here? | |
225 It should not be hard, and might be faster than "elsewhere". */ | |
226 | |
227 /* Just handle the most trivial case where we load from an unchanging | |
228 location (most importantly, pic tables). */ | |
229 if (MEM_READONLY_P (x) && !MEM_VOLATILE_P (x)) | |
230 break; | |
231 | |
232 return false; | |
233 | |
234 case ASM_OPERANDS: | |
235 /* Don't mess with insns declared volatile. */ | |
236 if (MEM_VOLATILE_P (x)) | |
237 return false; | |
238 break; | |
239 | |
240 default: | |
241 break; | |
242 } | |
243 | |
244 fmt = GET_RTX_FORMAT (code); | |
245 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) | |
246 { | |
247 if (fmt[i] == 'e') | |
248 { | |
249 if (!check_maybe_invariant (XEXP (x, i))) | |
250 return false; | |
251 } | |
252 else if (fmt[i] == 'E') | |
253 { | |
254 for (j = 0; j < XVECLEN (x, i); j++) | |
255 if (!check_maybe_invariant (XVECEXP (x, i, j))) | |
256 return false; | |
257 } | |
258 } | |
259 | |
260 return true; | |
261 } | |
262 | |
263 /* Returns the invariant definition for USE, or NULL if USE is not | |
264 invariant. */ | |
265 | |
266 static struct invariant * | |
267 invariant_for_use (df_ref use) | |
268 { | |
269 struct df_link *defs; | |
270 df_ref def; | |
271 basic_block bb = DF_REF_BB (use), def_bb; | |
272 | |
273 if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE) | |
274 return NULL; | |
275 | |
276 defs = DF_REF_CHAIN (use); | |
277 if (!defs || defs->next) | |
278 return NULL; | |
279 def = defs->ref; | |
280 check_invariant_table_size (); | |
16 | 281 if (!invariant_table[DF_REF_ID (def)]) |
0 | 282 return NULL; |
283 | |
284 def_bb = DF_REF_BB (def); | |
285 if (!dominated_by_p (CDI_DOMINATORS, bb, def_bb)) | |
286 return NULL; | |
16 | 287 return invariant_table[DF_REF_ID (def)]; |
0 | 288 } |
289 | |
290 /* Computes hash value for invariant expression X in INSN. */ | |
291 | |
292 static hashval_t | |
16 | 293 hash_invariant_expr_1 (rtx_insn *insn, rtx x) |
0 | 294 { |
295 enum rtx_code code = GET_CODE (x); | |
296 int i, j; | |
297 const char *fmt; | |
298 hashval_t val = code; | |
299 int do_not_record_p; | |
300 df_ref use; | |
301 struct invariant *inv; | |
302 | |
303 switch (code) | |
304 { | |
16 | 305 CASE_CONST_ANY: |
0 | 306 case SYMBOL_REF: |
307 case CONST: | |
308 case LABEL_REF: | |
309 return hash_rtx (x, GET_MODE (x), &do_not_record_p, NULL, false); | |
310 | |
311 case REG: | |
312 use = df_find_use (insn, x); | |
313 if (!use) | |
314 return hash_rtx (x, GET_MODE (x), &do_not_record_p, NULL, false); | |
315 inv = invariant_for_use (use); | |
316 if (!inv) | |
317 return hash_rtx (x, GET_MODE (x), &do_not_record_p, NULL, false); | |
318 | |
319 gcc_assert (inv->eqto != ~0u); | |
320 return inv->eqto; | |
321 | |
322 default: | |
323 break; | |
324 } | |
325 | |
326 fmt = GET_RTX_FORMAT (code); | |
327 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) | |
328 { | |
329 if (fmt[i] == 'e') | |
330 val ^= hash_invariant_expr_1 (insn, XEXP (x, i)); | |
331 else if (fmt[i] == 'E') | |
332 { | |
333 for (j = 0; j < XVECLEN (x, i); j++) | |
334 val ^= hash_invariant_expr_1 (insn, XVECEXP (x, i, j)); | |
335 } | |
336 else if (fmt[i] == 'i' || fmt[i] == 'n') | |
337 val ^= XINT (x, i); | |
338 } | |
339 | |
340 return val; | |
341 } | |
342 | |
343 /* Returns true if the invariant expressions E1 and E2 used in insns INSN1 | |
344 and INSN2 have always the same value. */ | |
345 | |
346 static bool | |
16 | 347 invariant_expr_equal_p (rtx_insn *insn1, rtx e1, rtx_insn *insn2, rtx e2) |
0 | 348 { |
349 enum rtx_code code = GET_CODE (e1); | |
350 int i, j; | |
351 const char *fmt; | |
352 df_ref use1, use2; | |
353 struct invariant *inv1 = NULL, *inv2 = NULL; | |
354 rtx sub1, sub2; | |
355 | |
356 /* If mode of only one of the operands is VOIDmode, it is not equivalent to | |
357 the other one. If both are VOIDmode, we rely on the caller of this | |
358 function to verify that their modes are the same. */ | |
359 if (code != GET_CODE (e2) || GET_MODE (e1) != GET_MODE (e2)) | |
360 return false; | |
361 | |
362 switch (code) | |
363 { | |
16 | 364 CASE_CONST_ANY: |
0 | 365 case SYMBOL_REF: |
366 case CONST: | |
367 case LABEL_REF: | |
368 return rtx_equal_p (e1, e2); | |
369 | |
370 case REG: | |
371 use1 = df_find_use (insn1, e1); | |
372 use2 = df_find_use (insn2, e2); | |
373 if (use1) | |
374 inv1 = invariant_for_use (use1); | |
375 if (use2) | |
376 inv2 = invariant_for_use (use2); | |
377 | |
378 if (!inv1 && !inv2) | |
379 return rtx_equal_p (e1, e2); | |
380 | |
381 if (!inv1 || !inv2) | |
382 return false; | |
383 | |
384 gcc_assert (inv1->eqto != ~0u); | |
385 gcc_assert (inv2->eqto != ~0u); | |
386 return inv1->eqto == inv2->eqto; | |
387 | |
388 default: | |
389 break; | |
390 } | |
391 | |
392 fmt = GET_RTX_FORMAT (code); | |
393 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--) | |
394 { | |
395 if (fmt[i] == 'e') | |
396 { | |
397 sub1 = XEXP (e1, i); | |
398 sub2 = XEXP (e2, i); | |
399 | |
400 if (!invariant_expr_equal_p (insn1, sub1, insn2, sub2)) | |
401 return false; | |
402 } | |
403 | |
404 else if (fmt[i] == 'E') | |
405 { | |
406 if (XVECLEN (e1, i) != XVECLEN (e2, i)) | |
407 return false; | |
408 | |
409 for (j = 0; j < XVECLEN (e1, i); j++) | |
410 { | |
411 sub1 = XVECEXP (e1, i, j); | |
412 sub2 = XVECEXP (e2, i, j); | |
413 | |
414 if (!invariant_expr_equal_p (insn1, sub1, insn2, sub2)) | |
415 return false; | |
416 } | |
417 } | |
418 else if (fmt[i] == 'i' || fmt[i] == 'n') | |
419 { | |
420 if (XINT (e1, i) != XINT (e2, i)) | |
421 return false; | |
422 } | |
423 /* Unhandled type of subexpression, we fail conservatively. */ | |
424 else | |
425 return false; | |
426 } | |
427 | |
428 return true; | |
429 } | |
430 | |
16 | 431 struct invariant_expr_hasher : free_ptr_hash <invariant_expr_entry> |
432 { | |
433 static inline hashval_t hash (const invariant_expr_entry *); | |
434 static inline bool equal (const invariant_expr_entry *, | |
435 const invariant_expr_entry *); | |
436 }; | |
0 | 437 |
16 | 438 /* Returns hash value for invariant expression entry ENTRY. */ |
439 | |
440 inline hashval_t | |
441 invariant_expr_hasher::hash (const invariant_expr_entry *entry) | |
0 | 442 { |
443 return entry->hash; | |
444 } | |
445 | |
16 | 446 /* Compares invariant expression entries ENTRY1 and ENTRY2. */ |
0 | 447 |
16 | 448 inline bool |
449 invariant_expr_hasher::equal (const invariant_expr_entry *entry1, | |
450 const invariant_expr_entry *entry2) | |
0 | 451 { |
452 if (entry1->mode != entry2->mode) | |
453 return 0; | |
454 | |
455 return invariant_expr_equal_p (entry1->inv->insn, entry1->expr, | |
456 entry2->inv->insn, entry2->expr); | |
457 } | |
458 | |
16 | 459 typedef hash_table<invariant_expr_hasher> invariant_htab_type; |
460 | |
0 | 461 /* Checks whether invariant with value EXPR in machine mode MODE is |
462 recorded in EQ. If this is the case, return the invariant. Otherwise | |
463 insert INV to the table for this expression and return INV. */ | |
464 | |
465 static struct invariant * | |
16 | 466 find_or_insert_inv (invariant_htab_type *eq, rtx expr, machine_mode mode, |
0 | 467 struct invariant *inv) |
468 { | |
469 hashval_t hash = hash_invariant_expr_1 (inv->insn, expr); | |
470 struct invariant_expr_entry *entry; | |
471 struct invariant_expr_entry pentry; | |
16 | 472 invariant_expr_entry **slot; |
0 | 473 |
474 pentry.expr = expr; | |
475 pentry.inv = inv; | |
476 pentry.mode = mode; | |
16 | 477 slot = eq->find_slot_with_hash (&pentry, hash, INSERT); |
478 entry = *slot; | |
0 | 479 |
480 if (entry) | |
481 return entry->inv; | |
482 | |
483 entry = XNEW (struct invariant_expr_entry); | |
484 entry->inv = inv; | |
485 entry->expr = expr; | |
486 entry->mode = mode; | |
487 entry->hash = hash; | |
488 *slot = entry; | |
489 | |
490 return inv; | |
491 } | |
492 | |
493 /* Finds invariants identical to INV and records the equivalence. EQ is the | |
494 hash table of the invariants. */ | |
495 | |
496 static void | |
16 | 497 find_identical_invariants (invariant_htab_type *eq, struct invariant *inv) |
0 | 498 { |
499 unsigned depno; | |
500 bitmap_iterator bi; | |
501 struct invariant *dep; | |
502 rtx expr, set; | |
16 | 503 machine_mode mode; |
504 struct invariant *tmp; | |
0 | 505 |
506 if (inv->eqto != ~0u) | |
507 return; | |
508 | |
509 EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, depno, bi) | |
510 { | |
16 | 511 dep = invariants[depno]; |
0 | 512 find_identical_invariants (eq, dep); |
513 } | |
514 | |
515 set = single_set (inv->insn); | |
516 expr = SET_SRC (set); | |
517 mode = GET_MODE (expr); | |
518 if (mode == VOIDmode) | |
519 mode = GET_MODE (SET_DEST (set)); | |
16 | 520 |
521 tmp = find_or_insert_inv (eq, expr, mode, inv); | |
522 inv->eqto = tmp->invno; | |
523 | |
524 if (tmp->invno != inv->invno && inv->always_executed) | |
525 tmp->eqno++; | |
0 | 526 |
527 if (dump_file && inv->eqto != inv->invno) | |
528 fprintf (dump_file, | |
529 "Invariant %d is equivalent to invariant %d.\n", | |
530 inv->invno, inv->eqto); | |
531 } | |
532 | |
533 /* Find invariants with the same value and record the equivalences. */ | |
534 | |
535 static void | |
536 merge_identical_invariants (void) | |
537 { | |
538 unsigned i; | |
539 struct invariant *inv; | |
16 | 540 invariant_htab_type eq (invariants.length ()); |
0 | 541 |
16 | 542 FOR_EACH_VEC_ELT (invariants, i, inv) |
543 find_identical_invariants (&eq, inv); | |
0 | 544 } |
545 | |
546 /* Determines the basic blocks inside LOOP that are always executed and | |
547 stores their bitmap to ALWAYS_REACHED. MAY_EXIT is a bitmap of | |
548 basic blocks that may either exit the loop, or contain the call that | |
549 does not have to return. BODY is body of the loop obtained by | |
550 get_loop_body_in_dom_order. */ | |
551 | |
552 static void | |
553 compute_always_reached (struct loop *loop, basic_block *body, | |
554 bitmap may_exit, bitmap always_reached) | |
555 { | |
556 unsigned i; | |
557 | |
558 for (i = 0; i < loop->num_nodes; i++) | |
559 { | |
560 if (dominated_by_p (CDI_DOMINATORS, loop->latch, body[i])) | |
561 bitmap_set_bit (always_reached, i); | |
562 | |
563 if (bitmap_bit_p (may_exit, i)) | |
564 return; | |
565 } | |
566 } | |
567 | |
568 /* Finds exits out of the LOOP with body BODY. Marks blocks in that we may | |
569 exit the loop by cfg edge to HAS_EXIT and MAY_EXIT. In MAY_EXIT | |
570 additionally mark blocks that may exit due to a call. */ | |
571 | |
572 static void | |
573 find_exits (struct loop *loop, basic_block *body, | |
574 bitmap may_exit, bitmap has_exit) | |
575 { | |
576 unsigned i; | |
577 edge_iterator ei; | |
578 edge e; | |
579 struct loop *outermost_exit = loop, *aexit; | |
580 bool has_call = false; | |
16 | 581 rtx_insn *insn; |
0 | 582 |
583 for (i = 0; i < loop->num_nodes; i++) | |
584 { | |
585 if (body[i]->loop_father == loop) | |
586 { | |
587 FOR_BB_INSNS (body[i], insn) | |
588 { | |
589 if (CALL_P (insn) | |
590 && (RTL_LOOPING_CONST_OR_PURE_CALL_P (insn) | |
591 || !RTL_CONST_OR_PURE_CALL_P (insn))) | |
592 { | |
593 has_call = true; | |
594 bitmap_set_bit (may_exit, i); | |
595 break; | |
596 } | |
597 } | |
598 | |
599 FOR_EACH_EDGE (e, ei, body[i]->succs) | |
600 { | |
16 | 601 if (! flow_bb_inside_loop_p (loop, e->dest)) |
602 { | |
603 bitmap_set_bit (may_exit, i); | |
604 bitmap_set_bit (has_exit, i); | |
605 outermost_exit = find_common_loop (outermost_exit, | |
606 e->dest->loop_father); | |
607 } | |
608 /* If we enter a subloop that might never terminate treat | |
609 it like a possible exit. */ | |
610 if (flow_loop_nested_p (loop, e->dest->loop_father)) | |
611 bitmap_set_bit (may_exit, i); | |
0 | 612 } |
613 continue; | |
614 } | |
615 | |
616 /* Use the data stored for the subloop to decide whether we may exit | |
617 through it. It is sufficient to do this for header of the loop, | |
618 as other basic blocks inside it must be dominated by it. */ | |
619 if (body[i]->loop_father->header != body[i]) | |
620 continue; | |
621 | |
622 if (LOOP_DATA (body[i]->loop_father)->has_call) | |
623 { | |
624 has_call = true; | |
625 bitmap_set_bit (may_exit, i); | |
626 } | |
627 aexit = LOOP_DATA (body[i]->loop_father)->outermost_exit; | |
628 if (aexit != loop) | |
629 { | |
630 bitmap_set_bit (may_exit, i); | |
631 bitmap_set_bit (has_exit, i); | |
632 | |
633 if (flow_loop_nested_p (aexit, outermost_exit)) | |
634 outermost_exit = aexit; | |
635 } | |
636 } | |
637 | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
638 if (loop->aux == NULL) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
639 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
640 loop->aux = xcalloc (1, sizeof (struct loop_data)); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
641 bitmap_initialize (&LOOP_DATA (loop)->regs_ref, ®_obstack); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
642 bitmap_initialize (&LOOP_DATA (loop)->regs_live, ®_obstack); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
643 } |
0 | 644 LOOP_DATA (loop)->outermost_exit = outermost_exit; |
645 LOOP_DATA (loop)->has_call = has_call; | |
646 } | |
647 | |
648 /* Check whether we may assign a value to X from a register. */ | |
649 | |
650 static bool | |
651 may_assign_reg_p (rtx x) | |
652 { | |
653 return (GET_MODE (x) != VOIDmode | |
654 && GET_MODE (x) != BLKmode | |
655 && can_copy_p (GET_MODE (x)) | |
656 && (!REG_P (x) | |
657 || !HARD_REGISTER_P (x) | |
658 || REGNO_REG_CLASS (REGNO (x)) != NO_REGS)); | |
659 } | |
660 | |
661 /* Finds definitions that may correspond to invariants in LOOP with body | |
662 BODY. */ | |
663 | |
664 static void | |
16 | 665 find_defs (struct loop *loop) |
0 | 666 { |
16 | 667 if (dump_file) |
668 { | |
669 fprintf (dump_file, | |
670 "*****starting processing of loop %d ******\n", | |
671 loop->num); | |
672 } | |
0 | 673 |
674 df_remove_problem (df_chain); | |
675 df_process_deferred_rescans (); | |
676 df_chain_add_problem (DF_UD_CHAIN); | |
16 | 677 df_live_add_problem (); |
678 df_live_set_all_dirty (); | |
679 df_set_flags (DF_RD_PRUNE_DEAD_DEFS); | |
680 df_analyze_loop (loop); | |
681 check_invariant_table_size (); | |
0 | 682 |
683 if (dump_file) | |
684 { | |
685 df_dump_region (dump_file); | |
16 | 686 fprintf (dump_file, |
687 "*****ending processing of loop %d ******\n", | |
688 loop->num); | |
0 | 689 } |
690 } | |
691 | |
692 /* Creates a new invariant for definition DEF in INSN, depending on invariants | |
693 in DEPENDS_ON. ALWAYS_EXECUTED is true if the insn is always executed, | |
694 unless the program ends due to a function call. The newly created invariant | |
695 is returned. */ | |
696 | |
697 static struct invariant * | |
16 | 698 create_new_invariant (struct def *def, rtx_insn *insn, bitmap depends_on, |
0 | 699 bool always_executed) |
700 { | |
701 struct invariant *inv = XNEW (struct invariant); | |
702 rtx set = single_set (insn); | |
703 bool speed = optimize_bb_for_speed_p (BLOCK_FOR_INSN (insn)); | |
704 | |
705 inv->def = def; | |
706 inv->always_executed = always_executed; | |
707 inv->depends_on = depends_on; | |
708 | |
709 /* If the set is simple, usually by moving it we move the whole store out of | |
710 the loop. Otherwise we save only cost of the computation. */ | |
711 if (def) | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
712 { |
16 | 713 inv->cost = set_rtx_cost (set, speed); |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
714 /* ??? Try to determine cheapness of address computation. Unfortunately |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
715 the address cost is only a relative measure, we can't really compare |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
716 it with any absolute number, but only with other address costs. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
717 But here we don't have any other addresses, so compare with a magic |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
718 number anyway. It has to be large enough to not regress PR33928 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
719 (by avoiding to move reg+8,reg+16,reg+24 invariants), but small |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
720 enough to not regress 410.bwaves either (by still moving reg+reg |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
721 invariants). |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
722 See http://gcc.gnu.org/ml/gcc-patches/2009-10/msg01210.html . */ |
16 | 723 if (SCALAR_INT_MODE_P (GET_MODE (SET_DEST (set)))) |
724 inv->cheap_address = address_cost (SET_SRC (set), word_mode, | |
725 ADDR_SPACE_GENERIC, speed) < 3; | |
726 else | |
727 inv->cheap_address = false; | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
728 } |
0 | 729 else |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
730 { |
16 | 731 inv->cost = set_src_cost (SET_SRC (set), GET_MODE (SET_DEST (set)), |
732 speed); | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
733 inv->cheap_address = false; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
734 } |
0 | 735 |
736 inv->move = false; | |
737 inv->reg = NULL_RTX; | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
738 inv->orig_regno = -1; |
0 | 739 inv->stamp = 0; |
740 inv->insn = insn; | |
741 | |
16 | 742 inv->invno = invariants.length (); |
0 | 743 inv->eqto = ~0u; |
16 | 744 |
745 /* Itself. */ | |
746 inv->eqno = 1; | |
747 | |
0 | 748 if (def) |
749 def->invno = inv->invno; | |
16 | 750 invariants.safe_push (inv); |
0 | 751 |
752 if (dump_file) | |
753 { | |
754 fprintf (dump_file, | |
755 "Set in insn %d is invariant (%d), cost %d, depends on ", | |
756 INSN_UID (insn), inv->invno, inv->cost); | |
757 dump_bitmap (dump_file, inv->depends_on); | |
758 } | |
759 | |
760 return inv; | |
761 } | |
762 | |
16 | 763 /* Return a canonical version of X for the address, from the point of view, |
764 that all multiplications are represented as MULT instead of the multiply | |
765 by a power of 2 being represented as ASHIFT. | |
766 | |
767 Callers should prepare a copy of X because this function may modify it | |
768 in place. */ | |
769 | |
770 static void | |
771 canonicalize_address_mult (rtx x) | |
772 { | |
773 subrtx_var_iterator::array_type array; | |
774 FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST) | |
775 { | |
776 rtx sub = *iter; | |
777 scalar_int_mode sub_mode; | |
778 if (is_a <scalar_int_mode> (GET_MODE (sub), &sub_mode) | |
779 && GET_CODE (sub) == ASHIFT | |
780 && CONST_INT_P (XEXP (sub, 1)) | |
781 && INTVAL (XEXP (sub, 1)) < GET_MODE_BITSIZE (sub_mode) | |
782 && INTVAL (XEXP (sub, 1)) >= 0) | |
783 { | |
784 HOST_WIDE_INT shift = INTVAL (XEXP (sub, 1)); | |
785 PUT_CODE (sub, MULT); | |
786 XEXP (sub, 1) = gen_int_mode (HOST_WIDE_INT_1 << shift, sub_mode); | |
787 iter.skip_subrtxes (); | |
788 } | |
789 } | |
790 } | |
791 | |
792 /* Maximum number of sub expressions in address. We set it to | |
793 a small integer since it's unlikely to have a complicated | |
794 address expression. */ | |
795 | |
796 #define MAX_CANON_ADDR_PARTS (5) | |
797 | |
798 /* Collect sub expressions in address X with PLUS as the seperator. | |
799 Sub expressions are stored in vector ADDR_PARTS. */ | |
800 | |
801 static void | |
802 collect_address_parts (rtx x, vec<rtx> *addr_parts) | |
803 { | |
804 subrtx_var_iterator::array_type array; | |
805 FOR_EACH_SUBRTX_VAR (iter, array, x, NONCONST) | |
806 { | |
807 rtx sub = *iter; | |
808 | |
809 if (GET_CODE (sub) != PLUS) | |
810 { | |
811 addr_parts->safe_push (sub); | |
812 iter.skip_subrtxes (); | |
813 } | |
814 } | |
815 } | |
816 | |
817 /* Compare function for sorting sub expressions X and Y based on | |
818 precedence defined for communitive operations. */ | |
819 | |
820 static int | |
821 compare_address_parts (const void *x, const void *y) | |
822 { | |
823 const rtx *rx = (const rtx *)x; | |
824 const rtx *ry = (const rtx *)y; | |
825 int px = commutative_operand_precedence (*rx); | |
826 int py = commutative_operand_precedence (*ry); | |
827 | |
828 return (py - px); | |
829 } | |
830 | |
831 /* Return a canonical version address for X by following steps: | |
832 1) Rewrite ASHIFT into MULT recursively. | |
833 2) Divide address into sub expressions with PLUS as the | |
834 separator. | |
835 3) Sort sub expressions according to precedence defined | |
836 for communative operations. | |
837 4) Simplify CONST_INT_P sub expressions. | |
838 5) Create new canonicalized address and return. | |
839 Callers should prepare a copy of X because this function may | |
840 modify it in place. */ | |
841 | |
842 static rtx | |
843 canonicalize_address (rtx x) | |
844 { | |
845 rtx res; | |
846 unsigned int i, j; | |
847 machine_mode mode = GET_MODE (x); | |
848 auto_vec<rtx, MAX_CANON_ADDR_PARTS> addr_parts; | |
849 | |
850 /* Rewrite ASHIFT into MULT. */ | |
851 canonicalize_address_mult (x); | |
852 /* Divide address into sub expressions. */ | |
853 collect_address_parts (x, &addr_parts); | |
854 /* Unlikely to have very complicated address. */ | |
855 if (addr_parts.length () < 2 | |
856 || addr_parts.length () > MAX_CANON_ADDR_PARTS) | |
857 return x; | |
858 | |
859 /* Sort sub expressions according to canonicalization precedence. */ | |
860 addr_parts.qsort (compare_address_parts); | |
861 | |
862 /* Simplify all constant int summary if possible. */ | |
863 for (i = 0; i < addr_parts.length (); i++) | |
864 if (CONST_INT_P (addr_parts[i])) | |
865 break; | |
866 | |
867 for (j = i + 1; j < addr_parts.length (); j++) | |
868 { | |
869 gcc_assert (CONST_INT_P (addr_parts[j])); | |
870 addr_parts[i] = simplify_gen_binary (PLUS, mode, | |
871 addr_parts[i], | |
872 addr_parts[j]); | |
873 } | |
874 | |
875 /* Chain PLUS operators to the left for !CONST_INT_P sub expressions. */ | |
876 res = addr_parts[0]; | |
877 for (j = 1; j < i; j++) | |
878 res = simplify_gen_binary (PLUS, mode, res, addr_parts[j]); | |
879 | |
880 /* Pickup the last CONST_INT_P sub expression. */ | |
881 if (i < addr_parts.length ()) | |
882 res = simplify_gen_binary (PLUS, mode, res, addr_parts[i]); | |
883 | |
884 return res; | |
885 } | |
886 | |
887 /* Given invariant DEF and its address USE, check if the corresponding | |
888 invariant expr can be propagated into the use or not. */ | |
889 | |
890 static bool | |
891 inv_can_prop_to_addr_use (struct def *def, df_ref use) | |
892 { | |
893 struct invariant *inv; | |
894 rtx *pos = DF_REF_REAL_LOC (use), def_set, use_set; | |
895 rtx_insn *use_insn = DF_REF_INSN (use); | |
896 rtx_insn *def_insn; | |
897 bool ok; | |
898 | |
899 inv = invariants[def->invno]; | |
900 /* No need to check if address expression is expensive. */ | |
901 if (!inv->cheap_address) | |
902 return false; | |
903 | |
904 def_insn = inv->insn; | |
905 def_set = single_set (def_insn); | |
906 if (!def_set) | |
907 return false; | |
908 | |
909 validate_unshare_change (use_insn, pos, SET_SRC (def_set), true); | |
910 ok = verify_changes (0); | |
911 /* Try harder with canonicalization in address expression. */ | |
912 if (!ok && (use_set = single_set (use_insn)) != NULL_RTX) | |
913 { | |
914 rtx src, dest, mem = NULL_RTX; | |
915 | |
916 src = SET_SRC (use_set); | |
917 dest = SET_DEST (use_set); | |
918 if (MEM_P (src)) | |
919 mem = src; | |
920 else if (MEM_P (dest)) | |
921 mem = dest; | |
922 | |
923 if (mem != NULL_RTX | |
924 && !memory_address_addr_space_p (GET_MODE (mem), | |
925 XEXP (mem, 0), | |
926 MEM_ADDR_SPACE (mem))) | |
927 { | |
928 rtx addr = canonicalize_address (copy_rtx (XEXP (mem, 0))); | |
929 if (memory_address_addr_space_p (GET_MODE (mem), | |
930 addr, MEM_ADDR_SPACE (mem))) | |
931 ok = true; | |
932 } | |
933 } | |
934 cancel_changes (0); | |
935 return ok; | |
936 } | |
937 | |
0 | 938 /* Record USE at DEF. */ |
939 | |
940 static void | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
941 record_use (struct def *def, df_ref use) |
0 | 942 { |
943 struct use *u = XNEW (struct use); | |
944 | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
945 u->pos = DF_REF_REAL_LOC (use); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
946 u->insn = DF_REF_INSN (use); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
947 u->addr_use_p = (DF_REF_TYPE (use) == DF_REF_REG_MEM_LOAD |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
948 || DF_REF_TYPE (use) == DF_REF_REG_MEM_STORE); |
0 | 949 u->next = def->uses; |
950 def->uses = u; | |
951 def->n_uses++; | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
952 if (u->addr_use_p) |
16 | 953 { |
954 /* Initialize propagation information if this is the first addr | |
955 use of the inv def. */ | |
956 if (def->n_addr_uses == 0) | |
957 def->can_prop_to_addr_uses = true; | |
958 | |
959 def->n_addr_uses++; | |
960 if (def->can_prop_to_addr_uses && !inv_can_prop_to_addr_use (def, use)) | |
961 def->can_prop_to_addr_uses = false; | |
962 } | |
0 | 963 } |
964 | |
965 /* Finds the invariants USE depends on and store them to the DEPENDS_ON | |
966 bitmap. Returns true if all dependencies of USE are known to be | |
967 loop invariants, false otherwise. */ | |
968 | |
969 static bool | |
970 check_dependency (basic_block bb, df_ref use, bitmap depends_on) | |
971 { | |
972 df_ref def; | |
973 basic_block def_bb; | |
974 struct df_link *defs; | |
975 struct def *def_data; | |
976 struct invariant *inv; | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
977 |
0 | 978 if (DF_REF_FLAGS (use) & DF_REF_READ_WRITE) |
979 return false; | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
980 |
0 | 981 defs = DF_REF_CHAIN (use); |
982 if (!defs) | |
16 | 983 { |
984 unsigned int regno = DF_REF_REGNO (use); | |
985 | |
986 /* If this is the use of an uninitialized argument register that is | |
987 likely to be spilled, do not move it lest this might extend its | |
988 lifetime and cause reload to die. This can occur for a call to | |
989 a function taking complex number arguments and moving the insns | |
990 preparing the arguments without moving the call itself wouldn't | |
991 gain much in practice. */ | |
992 if ((DF_REF_FLAGS (use) & DF_HARD_REG_LIVE) | |
993 && FUNCTION_ARG_REGNO_P (regno) | |
994 && targetm.class_likely_spilled_p (REGNO_REG_CLASS (regno))) | |
995 return false; | |
996 | |
997 return true; | |
998 } | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
999 |
0 | 1000 if (defs->next) |
1001 return false; | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1002 |
0 | 1003 def = defs->ref; |
1004 check_invariant_table_size (); | |
16 | 1005 inv = invariant_table[DF_REF_ID (def)]; |
0 | 1006 if (!inv) |
1007 return false; | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1008 |
0 | 1009 def_data = inv->def; |
1010 gcc_assert (def_data != NULL); | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1011 |
0 | 1012 def_bb = DF_REF_BB (def); |
1013 /* Note that in case bb == def_bb, we know that the definition | |
1014 dominates insn, because def has invariant_table[DF_REF_ID(def)] | |
1015 defined and we process the insns in the basic block bb | |
1016 sequentially. */ | |
1017 if (!dominated_by_p (CDI_DOMINATORS, bb, def_bb)) | |
1018 return false; | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1019 |
0 | 1020 bitmap_set_bit (depends_on, def_data->invno); |
1021 return true; | |
1022 } | |
1023 | |
1024 | |
1025 /* Finds the invariants INSN depends on and store them to the DEPENDS_ON | |
1026 bitmap. Returns true if all dependencies of INSN are known to be | |
1027 loop invariants, false otherwise. */ | |
1028 | |
1029 static bool | |
16 | 1030 check_dependencies (rtx_insn *insn, bitmap depends_on) |
0 | 1031 { |
1032 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
16 | 1033 df_ref use; |
0 | 1034 basic_block bb = BLOCK_FOR_INSN (insn); |
1035 | |
16 | 1036 FOR_EACH_INSN_INFO_USE (use, insn_info) |
1037 if (!check_dependency (bb, use, depends_on)) | |
0 | 1038 return false; |
16 | 1039 FOR_EACH_INSN_INFO_EQ_USE (use, insn_info) |
1040 if (!check_dependency (bb, use, depends_on)) | |
0 | 1041 return false; |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1042 |
0 | 1043 return true; |
1044 } | |
1045 | |
16 | 1046 /* Pre-check candidate DEST to skip the one which can not make a valid insn |
1047 during move_invariant_reg. SIMPLE is to skip HARD_REGISTER. */ | |
1048 static bool | |
1049 pre_check_invariant_p (bool simple, rtx dest) | |
1050 { | |
1051 if (simple && REG_P (dest) && DF_REG_DEF_COUNT (REGNO (dest)) > 1) | |
1052 { | |
1053 df_ref use; | |
1054 unsigned int i = REGNO (dest); | |
1055 struct df_insn_info *insn_info; | |
1056 df_ref def_rec; | |
1057 | |
1058 for (use = DF_REG_USE_CHAIN (i); use; use = DF_REF_NEXT_REG (use)) | |
1059 { | |
1060 rtx_insn *ref = DF_REF_INSN (use); | |
1061 insn_info = DF_INSN_INFO_GET (ref); | |
1062 | |
1063 FOR_EACH_INSN_INFO_DEF (def_rec, insn_info) | |
1064 if (DF_REF_REGNO (def_rec) == i) | |
1065 { | |
1066 /* Multi definitions at this stage, most likely are due to | |
1067 instruction constraints, which requires both read and write | |
1068 on the same register. Since move_invariant_reg is not | |
1069 powerful enough to handle such cases, just ignore the INV | |
1070 and leave the chance to others. */ | |
1071 return false; | |
1072 } | |
1073 } | |
1074 } | |
1075 return true; | |
1076 } | |
1077 | |
0 | 1078 /* Finds invariant in INSN. ALWAYS_REACHED is true if the insn is always |
1079 executed. ALWAYS_EXECUTED is true if the insn is always executed, | |
1080 unless the program ends due to a function call. */ | |
1081 | |
1082 static void | |
16 | 1083 find_invariant_insn (rtx_insn *insn, bool always_reached, bool always_executed) |
0 | 1084 { |
1085 df_ref ref; | |
1086 struct def *def; | |
1087 bitmap depends_on; | |
1088 rtx set, dest; | |
1089 bool simple = true; | |
1090 struct invariant *inv; | |
1091 | |
1092 /* We can't move a CC0 setter without the user. */ | |
16 | 1093 if (HAVE_cc0 && sets_cc0_p (insn)) |
0 | 1094 return; |
1095 | |
1096 set = single_set (insn); | |
1097 if (!set) | |
1098 return; | |
1099 dest = SET_DEST (set); | |
1100 | |
1101 if (!REG_P (dest) | |
1102 || HARD_REGISTER_P (dest)) | |
1103 simple = false; | |
1104 | |
16 | 1105 if (!may_assign_reg_p (dest) |
1106 || !pre_check_invariant_p (simple, dest) | |
0 | 1107 || !check_maybe_invariant (SET_SRC (set))) |
1108 return; | |
1109 | |
1110 /* If the insn can throw exception, we cannot move it at all without changing | |
1111 cfg. */ | |
1112 if (can_throw_internal (insn)) | |
1113 return; | |
1114 | |
1115 /* We cannot make trapping insn executed, unless it was executed before. */ | |
1116 if (may_trap_or_fault_p (PATTERN (insn)) && !always_reached) | |
1117 return; | |
1118 | |
1119 depends_on = BITMAP_ALLOC (NULL); | |
1120 if (!check_dependencies (insn, depends_on)) | |
1121 { | |
1122 BITMAP_FREE (depends_on); | |
1123 return; | |
1124 } | |
1125 | |
1126 if (simple) | |
1127 def = XCNEW (struct def); | |
1128 else | |
1129 def = NULL; | |
1130 | |
1131 inv = create_new_invariant (def, insn, depends_on, always_executed); | |
1132 | |
1133 if (simple) | |
1134 { | |
1135 ref = df_find_def (insn, dest); | |
1136 check_invariant_table_size (); | |
16 | 1137 invariant_table[DF_REF_ID (ref)] = inv; |
0 | 1138 } |
1139 } | |
1140 | |
1141 /* Record registers used in INSN that have a unique invariant definition. */ | |
1142 | |
1143 static void | |
16 | 1144 record_uses (rtx_insn *insn) |
0 | 1145 { |
1146 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); | |
16 | 1147 df_ref use; |
0 | 1148 struct invariant *inv; |
1149 | |
16 | 1150 FOR_EACH_INSN_INFO_USE (use, insn_info) |
0 | 1151 { |
1152 inv = invariant_for_use (use); | |
1153 if (inv) | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1154 record_use (inv->def, use); |
0 | 1155 } |
16 | 1156 FOR_EACH_INSN_INFO_EQ_USE (use, insn_info) |
0 | 1157 { |
1158 inv = invariant_for_use (use); | |
1159 if (inv) | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1160 record_use (inv->def, use); |
0 | 1161 } |
1162 } | |
1163 | |
1164 /* Finds invariants in INSN. ALWAYS_REACHED is true if the insn is always | |
1165 executed. ALWAYS_EXECUTED is true if the insn is always executed, | |
1166 unless the program ends due to a function call. */ | |
1167 | |
1168 static void | |
16 | 1169 find_invariants_insn (rtx_insn *insn, bool always_reached, bool always_executed) |
0 | 1170 { |
1171 find_invariant_insn (insn, always_reached, always_executed); | |
1172 record_uses (insn); | |
1173 } | |
1174 | |
1175 /* Finds invariants in basic block BB. ALWAYS_REACHED is true if the | |
1176 basic block is always executed. ALWAYS_EXECUTED is true if the basic | |
1177 block is always executed, unless the program ends due to a function | |
1178 call. */ | |
1179 | |
1180 static void | |
1181 find_invariants_bb (basic_block bb, bool always_reached, bool always_executed) | |
1182 { | |
16 | 1183 rtx_insn *insn; |
0 | 1184 |
1185 FOR_BB_INSNS (bb, insn) | |
1186 { | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1187 if (!NONDEBUG_INSN_P (insn)) |
0 | 1188 continue; |
1189 | |
1190 find_invariants_insn (insn, always_reached, always_executed); | |
1191 | |
1192 if (always_reached | |
1193 && CALL_P (insn) | |
1194 && (RTL_LOOPING_CONST_OR_PURE_CALL_P (insn) | |
1195 || ! RTL_CONST_OR_PURE_CALL_P (insn))) | |
1196 always_reached = false; | |
1197 } | |
1198 } | |
1199 | |
1200 /* Finds invariants in LOOP with body BODY. ALWAYS_REACHED is the bitmap of | |
1201 basic blocks in BODY that are always executed. ALWAYS_EXECUTED is the | |
1202 bitmap of basic blocks in BODY that are always executed unless the program | |
1203 ends due to a function call. */ | |
1204 | |
1205 static void | |
1206 find_invariants_body (struct loop *loop, basic_block *body, | |
1207 bitmap always_reached, bitmap always_executed) | |
1208 { | |
1209 unsigned i; | |
1210 | |
1211 for (i = 0; i < loop->num_nodes; i++) | |
1212 find_invariants_bb (body[i], | |
1213 bitmap_bit_p (always_reached, i), | |
1214 bitmap_bit_p (always_executed, i)); | |
1215 } | |
1216 | |
1217 /* Finds invariants in LOOP. */ | |
1218 | |
1219 static void | |
1220 find_invariants (struct loop *loop) | |
1221 { | |
16 | 1222 auto_bitmap may_exit; |
1223 auto_bitmap always_reached; | |
1224 auto_bitmap has_exit; | |
1225 auto_bitmap always_executed; | |
0 | 1226 basic_block *body = get_loop_body_in_dom_order (loop); |
1227 | |
1228 find_exits (loop, body, may_exit, has_exit); | |
1229 compute_always_reached (loop, body, may_exit, always_reached); | |
1230 compute_always_reached (loop, body, has_exit, always_executed); | |
1231 | |
16 | 1232 find_defs (loop); |
0 | 1233 find_invariants_body (loop, body, always_reached, always_executed); |
1234 merge_identical_invariants (); | |
1235 | |
1236 free (body); | |
1237 } | |
1238 | |
1239 /* Frees a list of uses USE. */ | |
1240 | |
1241 static void | |
1242 free_use_list (struct use *use) | |
1243 { | |
1244 struct use *next; | |
1245 | |
1246 for (; use; use = next) | |
1247 { | |
1248 next = use->next; | |
1249 free (use); | |
1250 } | |
1251 } | |
1252 | |
16 | 1253 /* Return pressure class and number of hard registers (through *NREGS) |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1254 for destination of INSN. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1255 static enum reg_class |
16 | 1256 get_pressure_class_and_nregs (rtx_insn *insn, int *nregs) |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1257 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1258 rtx reg; |
16 | 1259 enum reg_class pressure_class; |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1260 rtx set = single_set (insn); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1261 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1262 /* Considered invariant insns have only one set. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1263 gcc_assert (set != NULL_RTX); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1264 reg = SET_DEST (set); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1265 if (GET_CODE (reg) == SUBREG) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1266 reg = SUBREG_REG (reg); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1267 if (MEM_P (reg)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1268 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1269 *nregs = 0; |
16 | 1270 pressure_class = NO_REGS; |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1271 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1272 else |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1273 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1274 if (! REG_P (reg)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1275 reg = NULL_RTX; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1276 if (reg == NULL_RTX) |
16 | 1277 pressure_class = GENERAL_REGS; |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1278 else |
16 | 1279 { |
1280 pressure_class = reg_allocno_class (REGNO (reg)); | |
1281 pressure_class = ira_pressure_class_translate[pressure_class]; | |
1282 } | |
1283 *nregs | |
1284 = ira_reg_class_max_nregs[pressure_class][GET_MODE (SET_SRC (set))]; | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1285 } |
16 | 1286 return pressure_class; |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1287 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1288 |
0 | 1289 /* Calculates cost and number of registers needed for moving invariant INV |
16 | 1290 out of the loop and stores them to *COST and *REGS_NEEDED. *CL will be |
1291 the REG_CLASS of INV. Return | |
1292 -1: if INV is invalid. | |
1293 0: if INV and its depends_on have same reg_class | |
1294 1: if INV and its depends_on have different reg_classes. */ | |
0 | 1295 |
16 | 1296 static int |
1297 get_inv_cost (struct invariant *inv, int *comp_cost, unsigned *regs_needed, | |
1298 enum reg_class *cl) | |
0 | 1299 { |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1300 int i, acomp_cost; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1301 unsigned aregs_needed[N_REG_CLASSES]; |
0 | 1302 unsigned depno; |
1303 struct invariant *dep; | |
1304 bitmap_iterator bi; | |
16 | 1305 int ret = 1; |
0 | 1306 |
1307 /* Find the representative of the class of the equivalent invariants. */ | |
16 | 1308 inv = invariants[inv->eqto]; |
0 | 1309 |
1310 *comp_cost = 0; | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1311 if (! flag_ira_loop_pressure) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1312 regs_needed[0] = 0; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1313 else |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1314 { |
16 | 1315 for (i = 0; i < ira_pressure_classes_num; i++) |
1316 regs_needed[ira_pressure_classes[i]] = 0; | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1317 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1318 |
0 | 1319 if (inv->move |
1320 || inv->stamp == actual_stamp) | |
16 | 1321 return -1; |
0 | 1322 inv->stamp = actual_stamp; |
1323 | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1324 if (! flag_ira_loop_pressure) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1325 regs_needed[0]++; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1326 else |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1327 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1328 int nregs; |
16 | 1329 enum reg_class pressure_class; |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1330 |
16 | 1331 pressure_class = get_pressure_class_and_nregs (inv->insn, &nregs); |
1332 regs_needed[pressure_class] += nregs; | |
1333 *cl = pressure_class; | |
1334 ret = 0; | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1335 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1336 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1337 if (!inv->cheap_address |
16 | 1338 || inv->def->n_uses == 0 |
1339 || inv->def->n_addr_uses < inv->def->n_uses | |
1340 /* Count cost if the inv can't be propagated into address uses. */ | |
1341 || !inv->def->can_prop_to_addr_uses) | |
1342 (*comp_cost) += inv->cost * inv->eqno; | |
0 | 1343 |
1344 #ifdef STACK_REGS | |
1345 { | |
1346 /* Hoisting constant pool constants into stack regs may cost more than | |
1347 just single register. On x87, the balance is affected both by the | |
1348 small number of FP registers, and by its register stack organization, | |
1349 that forces us to add compensation code in and around the loop to | |
1350 shuffle the operands to the top of stack before use, and pop them | |
1351 from the stack after the loop finishes. | |
1352 | |
1353 To model this effect, we increase the number of registers needed for | |
1354 stack registers by two: one register push, and one register pop. | |
1355 This usually has the effect that FP constant loads from the constant | |
1356 pool are not moved out of the loop. | |
1357 | |
1358 Note that this also means that dependent invariants can not be moved. | |
1359 However, the primary purpose of this pass is to move loop invariant | |
1360 address arithmetic out of loops, and address arithmetic that depends | |
1361 on floating point constants is unlikely to ever occur. */ | |
1362 rtx set = single_set (inv->insn); | |
1363 if (set | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1364 && IS_STACK_MODE (GET_MODE (SET_SRC (set))) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1365 && constant_pool_constant_p (SET_SRC (set))) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1366 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1367 if (flag_ira_loop_pressure) |
16 | 1368 regs_needed[ira_stack_reg_pressure_class] += 2; |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1369 else |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1370 regs_needed[0] += 2; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1371 } |
0 | 1372 } |
1373 #endif | |
1374 | |
1375 EXECUTE_IF_SET_IN_BITMAP (inv->depends_on, 0, depno, bi) | |
1376 { | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1377 bool check_p; |
16 | 1378 enum reg_class dep_cl = ALL_REGS; |
1379 int dep_ret; | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1380 |
16 | 1381 dep = invariants[depno]; |
0 | 1382 |
16 | 1383 /* If DEP is moved out of the loop, it is not a depends_on any more. */ |
1384 if (dep->move) | |
1385 continue; | |
1386 | |
1387 dep_ret = get_inv_cost (dep, &acomp_cost, aregs_needed, &dep_cl); | |
0 | 1388 |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1389 if (! flag_ira_loop_pressure) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1390 check_p = aregs_needed[0] != 0; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1391 else |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1392 { |
16 | 1393 for (i = 0; i < ira_pressure_classes_num; i++) |
1394 if (aregs_needed[ira_pressure_classes[i]] != 0) | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1395 break; |
16 | 1396 check_p = i < ira_pressure_classes_num; |
1397 | |
1398 if ((dep_ret == 1) || ((dep_ret == 0) && (*cl != dep_cl))) | |
1399 { | |
1400 *cl = ALL_REGS; | |
1401 ret = 1; | |
1402 } | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1403 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1404 if (check_p |
0 | 1405 /* We need to check always_executed, since if the original value of |
1406 the invariant may be preserved, we may need to keep it in a | |
1407 separate register. TODO check whether the register has an | |
1408 use outside of the loop. */ | |
1409 && dep->always_executed | |
1410 && !dep->def->uses->next) | |
1411 { | |
1412 /* If this is a single use, after moving the dependency we will not | |
1413 need a new register. */ | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1414 if (! flag_ira_loop_pressure) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1415 aregs_needed[0]--; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1416 else |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1417 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1418 int nregs; |
16 | 1419 enum reg_class pressure_class; |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1420 |
16 | 1421 pressure_class = get_pressure_class_and_nregs (inv->insn, &nregs); |
1422 aregs_needed[pressure_class] -= nregs; | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1423 } |
0 | 1424 } |
1425 | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1426 if (! flag_ira_loop_pressure) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1427 regs_needed[0] += aregs_needed[0]; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1428 else |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1429 { |
16 | 1430 for (i = 0; i < ira_pressure_classes_num; i++) |
1431 regs_needed[ira_pressure_classes[i]] | |
1432 += aregs_needed[ira_pressure_classes[i]]; | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1433 } |
0 | 1434 (*comp_cost) += acomp_cost; |
1435 } | |
16 | 1436 return ret; |
0 | 1437 } |
1438 | |
1439 /* Calculates gain for eliminating invariant INV. REGS_USED is the number | |
1440 of registers used in the loop, NEW_REGS is the number of new variables | |
1441 already added due to the invariant motion. The number of registers needed | |
14
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
11
diff
changeset
|
1442 for it is stored in *REGS_NEEDED. SPEED and CALL_P are flags passed |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
11
diff
changeset
|
1443 through to estimate_reg_pressure_cost. */ |
0 | 1444 |
1445 static int | |
1446 gain_for_invariant (struct invariant *inv, unsigned *regs_needed, | |
14
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
11
diff
changeset
|
1447 unsigned *new_regs, unsigned regs_used, |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
11
diff
changeset
|
1448 bool speed, bool call_p) |
0 | 1449 { |
1450 int comp_cost, size_cost; | |
16 | 1451 /* Workaround -Wmaybe-uninitialized false positive during |
1452 profiledbootstrap by initializing it. */ | |
1453 enum reg_class cl = NO_REGS; | |
1454 int ret; | |
0 | 1455 |
1456 actual_stamp++; | |
1457 | |
16 | 1458 ret = get_inv_cost (inv, &comp_cost, regs_needed, &cl); |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1459 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1460 if (! flag_ira_loop_pressure) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1461 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1462 size_cost = (estimate_reg_pressure_cost (new_regs[0] + regs_needed[0], |
14
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
11
diff
changeset
|
1463 regs_used, speed, call_p) |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1464 - estimate_reg_pressure_cost (new_regs[0], |
14
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
11
diff
changeset
|
1465 regs_used, speed, call_p)); |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1466 } |
16 | 1467 else if (ret < 0) |
1468 return -1; | |
1469 else if ((ret == 0) && (cl == NO_REGS)) | |
1470 /* Hoist it anyway since it does not impact register pressure. */ | |
1471 return 1; | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1472 else |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1473 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1474 int i; |
16 | 1475 enum reg_class pressure_class; |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1476 |
16 | 1477 for (i = 0; i < ira_pressure_classes_num; i++) |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1478 { |
16 | 1479 pressure_class = ira_pressure_classes[i]; |
1480 | |
1481 if (!reg_classes_intersect_p (pressure_class, cl)) | |
1482 continue; | |
1483 | |
1484 if ((int) new_regs[pressure_class] | |
1485 + (int) regs_needed[pressure_class] | |
1486 + LOOP_DATA (curr_loop)->max_reg_pressure[pressure_class] | |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1487 + IRA_LOOP_RESERVED_REGS |
16 | 1488 > ira_class_hard_regs_num[pressure_class]) |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1489 break; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1490 } |
16 | 1491 if (i < ira_pressure_classes_num) |
9
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1492 /* There will be register pressure excess and we want not to |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1493 make this loop invariant motion. All loop invariants with |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1494 non-positive gains will be rejected in function |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1495 find_invariants_to_move. Therefore we return the negative |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1496 number here. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1497 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1498 One could think that this rejects also expensive loop |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1499 invariant motions and this will hurt code performance. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1500 However numerous experiments with different heuristics |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1501 taking invariant cost into account did not confirm this |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1502 assumption. There are possible explanations for this |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1503 result: |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1504 o probably all expensive invariants were already moved out |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1505 of the loop by PRE and gimple invariant motion pass. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1506 o expensive invariant execution will be hidden by insn |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1507 scheduling or OOO processor hardware because usually such |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1508 invariants have a lot of freedom to be executed |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1509 out-of-order. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1510 Another reason for ignoring invariant cost vs spilling cost |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1511 heuristics is also in difficulties to evaluate accurately |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1512 spill cost at this stage. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1513 return -1; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryuk& |