Mercurial > hg > CbC > CbC_gcc
annotate gcc/tree-predcom.c @ 80:d8bf5c8fdea8
fix comments
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Sat, 24 Sep 2011 02:41:02 +0900 |
parents | f6334be47118 |
children | 04ced10e8804 |
rev | line source |
---|---|
0 | 1 /* Predictive commoning. |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2 Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011 |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
3 Free Software Foundation, Inc. |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
4 |
0 | 5 This file is part of GCC. |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
6 |
0 | 7 GCC is free software; you can redistribute it and/or modify it |
8 under the terms of the GNU General Public License as published by the | |
9 Free Software Foundation; either version 3, or (at your option) any | |
10 later version. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
11 |
0 | 12 GCC is distributed in the hope that it will be useful, but WITHOUT |
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
16 |
0 | 17 You should have received a copy of the GNU General Public License |
18 along with GCC; see the file COPYING3. If not see | |
19 <http://www.gnu.org/licenses/>. */ | |
20 | |
21 /* This file implements the predictive commoning optimization. Predictive | |
22 commoning can be viewed as CSE around a loop, and with some improvements, | |
23 as generalized strength reduction-- i.e., reusing values computed in | |
24 earlier iterations of a loop in the later ones. So far, the pass only | |
25 handles the most useful case, that is, reusing values of memory references. | |
26 If you think this is all just a special case of PRE, you are sort of right; | |
27 however, concentrating on loops is simpler, and makes it possible to | |
28 incorporate data dependence analysis to detect the opportunities, perform | |
29 loop unrolling to avoid copies together with renaming immediately, | |
30 and if needed, we could also take register pressure into account. | |
31 | |
32 Let us demonstrate what is done on an example: | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
33 |
0 | 34 for (i = 0; i < 100; i++) |
35 { | |
36 a[i+2] = a[i] + a[i+1]; | |
37 b[10] = b[10] + i; | |
38 c[i] = c[99 - i]; | |
39 d[i] = d[i + 1]; | |
40 } | |
41 | |
42 1) We find data references in the loop, and split them to mutually | |
43 independent groups (i.e., we find components of a data dependence | |
44 graph). We ignore read-read dependences whose distance is not constant. | |
45 (TODO -- we could also ignore antidependences). In this example, we | |
46 find the following groups: | |
47 | |
48 a[i]{read}, a[i+1]{read}, a[i+2]{write} | |
49 b[10]{read}, b[10]{write} | |
50 c[99 - i]{read}, c[i]{write} | |
51 d[i + 1]{read}, d[i]{write} | |
52 | |
53 2) Inside each of the group, we verify several conditions: | |
54 a) all the references must differ in indices only, and the indices | |
55 must all have the same step | |
56 b) the references must dominate loop latch (and thus, they must be | |
57 ordered by dominance relation). | |
58 c) the distance of the indices must be a small multiple of the step | |
59 We are then able to compute the difference of the references (# of | |
60 iterations before they point to the same place as the first of them). | |
61 Also, in case there are writes in the loop, we split the groups into | |
62 chains whose head is the write whose values are used by the reads in | |
63 the same chain. The chains are then processed independently, | |
64 making the further transformations simpler. Also, the shorter chains | |
65 need the same number of registers, but may require lower unrolling | |
66 factor in order to get rid of the copies on the loop latch. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
67 |
0 | 68 In our example, we get the following chains (the chain for c is invalid). |
69 | |
70 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2} | |
71 b[10]{read,+0}, b[10]{write,+0} | |
72 d[i + 1]{read,+0}, d[i]{write,+1} | |
73 | |
74 3) For each read, we determine the read or write whose value it reuses, | |
75 together with the distance of this reuse. I.e. we take the last | |
76 reference before it with distance 0, or the last of the references | |
77 with the smallest positive distance to the read. Then, we remove | |
78 the references that are not used in any of these chains, discard the | |
79 empty groups, and propagate all the links so that they point to the | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
80 single root reference of the chain (adjusting their distance |
0 | 81 appropriately). Some extra care needs to be taken for references with |
82 step 0. In our example (the numbers indicate the distance of the | |
83 reuse), | |
84 | |
85 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*) | |
86 b[10] --> (*) 1, b[10] (*) | |
87 | |
88 4) The chains are combined together if possible. If the corresponding | |
89 elements of two chains are always combined together with the same | |
90 operator, we remember just the result of this combination, instead | |
91 of remembering the values separately. We may need to perform | |
92 reassociation to enable combining, for example | |
93 | |
94 e[i] + f[i+1] + e[i+1] + f[i] | |
95 | |
96 can be reassociated as | |
97 | |
98 (e[i] + f[i]) + (e[i+1] + f[i+1]) | |
99 | |
100 and we can combine the chains for e and f into one chain. | |
101 | |
102 5) For each root reference (end of the chain) R, let N be maximum distance | |
103 of a reference reusing its value. Variables R0 upto RN are created, | |
104 together with phi nodes that transfer values from R1 .. RN to | |
105 R0 .. R(N-1). | |
106 Initial values are loaded to R0..R(N-1) (in case not all references | |
107 must necessarily be accessed and they may trap, we may fail here; | |
108 TODO sometimes, the loads could be guarded by a check for the number | |
109 of iterations). Values loaded/stored in roots are also copied to | |
110 RN. Other reads are replaced with the appropriate variable Ri. | |
111 Everything is put to SSA form. | |
112 | |
113 As a small improvement, if R0 is dead after the root (i.e., all uses of | |
114 the value with the maximum distance dominate the root), we can avoid | |
115 creating RN and use R0 instead of it. | |
116 | |
117 In our example, we get (only the parts concerning a and b are shown): | |
118 for (i = 0; i < 100; i++) | |
119 { | |
120 f = phi (a[0], s); | |
121 s = phi (a[1], f); | |
122 x = phi (b[10], x); | |
123 | |
124 f = f + s; | |
125 a[i+2] = f; | |
126 x = x + i; | |
127 b[10] = x; | |
128 } | |
129 | |
130 6) Factor F for unrolling is determined as the smallest common multiple of | |
131 (N + 1) for each root reference (N for references for that we avoided | |
132 creating RN). If F and the loop is small enough, loop is unrolled F | |
133 times. The stores to RN (R0) in the copies of the loop body are | |
134 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can | |
135 be coalesced and the copies can be eliminated. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
136 |
0 | 137 TODO -- copy propagation and other optimizations may change the live |
138 ranges of the temporary registers and prevent them from being coalesced; | |
139 this may increase the register pressure. | |
140 | |
141 In our case, F = 2 and the (main loop of the) result is | |
142 | |
143 for (i = 0; i < ...; i += 2) | |
144 { | |
145 f = phi (a[0], f); | |
146 s = phi (a[1], s); | |
147 x = phi (b[10], x); | |
148 | |
149 f = f + s; | |
150 a[i+2] = f; | |
151 x = x + i; | |
152 b[10] = x; | |
153 | |
154 s = s + f; | |
155 a[i+3] = s; | |
156 x = x + i; | |
157 b[10] = x; | |
158 } | |
159 | |
160 TODO -- stores killing other stores can be taken into account, e.g., | |
161 for (i = 0; i < n; i++) | |
162 { | |
163 a[i] = 1; | |
164 a[i+2] = 2; | |
165 } | |
166 | |
167 can be replaced with | |
168 | |
169 t0 = a[0]; | |
170 t1 = a[1]; | |
171 for (i = 0; i < n; i++) | |
172 { | |
173 a[i] = 1; | |
174 t2 = 2; | |
175 t0 = t1; | |
176 t1 = t2; | |
177 } | |
178 a[n] = t0; | |
179 a[n+1] = t1; | |
180 | |
181 The interesting part is that this would generalize store motion; still, since | |
182 sm is performed elsewhere, it does not seem that important. | |
183 | |
184 Predictive commoning can be generalized for arbitrary computations (not | |
185 just memory loads), and also nontrivial transfer functions (e.g., replacing | |
186 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */ | |
187 | |
188 #include "config.h" | |
189 #include "system.h" | |
190 #include "coretypes.h" | |
191 #include "tm.h" | |
192 #include "tree.h" | |
193 #include "tm_p.h" | |
194 #include "cfgloop.h" | |
195 #include "tree-flow.h" | |
196 #include "ggc.h" | |
197 #include "tree-data-ref.h" | |
198 #include "tree-scalar-evolution.h" | |
199 #include "tree-chrec.h" | |
200 #include "params.h" | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
201 #include "tree-pretty-print.h" |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
202 #include "gimple-pretty-print.h" |
0 | 203 #include "tree-pass.h" |
204 #include "tree-affine.h" | |
205 #include "tree-inline.h" | |
206 | |
207 /* The maximum number of iterations between the considered memory | |
208 references. */ | |
209 | |
210 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
211 |
0 | 212 /* Data references (or phi nodes that carry data reference values across |
213 loop iterations). */ | |
214 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
215 typedef struct dref_d |
0 | 216 { |
217 /* The reference itself. */ | |
218 struct data_reference *ref; | |
219 | |
220 /* The statement in that the reference appears. */ | |
221 gimple stmt; | |
222 | |
223 /* In case that STMT is a phi node, this field is set to the SSA name | |
224 defined by it in replace_phis_by_defined_names (in order to avoid | |
225 pointing to phi node that got reallocated in the meantime). */ | |
226 tree name_defined_by_phi; | |
227 | |
228 /* Distance of the reference from the root of the chain (in number of | |
229 iterations of the loop). */ | |
230 unsigned distance; | |
231 | |
232 /* Number of iterations offset from the first reference in the component. */ | |
233 double_int offset; | |
234 | |
235 /* Number of the reference in a component, in dominance ordering. */ | |
236 unsigned pos; | |
237 | |
238 /* True if the memory reference is always accessed when the loop is | |
239 entered. */ | |
240 unsigned always_accessed : 1; | |
241 } *dref; | |
242 | |
243 DEF_VEC_P (dref); | |
244 DEF_VEC_ALLOC_P (dref, heap); | |
245 | |
246 /* Type of the chain of the references. */ | |
247 | |
248 enum chain_type | |
249 { | |
250 /* The addresses of the references in the chain are constant. */ | |
251 CT_INVARIANT, | |
252 | |
253 /* There are only loads in the chain. */ | |
254 CT_LOAD, | |
255 | |
256 /* Root of the chain is store, the rest are loads. */ | |
257 CT_STORE_LOAD, | |
258 | |
259 /* A combination of two chains. */ | |
260 CT_COMBINATION | |
261 }; | |
262 | |
263 /* Chains of data references. */ | |
264 | |
265 typedef struct chain | |
266 { | |
267 /* Type of the chain. */ | |
268 enum chain_type type; | |
269 | |
270 /* For combination chains, the operator and the two chains that are | |
271 combined, and the type of the result. */ | |
272 enum tree_code op; | |
273 tree rslt_type; | |
274 struct chain *ch1, *ch2; | |
275 | |
276 /* The references in the chain. */ | |
277 VEC(dref,heap) *refs; | |
278 | |
279 /* The maximum distance of the reference in the chain from the root. */ | |
280 unsigned length; | |
281 | |
282 /* The variables used to copy the value throughout iterations. */ | |
283 VEC(tree,heap) *vars; | |
284 | |
285 /* Initializers for the variables. */ | |
286 VEC(tree,heap) *inits; | |
287 | |
288 /* True if there is a use of a variable with the maximal distance | |
289 that comes after the root in the loop. */ | |
290 unsigned has_max_use_after : 1; | |
291 | |
292 /* True if all the memory references in the chain are always accessed. */ | |
293 unsigned all_always_accessed : 1; | |
294 | |
295 /* True if this chain was combined together with some other chain. */ | |
296 unsigned combined : 1; | |
297 } *chain_p; | |
298 | |
299 DEF_VEC_P (chain_p); | |
300 DEF_VEC_ALLOC_P (chain_p, heap); | |
301 | |
302 /* Describes the knowledge about the step of the memory references in | |
303 the component. */ | |
304 | |
305 enum ref_step_type | |
306 { | |
307 /* The step is zero. */ | |
308 RS_INVARIANT, | |
309 | |
310 /* The step is nonzero. */ | |
311 RS_NONZERO, | |
312 | |
313 /* The step may or may not be nonzero. */ | |
314 RS_ANY | |
315 }; | |
316 | |
317 /* Components of the data dependence graph. */ | |
318 | |
319 struct component | |
320 { | |
321 /* The references in the component. */ | |
322 VEC(dref,heap) *refs; | |
323 | |
324 /* What we know about the step of the references in the component. */ | |
325 enum ref_step_type comp_step; | |
326 | |
327 /* Next component in the list. */ | |
328 struct component *next; | |
329 }; | |
330 | |
331 /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */ | |
332 | |
333 static bitmap looparound_phis; | |
334 | |
335 /* Cache used by tree_to_aff_combination_expand. */ | |
336 | |
337 static struct pointer_map_t *name_expansions; | |
338 | |
339 /* Dumps data reference REF to FILE. */ | |
340 | |
341 extern void dump_dref (FILE *, dref); | |
342 void | |
343 dump_dref (FILE *file, dref ref) | |
344 { | |
345 if (ref->ref) | |
346 { | |
347 fprintf (file, " "); | |
348 print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM); | |
349 fprintf (file, " (id %u%s)\n", ref->pos, | |
350 DR_IS_READ (ref->ref) ? "" : ", write"); | |
351 | |
352 fprintf (file, " offset "); | |
353 dump_double_int (file, ref->offset, false); | |
354 fprintf (file, "\n"); | |
355 | |
356 fprintf (file, " distance %u\n", ref->distance); | |
357 } | |
358 else | |
359 { | |
360 if (gimple_code (ref->stmt) == GIMPLE_PHI) | |
361 fprintf (file, " looparound ref\n"); | |
362 else | |
363 fprintf (file, " combination ref\n"); | |
364 fprintf (file, " in statement "); | |
365 print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM); | |
366 fprintf (file, "\n"); | |
367 fprintf (file, " distance %u\n", ref->distance); | |
368 } | |
369 | |
370 } | |
371 | |
372 /* Dumps CHAIN to FILE. */ | |
373 | |
374 extern void dump_chain (FILE *, chain_p); | |
375 void | |
376 dump_chain (FILE *file, chain_p chain) | |
377 { | |
378 dref a; | |
379 const char *chain_type; | |
380 unsigned i; | |
381 tree var; | |
382 | |
383 switch (chain->type) | |
384 { | |
385 case CT_INVARIANT: | |
386 chain_type = "Load motion"; | |
387 break; | |
388 | |
389 case CT_LOAD: | |
390 chain_type = "Loads-only"; | |
391 break; | |
392 | |
393 case CT_STORE_LOAD: | |
394 chain_type = "Store-loads"; | |
395 break; | |
396 | |
397 case CT_COMBINATION: | |
398 chain_type = "Combination"; | |
399 break; | |
400 | |
401 default: | |
402 gcc_unreachable (); | |
403 } | |
404 | |
405 fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain, | |
406 chain->combined ? " (combined)" : ""); | |
407 if (chain->type != CT_INVARIANT) | |
408 fprintf (file, " max distance %u%s\n", chain->length, | |
409 chain->has_max_use_after ? "" : ", may reuse first"); | |
410 | |
411 if (chain->type == CT_COMBINATION) | |
412 { | |
413 fprintf (file, " equal to %p %s %p in type ", | |
414 (void *) chain->ch1, op_symbol_code (chain->op), | |
415 (void *) chain->ch2); | |
416 print_generic_expr (file, chain->rslt_type, TDF_SLIM); | |
417 fprintf (file, "\n"); | |
418 } | |
419 | |
420 if (chain->vars) | |
421 { | |
422 fprintf (file, " vars"); | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
423 FOR_EACH_VEC_ELT (tree, chain->vars, i, var) |
0 | 424 { |
425 fprintf (file, " "); | |
426 print_generic_expr (file, var, TDF_SLIM); | |
427 } | |
428 fprintf (file, "\n"); | |
429 } | |
430 | |
431 if (chain->inits) | |
432 { | |
433 fprintf (file, " inits"); | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
434 FOR_EACH_VEC_ELT (tree, chain->inits, i, var) |
0 | 435 { |
436 fprintf (file, " "); | |
437 print_generic_expr (file, var, TDF_SLIM); | |
438 } | |
439 fprintf (file, "\n"); | |
440 } | |
441 | |
442 fprintf (file, " references:\n"); | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
443 FOR_EACH_VEC_ELT (dref, chain->refs, i, a) |
0 | 444 dump_dref (file, a); |
445 | |
446 fprintf (file, "\n"); | |
447 } | |
448 | |
449 /* Dumps CHAINS to FILE. */ | |
450 | |
451 extern void dump_chains (FILE *, VEC (chain_p, heap) *); | |
452 void | |
453 dump_chains (FILE *file, VEC (chain_p, heap) *chains) | |
454 { | |
455 chain_p chain; | |
456 unsigned i; | |
457 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
458 FOR_EACH_VEC_ELT (chain_p, chains, i, chain) |
0 | 459 dump_chain (file, chain); |
460 } | |
461 | |
462 /* Dumps COMP to FILE. */ | |
463 | |
464 extern void dump_component (FILE *, struct component *); | |
465 void | |
466 dump_component (FILE *file, struct component *comp) | |
467 { | |
468 dref a; | |
469 unsigned i; | |
470 | |
471 fprintf (file, "Component%s:\n", | |
472 comp->comp_step == RS_INVARIANT ? " (invariant)" : ""); | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
473 FOR_EACH_VEC_ELT (dref, comp->refs, i, a) |
0 | 474 dump_dref (file, a); |
475 fprintf (file, "\n"); | |
476 } | |
477 | |
478 /* Dumps COMPS to FILE. */ | |
479 | |
480 extern void dump_components (FILE *, struct component *); | |
481 void | |
482 dump_components (FILE *file, struct component *comps) | |
483 { | |
484 struct component *comp; | |
485 | |
486 for (comp = comps; comp; comp = comp->next) | |
487 dump_component (file, comp); | |
488 } | |
489 | |
490 /* Frees a chain CHAIN. */ | |
491 | |
492 static void | |
493 release_chain (chain_p chain) | |
494 { | |
495 dref ref; | |
496 unsigned i; | |
497 | |
498 if (chain == NULL) | |
499 return; | |
500 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
501 FOR_EACH_VEC_ELT (dref, chain->refs, i, ref) |
0 | 502 free (ref); |
503 | |
504 VEC_free (dref, heap, chain->refs); | |
505 VEC_free (tree, heap, chain->vars); | |
506 VEC_free (tree, heap, chain->inits); | |
507 | |
508 free (chain); | |
509 } | |
510 | |
511 /* Frees CHAINS. */ | |
512 | |
513 static void | |
514 release_chains (VEC (chain_p, heap) *chains) | |
515 { | |
516 unsigned i; | |
517 chain_p chain; | |
518 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
519 FOR_EACH_VEC_ELT (chain_p, chains, i, chain) |
0 | 520 release_chain (chain); |
521 VEC_free (chain_p, heap, chains); | |
522 } | |
523 | |
524 /* Frees a component COMP. */ | |
525 | |
526 static void | |
527 release_component (struct component *comp) | |
528 { | |
529 VEC_free (dref, heap, comp->refs); | |
530 free (comp); | |
531 } | |
532 | |
533 /* Frees list of components COMPS. */ | |
534 | |
535 static void | |
536 release_components (struct component *comps) | |
537 { | |
538 struct component *act, *next; | |
539 | |
540 for (act = comps; act; act = next) | |
541 { | |
542 next = act->next; | |
543 release_component (act); | |
544 } | |
545 } | |
546 | |
547 /* Finds a root of tree given by FATHERS containing A, and performs path | |
548 shortening. */ | |
549 | |
550 static unsigned | |
551 component_of (unsigned fathers[], unsigned a) | |
552 { | |
553 unsigned root, n; | |
554 | |
555 for (root = a; root != fathers[root]; root = fathers[root]) | |
556 continue; | |
557 | |
558 for (; a != root; a = n) | |
559 { | |
560 n = fathers[a]; | |
561 fathers[a] = root; | |
562 } | |
563 | |
564 return root; | |
565 } | |
566 | |
567 /* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the | |
568 components, A and B are components to merge. */ | |
569 | |
570 static void | |
571 merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b) | |
572 { | |
573 unsigned ca = component_of (fathers, a); | |
574 unsigned cb = component_of (fathers, b); | |
575 | |
576 if (ca == cb) | |
577 return; | |
578 | |
579 if (sizes[ca] < sizes[cb]) | |
580 { | |
581 sizes[cb] += sizes[ca]; | |
582 fathers[ca] = cb; | |
583 } | |
584 else | |
585 { | |
586 sizes[ca] += sizes[cb]; | |
587 fathers[cb] = ca; | |
588 } | |
589 } | |
590 | |
591 /* Returns true if A is a reference that is suitable for predictive commoning | |
592 in the innermost loop that contains it. REF_STEP is set according to the | |
593 step of the reference A. */ | |
594 | |
595 static bool | |
596 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step) | |
597 { | |
598 tree ref = DR_REF (a), step = DR_STEP (a); | |
599 | |
600 if (!step | |
601 || !is_gimple_reg_type (TREE_TYPE (ref)) | |
602 || tree_could_throw_p (ref)) | |
603 return false; | |
604 | |
605 if (integer_zerop (step)) | |
606 *ref_step = RS_INVARIANT; | |
607 else if (integer_nonzerop (step)) | |
608 *ref_step = RS_NONZERO; | |
609 else | |
610 *ref_step = RS_ANY; | |
611 | |
612 return true; | |
613 } | |
614 | |
615 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */ | |
616 | |
617 static void | |
618 aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset) | |
619 { | |
620 aff_tree delta; | |
621 | |
622 tree_to_aff_combination_expand (DR_OFFSET (dr), sizetype, offset, | |
623 &name_expansions); | |
624 aff_combination_const (&delta, sizetype, tree_to_double_int (DR_INIT (dr))); | |
625 aff_combination_add (offset, &delta); | |
626 } | |
627 | |
628 /* Determines number of iterations of the innermost enclosing loop before B | |
629 refers to exactly the same location as A and stores it to OFF. If A and | |
630 B do not have the same step, they never meet, or anything else fails, | |
631 returns false, otherwise returns true. Both A and B are assumed to | |
632 satisfy suitable_reference_p. */ | |
633 | |
634 static bool | |
635 determine_offset (struct data_reference *a, struct data_reference *b, | |
636 double_int *off) | |
637 { | |
638 aff_tree diff, baseb, step; | |
639 tree typea, typeb; | |
640 | |
641 /* Check that both the references access the location in the same type. */ | |
642 typea = TREE_TYPE (DR_REF (a)); | |
643 typeb = TREE_TYPE (DR_REF (b)); | |
644 if (!useless_type_conversion_p (typeb, typea)) | |
645 return false; | |
646 | |
647 /* Check whether the base address and the step of both references is the | |
648 same. */ | |
649 if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0) | |
650 || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0)) | |
651 return false; | |
652 | |
653 if (integer_zerop (DR_STEP (a))) | |
654 { | |
655 /* If the references have loop invariant address, check that they access | |
656 exactly the same location. */ | |
657 *off = double_int_zero; | |
658 return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0) | |
659 && operand_equal_p (DR_INIT (a), DR_INIT (b), 0)); | |
660 } | |
661 | |
662 /* Compare the offsets of the addresses, and check whether the difference | |
663 is a multiple of step. */ | |
664 aff_combination_dr_offset (a, &diff); | |
665 aff_combination_dr_offset (b, &baseb); | |
666 aff_combination_scale (&baseb, double_int_minus_one); | |
667 aff_combination_add (&diff, &baseb); | |
668 | |
669 tree_to_aff_combination_expand (DR_STEP (a), sizetype, | |
670 &step, &name_expansions); | |
671 return aff_combination_constant_multiple_p (&diff, &step, off); | |
672 } | |
673 | |
674 /* Returns the last basic block in LOOP for that we are sure that | |
675 it is executed whenever the loop is entered. */ | |
676 | |
677 static basic_block | |
678 last_always_executed_block (struct loop *loop) | |
679 { | |
680 unsigned i; | |
681 VEC (edge, heap) *exits = get_loop_exit_edges (loop); | |
682 edge ex; | |
683 basic_block last = loop->latch; | |
684 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
685 FOR_EACH_VEC_ELT (edge, exits, i, ex) |
0 | 686 last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src); |
687 VEC_free (edge, heap, exits); | |
688 | |
689 return last; | |
690 } | |
691 | |
692 /* Splits dependence graph on DATAREFS described by DEPENDS to components. */ | |
693 | |
694 static struct component * | |
695 split_data_refs_to_components (struct loop *loop, | |
696 VEC (data_reference_p, heap) *datarefs, | |
697 VEC (ddr_p, heap) *depends) | |
698 { | |
699 unsigned i, n = VEC_length (data_reference_p, datarefs); | |
700 unsigned ca, ia, ib, bad; | |
701 unsigned *comp_father = XNEWVEC (unsigned, n + 1); | |
702 unsigned *comp_size = XNEWVEC (unsigned, n + 1); | |
703 struct component **comps; | |
704 struct data_reference *dr, *dra, *drb; | |
705 struct data_dependence_relation *ddr; | |
706 struct component *comp_list = NULL, *comp; | |
707 dref dataref; | |
708 basic_block last_always_executed = last_always_executed_block (loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
709 |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
710 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr) |
0 | 711 { |
712 if (!DR_REF (dr)) | |
713 { | |
714 /* A fake reference for call or asm_expr that may clobber memory; | |
715 just fail. */ | |
716 goto end; | |
717 } | |
718 dr->aux = (void *) (size_t) i; | |
719 comp_father[i] = i; | |
720 comp_size[i] = 1; | |
721 } | |
722 | |
723 /* A component reserved for the "bad" data references. */ | |
724 comp_father[n] = n; | |
725 comp_size[n] = 1; | |
726 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
727 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr) |
0 | 728 { |
729 enum ref_step_type dummy; | |
730 | |
731 if (!suitable_reference_p (dr, &dummy)) | |
732 { | |
733 ia = (unsigned) (size_t) dr->aux; | |
734 merge_comps (comp_father, comp_size, n, ia); | |
735 } | |
736 } | |
737 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
738 FOR_EACH_VEC_ELT (ddr_p, depends, i, ddr) |
0 | 739 { |
740 double_int dummy_off; | |
741 | |
742 if (DDR_ARE_DEPENDENT (ddr) == chrec_known) | |
743 continue; | |
744 | |
745 dra = DDR_A (ddr); | |
746 drb = DDR_B (ddr); | |
747 ia = component_of (comp_father, (unsigned) (size_t) dra->aux); | |
748 ib = component_of (comp_father, (unsigned) (size_t) drb->aux); | |
749 if (ia == ib) | |
750 continue; | |
751 | |
752 bad = component_of (comp_father, n); | |
753 | |
754 /* If both A and B are reads, we may ignore unsuitable dependences. */ | |
755 if (DR_IS_READ (dra) && DR_IS_READ (drb) | |
756 && (ia == bad || ib == bad | |
757 || !determine_offset (dra, drb, &dummy_off))) | |
758 continue; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
759 |
0 | 760 merge_comps (comp_father, comp_size, ia, ib); |
761 } | |
762 | |
763 comps = XCNEWVEC (struct component *, n); | |
764 bad = component_of (comp_father, n); | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
765 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr) |
0 | 766 { |
767 ia = (unsigned) (size_t) dr->aux; | |
768 ca = component_of (comp_father, ia); | |
769 if (ca == bad) | |
770 continue; | |
771 | |
772 comp = comps[ca]; | |
773 if (!comp) | |
774 { | |
775 comp = XCNEW (struct component); | |
776 comp->refs = VEC_alloc (dref, heap, comp_size[ca]); | |
777 comps[ca] = comp; | |
778 } | |
779 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
780 dataref = XCNEW (struct dref_d); |
0 | 781 dataref->ref = dr; |
782 dataref->stmt = DR_STMT (dr); | |
783 dataref->offset = double_int_zero; | |
784 dataref->distance = 0; | |
785 | |
786 dataref->always_accessed | |
787 = dominated_by_p (CDI_DOMINATORS, last_always_executed, | |
788 gimple_bb (dataref->stmt)); | |
789 dataref->pos = VEC_length (dref, comp->refs); | |
790 VEC_quick_push (dref, comp->refs, dataref); | |
791 } | |
792 | |
793 for (i = 0; i < n; i++) | |
794 { | |
795 comp = comps[i]; | |
796 if (comp) | |
797 { | |
798 comp->next = comp_list; | |
799 comp_list = comp; | |
800 } | |
801 } | |
802 free (comps); | |
803 | |
804 end: | |
805 free (comp_father); | |
806 free (comp_size); | |
807 return comp_list; | |
808 } | |
809 | |
810 /* Returns true if the component COMP satisfies the conditions | |
811 described in 2) at the beginning of this file. LOOP is the current | |
812 loop. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
813 |
0 | 814 static bool |
815 suitable_component_p (struct loop *loop, struct component *comp) | |
816 { | |
817 unsigned i; | |
818 dref a, first; | |
819 basic_block ba, bp = loop->header; | |
820 bool ok, has_write = false; | |
821 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
822 FOR_EACH_VEC_ELT (dref, comp->refs, i, a) |
0 | 823 { |
824 ba = gimple_bb (a->stmt); | |
825 | |
826 if (!just_once_each_iteration_p (loop, ba)) | |
827 return false; | |
828 | |
829 gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp)); | |
830 bp = ba; | |
831 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
832 if (DR_IS_WRITE (a->ref)) |
0 | 833 has_write = true; |
834 } | |
835 | |
836 first = VEC_index (dref, comp->refs, 0); | |
837 ok = suitable_reference_p (first->ref, &comp->comp_step); | |
838 gcc_assert (ok); | |
839 first->offset = double_int_zero; | |
840 | |
841 for (i = 1; VEC_iterate (dref, comp->refs, i, a); i++) | |
842 { | |
843 if (!determine_offset (first->ref, a->ref, &a->offset)) | |
844 return false; | |
845 | |
846 #ifdef ENABLE_CHECKING | |
847 { | |
848 enum ref_step_type a_step; | |
849 ok = suitable_reference_p (a->ref, &a_step); | |
850 gcc_assert (ok && a_step == comp->comp_step); | |
851 } | |
852 #endif | |
853 } | |
854 | |
855 /* If there is a write inside the component, we must know whether the | |
856 step is nonzero or not -- we would not otherwise be able to recognize | |
857 whether the value accessed by reads comes from the OFFSET-th iteration | |
858 or the previous one. */ | |
859 if (has_write && comp->comp_step == RS_ANY) | |
860 return false; | |
861 | |
862 return true; | |
863 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
864 |
0 | 865 /* Check the conditions on references inside each of components COMPS, |
866 and remove the unsuitable components from the list. The new list | |
867 of components is returned. The conditions are described in 2) at | |
868 the beginning of this file. LOOP is the current loop. */ | |
869 | |
870 static struct component * | |
871 filter_suitable_components (struct loop *loop, struct component *comps) | |
872 { | |
873 struct component **comp, *act; | |
874 | |
875 for (comp = &comps; *comp; ) | |
876 { | |
877 act = *comp; | |
878 if (suitable_component_p (loop, act)) | |
879 comp = &act->next; | |
880 else | |
881 { | |
882 dref ref; | |
883 unsigned i; | |
884 | |
885 *comp = act->next; | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
886 FOR_EACH_VEC_ELT (dref, act->refs, i, ref) |
0 | 887 free (ref); |
888 release_component (act); | |
889 } | |
890 } | |
891 | |
892 return comps; | |
893 } | |
894 | |
895 /* Compares two drefs A and B by their offset and position. Callback for | |
896 qsort. */ | |
897 | |
898 static int | |
899 order_drefs (const void *a, const void *b) | |
900 { | |
901 const dref *const da = (const dref *) a; | |
902 const dref *const db = (const dref *) b; | |
903 int offcmp = double_int_scmp ((*da)->offset, (*db)->offset); | |
904 | |
905 if (offcmp != 0) | |
906 return offcmp; | |
907 | |
908 return (*da)->pos - (*db)->pos; | |
909 } | |
910 | |
911 /* Returns root of the CHAIN. */ | |
912 | |
913 static inline dref | |
914 get_chain_root (chain_p chain) | |
915 { | |
916 return VEC_index (dref, chain->refs, 0); | |
917 } | |
918 | |
919 /* Adds REF to the chain CHAIN. */ | |
920 | |
921 static void | |
922 add_ref_to_chain (chain_p chain, dref ref) | |
923 { | |
924 dref root = get_chain_root (chain); | |
925 double_int dist; | |
926 | |
927 gcc_assert (double_int_scmp (root->offset, ref->offset) <= 0); | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
928 dist = double_int_sub (ref->offset, root->offset); |
0 | 929 if (double_int_ucmp (uhwi_to_double_int (MAX_DISTANCE), dist) <= 0) |
930 { | |
931 free (ref); | |
932 return; | |
933 } | |
934 gcc_assert (double_int_fits_in_uhwi_p (dist)); | |
935 | |
936 VEC_safe_push (dref, heap, chain->refs, ref); | |
937 | |
938 ref->distance = double_int_to_uhwi (dist); | |
939 | |
940 if (ref->distance >= chain->length) | |
941 { | |
942 chain->length = ref->distance; | |
943 chain->has_max_use_after = false; | |
944 } | |
945 | |
946 if (ref->distance == chain->length | |
947 && ref->pos > root->pos) | |
948 chain->has_max_use_after = true; | |
949 | |
950 chain->all_always_accessed &= ref->always_accessed; | |
951 } | |
952 | |
953 /* Returns the chain for invariant component COMP. */ | |
954 | |
955 static chain_p | |
956 make_invariant_chain (struct component *comp) | |
957 { | |
958 chain_p chain = XCNEW (struct chain); | |
959 unsigned i; | |
960 dref ref; | |
961 | |
962 chain->type = CT_INVARIANT; | |
963 | |
964 chain->all_always_accessed = true; | |
965 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
966 FOR_EACH_VEC_ELT (dref, comp->refs, i, ref) |
0 | 967 { |
968 VEC_safe_push (dref, heap, chain->refs, ref); | |
969 chain->all_always_accessed &= ref->always_accessed; | |
970 } | |
971 | |
972 return chain; | |
973 } | |
974 | |
975 /* Make a new chain rooted at REF. */ | |
976 | |
977 static chain_p | |
978 make_rooted_chain (dref ref) | |
979 { | |
980 chain_p chain = XCNEW (struct chain); | |
981 | |
982 chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD; | |
983 | |
984 VEC_safe_push (dref, heap, chain->refs, ref); | |
985 chain->all_always_accessed = ref->always_accessed; | |
986 | |
987 ref->distance = 0; | |
988 | |
989 return chain; | |
990 } | |
991 | |
992 /* Returns true if CHAIN is not trivial. */ | |
993 | |
994 static bool | |
995 nontrivial_chain_p (chain_p chain) | |
996 { | |
997 return chain != NULL && VEC_length (dref, chain->refs) > 1; | |
998 } | |
999 | |
1000 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there | |
1001 is no such name. */ | |
1002 | |
1003 static tree | |
1004 name_for_ref (dref ref) | |
1005 { | |
1006 tree name; | |
1007 | |
1008 if (is_gimple_assign (ref->stmt)) | |
1009 { | |
1010 if (!ref->ref || DR_IS_READ (ref->ref)) | |
1011 name = gimple_assign_lhs (ref->stmt); | |
1012 else | |
1013 name = gimple_assign_rhs1 (ref->stmt); | |
1014 } | |
1015 else | |
1016 name = PHI_RESULT (ref->stmt); | |
1017 | |
1018 return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE); | |
1019 } | |
1020 | |
1021 /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in | |
1022 iterations of the innermost enclosing loop). */ | |
1023 | |
1024 static bool | |
1025 valid_initializer_p (struct data_reference *ref, | |
1026 unsigned distance, struct data_reference *root) | |
1027 { | |
1028 aff_tree diff, base, step; | |
1029 double_int off; | |
1030 | |
1031 /* Both REF and ROOT must be accessing the same object. */ | |
1032 if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0)) | |
1033 return false; | |
1034 | |
1035 /* The initializer is defined outside of loop, hence its address must be | |
1036 invariant inside the loop. */ | |
1037 gcc_assert (integer_zerop (DR_STEP (ref))); | |
1038 | |
1039 /* If the address of the reference is invariant, initializer must access | |
1040 exactly the same location. */ | |
1041 if (integer_zerop (DR_STEP (root))) | |
1042 return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0) | |
1043 && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0)); | |
1044 | |
1045 /* Verify that this index of REF is equal to the root's index at | |
1046 -DISTANCE-th iteration. */ | |
1047 aff_combination_dr_offset (root, &diff); | |
1048 aff_combination_dr_offset (ref, &base); | |
1049 aff_combination_scale (&base, double_int_minus_one); | |
1050 aff_combination_add (&diff, &base); | |
1051 | |
1052 tree_to_aff_combination_expand (DR_STEP (root), sizetype, &step, | |
1053 &name_expansions); | |
1054 if (!aff_combination_constant_multiple_p (&diff, &step, &off)) | |
1055 return false; | |
1056 | |
1057 if (!double_int_equal_p (off, uhwi_to_double_int (distance))) | |
1058 return false; | |
1059 | |
1060 return true; | |
1061 } | |
1062 | |
1063 /* Finds looparound phi node of LOOP that copies the value of REF, and if its | |
1064 initial value is correct (equal to initial value of REF shifted by one | |
1065 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT | |
1066 is the root of the current chain. */ | |
1067 | |
1068 static gimple | |
1069 find_looparound_phi (struct loop *loop, dref ref, dref root) | |
1070 { | |
1071 tree name, init, init_ref; | |
1072 gimple phi = NULL, init_stmt; | |
1073 edge latch = loop_latch_edge (loop); | |
1074 struct data_reference init_dr; | |
1075 gimple_stmt_iterator psi; | |
1076 | |
1077 if (is_gimple_assign (ref->stmt)) | |
1078 { | |
1079 if (DR_IS_READ (ref->ref)) | |
1080 name = gimple_assign_lhs (ref->stmt); | |
1081 else | |
1082 name = gimple_assign_rhs1 (ref->stmt); | |
1083 } | |
1084 else | |
1085 name = PHI_RESULT (ref->stmt); | |
1086 if (!name) | |
1087 return NULL; | |
1088 | |
1089 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) | |
1090 { | |
1091 phi = gsi_stmt (psi); | |
1092 if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name) | |
1093 break; | |
1094 } | |
1095 | |
1096 if (gsi_end_p (psi)) | |
1097 return NULL; | |
1098 | |
1099 init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop)); | |
1100 if (TREE_CODE (init) != SSA_NAME) | |
1101 return NULL; | |
1102 init_stmt = SSA_NAME_DEF_STMT (init); | |
1103 if (gimple_code (init_stmt) != GIMPLE_ASSIGN) | |
1104 return NULL; | |
1105 gcc_assert (gimple_assign_lhs (init_stmt) == init); | |
1106 | |
1107 init_ref = gimple_assign_rhs1 (init_stmt); | |
1108 if (!REFERENCE_CLASS_P (init_ref) | |
1109 && !DECL_P (init_ref)) | |
1110 return NULL; | |
1111 | |
1112 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost | |
1113 loop enclosing PHI). */ | |
1114 memset (&init_dr, 0, sizeof (struct data_reference)); | |
1115 DR_REF (&init_dr) = init_ref; | |
1116 DR_STMT (&init_dr) = phi; | |
1117 if (!dr_analyze_innermost (&init_dr)) | |
1118 return NULL; | |
1119 | |
1120 if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref)) | |
1121 return NULL; | |
1122 | |
1123 return phi; | |
1124 } | |
1125 | |
1126 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */ | |
1127 | |
1128 static void | |
1129 insert_looparound_copy (chain_p chain, dref ref, gimple phi) | |
1130 { | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1131 dref nw = XCNEW (struct dref_d), aref; |
0 | 1132 unsigned i; |
1133 | |
1134 nw->stmt = phi; | |
1135 nw->distance = ref->distance + 1; | |
1136 nw->always_accessed = 1; | |
1137 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1138 FOR_EACH_VEC_ELT (dref, chain->refs, i, aref) |
0 | 1139 if (aref->distance >= nw->distance) |
1140 break; | |
1141 VEC_safe_insert (dref, heap, chain->refs, i, nw); | |
1142 | |
1143 if (nw->distance > chain->length) | |
1144 { | |
1145 chain->length = nw->distance; | |
1146 chain->has_max_use_after = false; | |
1147 } | |
1148 } | |
1149 | |
1150 /* For references in CHAIN that are copied around the LOOP (created previously | |
1151 by PRE, or by user), add the results of such copies to the chain. This | |
1152 enables us to remove the copies by unrolling, and may need less registers | |
1153 (also, it may allow us to combine chains together). */ | |
1154 | |
1155 static void | |
1156 add_looparound_copies (struct loop *loop, chain_p chain) | |
1157 { | |
1158 unsigned i; | |
1159 dref ref, root = get_chain_root (chain); | |
1160 gimple phi; | |
1161 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1162 FOR_EACH_VEC_ELT (dref, chain->refs, i, ref) |
0 | 1163 { |
1164 phi = find_looparound_phi (loop, ref, root); | |
1165 if (!phi) | |
1166 continue; | |
1167 | |
1168 bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi))); | |
1169 insert_looparound_copy (chain, ref, phi); | |
1170 } | |
1171 } | |
1172 | |
1173 /* Find roots of the values and determine distances in the component COMP. | |
1174 The references are redistributed into CHAINS. LOOP is the current | |
1175 loop. */ | |
1176 | |
1177 static void | |
1178 determine_roots_comp (struct loop *loop, | |
1179 struct component *comp, | |
1180 VEC (chain_p, heap) **chains) | |
1181 { | |
1182 unsigned i; | |
1183 dref a; | |
1184 chain_p chain = NULL; | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1185 double_int last_ofs = double_int_zero; |
0 | 1186 |
1187 /* Invariants are handled specially. */ | |
1188 if (comp->comp_step == RS_INVARIANT) | |
1189 { | |
1190 chain = make_invariant_chain (comp); | |
1191 VEC_safe_push (chain_p, heap, *chains, chain); | |
1192 return; | |
1193 } | |
1194 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1195 VEC_qsort (dref, comp->refs, order_drefs); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1196 |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1197 FOR_EACH_VEC_ELT (dref, comp->refs, i, a) |
0 | 1198 { |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1199 if (!chain || DR_IS_WRITE (a->ref) |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1200 || double_int_ucmp (uhwi_to_double_int (MAX_DISTANCE), |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1201 double_int_sub (a->offset, last_ofs)) <= 0) |
0 | 1202 { |
1203 if (nontrivial_chain_p (chain)) | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1204 { |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1205 add_looparound_copies (loop, chain); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1206 VEC_safe_push (chain_p, heap, *chains, chain); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1207 } |
0 | 1208 else |
1209 release_chain (chain); | |
1210 chain = make_rooted_chain (a); | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1211 last_ofs = a->offset; |
0 | 1212 continue; |
1213 } | |
1214 | |
1215 add_ref_to_chain (chain, a); | |
1216 } | |
1217 | |
1218 if (nontrivial_chain_p (chain)) | |
1219 { | |
1220 add_looparound_copies (loop, chain); | |
1221 VEC_safe_push (chain_p, heap, *chains, chain); | |
1222 } | |
1223 else | |
1224 release_chain (chain); | |
1225 } | |
1226 | |
1227 /* Find roots of the values and determine distances in components COMPS, and | |
1228 separates the references to CHAINS. LOOP is the current loop. */ | |
1229 | |
1230 static void | |
1231 determine_roots (struct loop *loop, | |
1232 struct component *comps, VEC (chain_p, heap) **chains) | |
1233 { | |
1234 struct component *comp; | |
1235 | |
1236 for (comp = comps; comp; comp = comp->next) | |
1237 determine_roots_comp (loop, comp, chains); | |
1238 } | |
1239 | |
1240 /* Replace the reference in statement STMT with temporary variable | |
1241 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of | |
1242 the reference in the statement. IN_LHS is true if the reference | |
1243 is in the lhs of STMT, false if it is in rhs. */ | |
1244 | |
1245 static void | |
1246 replace_ref_with (gimple stmt, tree new_tree, bool set, bool in_lhs) | |
1247 { | |
1248 tree val; | |
1249 gimple new_stmt; | |
1250 gimple_stmt_iterator bsi, psi; | |
1251 | |
1252 if (gimple_code (stmt) == GIMPLE_PHI) | |
1253 { | |
1254 gcc_assert (!in_lhs && !set); | |
1255 | |
1256 val = PHI_RESULT (stmt); | |
1257 bsi = gsi_after_labels (gimple_bb (stmt)); | |
1258 psi = gsi_for_stmt (stmt); | |
1259 remove_phi_node (&psi, false); | |
1260 | |
1261 /* Turn the phi node into GIMPLE_ASSIGN. */ | |
1262 new_stmt = gimple_build_assign (val, new_tree); | |
1263 gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT); | |
1264 return; | |
1265 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1266 |
0 | 1267 /* Since the reference is of gimple_reg type, it should only |
1268 appear as lhs or rhs of modify statement. */ | |
1269 gcc_assert (is_gimple_assign (stmt)); | |
1270 | |
1271 bsi = gsi_for_stmt (stmt); | |
1272 | |
1273 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */ | |
1274 if (!set) | |
1275 { | |
1276 gcc_assert (!in_lhs); | |
1277 gimple_assign_set_rhs_from_tree (&bsi, new_tree); | |
1278 stmt = gsi_stmt (bsi); | |
1279 update_stmt (stmt); | |
1280 return; | |
1281 } | |
1282 | |
1283 if (in_lhs) | |
1284 { | |
1285 /* We have statement | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1286 |
0 | 1287 OLD = VAL |
1288 | |
1289 If OLD is a memory reference, then VAL is gimple_val, and we transform | |
1290 this to | |
1291 | |
1292 OLD = VAL | |
1293 NEW = VAL | |
1294 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1295 Otherwise, we are replacing a combination chain, |
0 | 1296 VAL is the expression that performs the combination, and OLD is an |
1297 SSA name. In this case, we transform the assignment to | |
1298 | |
1299 OLD = VAL | |
1300 NEW = OLD | |
1301 | |
1302 */ | |
1303 | |
1304 val = gimple_assign_lhs (stmt); | |
1305 if (TREE_CODE (val) != SSA_NAME) | |
1306 { | |
1307 gcc_assert (gimple_assign_copy_p (stmt)); | |
1308 val = gimple_assign_rhs1 (stmt); | |
1309 } | |
1310 } | |
1311 else | |
1312 { | |
1313 /* VAL = OLD | |
1314 | |
1315 is transformed to | |
1316 | |
1317 VAL = OLD | |
1318 NEW = VAL */ | |
1319 | |
1320 val = gimple_assign_lhs (stmt); | |
1321 } | |
1322 | |
1323 new_stmt = gimple_build_assign (new_tree, unshare_expr (val)); | |
1324 gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT); | |
1325 } | |
1326 | |
1327 /* Returns the reference to the address of REF in the ITER-th iteration of | |
1328 LOOP, or NULL if we fail to determine it (ITER may be negative). We | |
1329 try to preserve the original shape of the reference (not rewrite it | |
1330 as an indirect ref to the address), to make tree_could_trap_p in | |
1331 prepare_initializers_chain return false more often. */ | |
1332 | |
1333 static tree | |
1334 ref_at_iteration (struct loop *loop, tree ref, int iter) | |
1335 { | |
1336 tree idx, *idx_p, type, val, op0 = NULL_TREE, ret; | |
1337 affine_iv iv; | |
1338 bool ok; | |
1339 | |
1340 if (handled_component_p (ref)) | |
1341 { | |
1342 op0 = ref_at_iteration (loop, TREE_OPERAND (ref, 0), iter); | |
1343 if (!op0) | |
1344 return NULL_TREE; | |
1345 } | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1346 else if (!INDIRECT_REF_P (ref) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1347 && TREE_CODE (ref) != MEM_REF) |
0 | 1348 return unshare_expr (ref); |
1349 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1350 if (TREE_CODE (ref) == MEM_REF) |
0 | 1351 { |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1352 ret = unshare_expr (ref); |
0 | 1353 idx = TREE_OPERAND (ref, 0); |
1354 idx_p = &TREE_OPERAND (ret, 0); | |
1355 } | |
1356 else if (TREE_CODE (ref) == COMPONENT_REF) | |
1357 { | |
1358 /* Check that the offset is loop invariant. */ | |
1359 if (TREE_OPERAND (ref, 2) | |
1360 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 2))) | |
1361 return NULL_TREE; | |
1362 | |
1363 return build3 (COMPONENT_REF, TREE_TYPE (ref), op0, | |
1364 unshare_expr (TREE_OPERAND (ref, 1)), | |
1365 unshare_expr (TREE_OPERAND (ref, 2))); | |
1366 } | |
1367 else if (TREE_CODE (ref) == ARRAY_REF) | |
1368 { | |
1369 /* Check that the lower bound and the step are loop invariant. */ | |
1370 if (TREE_OPERAND (ref, 2) | |
1371 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 2))) | |
1372 return NULL_TREE; | |
1373 if (TREE_OPERAND (ref, 3) | |
1374 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 3))) | |
1375 return NULL_TREE; | |
1376 | |
1377 ret = build4 (ARRAY_REF, TREE_TYPE (ref), op0, NULL_TREE, | |
1378 unshare_expr (TREE_OPERAND (ref, 2)), | |
1379 unshare_expr (TREE_OPERAND (ref, 3))); | |
1380 idx = TREE_OPERAND (ref, 1); | |
1381 idx_p = &TREE_OPERAND (ret, 1); | |
1382 } | |
1383 else | |
1384 return NULL_TREE; | |
1385 | |
1386 ok = simple_iv (loop, loop, idx, &iv, true); | |
1387 if (!ok) | |
1388 return NULL_TREE; | |
1389 iv.base = expand_simple_operations (iv.base); | |
1390 if (integer_zerop (iv.step)) | |
1391 *idx_p = unshare_expr (iv.base); | |
1392 else | |
1393 { | |
1394 type = TREE_TYPE (iv.base); | |
1395 if (POINTER_TYPE_P (type)) | |
1396 { | |
1397 val = fold_build2 (MULT_EXPR, sizetype, iv.step, | |
1398 size_int (iter)); | |
1399 val = fold_build2 (POINTER_PLUS_EXPR, type, iv.base, val); | |
1400 } | |
1401 else | |
1402 { | |
1403 val = fold_build2 (MULT_EXPR, type, iv.step, | |
1404 build_int_cst_type (type, iter)); | |
1405 val = fold_build2 (PLUS_EXPR, type, iv.base, val); | |
1406 } | |
1407 *idx_p = unshare_expr (val); | |
1408 } | |
1409 | |
1410 return ret; | |
1411 } | |
1412 | |
1413 /* Get the initialization expression for the INDEX-th temporary variable | |
1414 of CHAIN. */ | |
1415 | |
1416 static tree | |
1417 get_init_expr (chain_p chain, unsigned index) | |
1418 { | |
1419 if (chain->type == CT_COMBINATION) | |
1420 { | |
1421 tree e1 = get_init_expr (chain->ch1, index); | |
1422 tree e2 = get_init_expr (chain->ch2, index); | |
1423 | |
1424 return fold_build2 (chain->op, chain->rslt_type, e1, e2); | |
1425 } | |
1426 else | |
1427 return VEC_index (tree, chain->inits, index); | |
1428 } | |
1429 | |
1430 /* Marks all virtual operands of statement STMT for renaming. */ | |
1431 | |
1432 void | |
1433 mark_virtual_ops_for_renaming (gimple stmt) | |
1434 { | |
1435 tree var; | |
1436 | |
1437 if (gimple_code (stmt) == GIMPLE_PHI) | |
1438 { | |
1439 var = PHI_RESULT (stmt); | |
1440 if (is_gimple_reg (var)) | |
1441 return; | |
1442 | |
1443 if (TREE_CODE (var) == SSA_NAME) | |
1444 var = SSA_NAME_VAR (var); | |
1445 mark_sym_for_renaming (var); | |
1446 return; | |
1447 } | |
1448 | |
1449 update_stmt (stmt); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1450 if (gimple_vuse (stmt)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1451 mark_sym_for_renaming (gimple_vop (cfun)); |
0 | 1452 } |
1453 | |
1454 /* Returns a new temporary variable used for the I-th variable carrying | |
1455 value of REF. The variable's uid is marked in TMP_VARS. */ | |
1456 | |
1457 static tree | |
1458 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars) | |
1459 { | |
1460 tree type = TREE_TYPE (ref); | |
1461 /* We never access the components of the temporary variable in predictive | |
1462 commoning. */ | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1463 tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i)); |
0 | 1464 |
1465 add_referenced_var (var); | |
1466 bitmap_set_bit (tmp_vars, DECL_UID (var)); | |
1467 return var; | |
1468 } | |
1469 | |
1470 /* Creates the variables for CHAIN, as well as phi nodes for them and | |
1471 initialization on entry to LOOP. Uids of the newly created | |
1472 temporary variables are marked in TMP_VARS. */ | |
1473 | |
1474 static void | |
1475 initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars) | |
1476 { | |
1477 unsigned i; | |
1478 unsigned n = chain->length; | |
1479 dref root = get_chain_root (chain); | |
1480 bool reuse_first = !chain->has_max_use_after; | |
1481 tree ref, init, var, next; | |
1482 gimple phi; | |
1483 gimple_seq stmts; | |
1484 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop); | |
1485 | |
1486 /* If N == 0, then all the references are within the single iteration. And | |
1487 since this is an nonempty chain, reuse_first cannot be true. */ | |
1488 gcc_assert (n > 0 || !reuse_first); | |
1489 | |
1490 chain->vars = VEC_alloc (tree, heap, n + 1); | |
1491 | |
1492 if (chain->type == CT_COMBINATION) | |
1493 ref = gimple_assign_lhs (root->stmt); | |
1494 else | |
1495 ref = DR_REF (root->ref); | |
1496 | |
1497 for (i = 0; i < n + (reuse_first ? 0 : 1); i++) | |
1498 { | |
1499 var = predcom_tmp_var (ref, i, tmp_vars); | |
1500 VEC_quick_push (tree, chain->vars, var); | |
1501 } | |
1502 if (reuse_first) | |
1503 VEC_quick_push (tree, chain->vars, VEC_index (tree, chain->vars, 0)); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1504 |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1505 FOR_EACH_VEC_ELT (tree, chain->vars, i, var) |
0 | 1506 VEC_replace (tree, chain->vars, i, make_ssa_name (var, NULL)); |
1507 | |
1508 for (i = 0; i < n; i++) | |
1509 { | |
1510 var = VEC_index (tree, chain->vars, i); | |
1511 next = VEC_index (tree, chain->vars, i + 1); | |
1512 init = get_init_expr (chain, i); | |
1513 | |
1514 init = force_gimple_operand (init, &stmts, true, NULL_TREE); | |
1515 if (stmts) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1516 gsi_insert_seq_on_edge_immediate (entry, stmts); |
0 | 1517 |
1518 phi = create_phi_node (var, loop->header); | |
1519 SSA_NAME_DEF_STMT (var) = phi; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1520 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1521 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION); |
0 | 1522 } |
1523 } | |
1524 | |
1525 /* Create the variables and initialization statement for root of chain | |
1526 CHAIN. Uids of the newly created temporary variables are marked | |
1527 in TMP_VARS. */ | |
1528 | |
1529 static void | |
1530 initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars) | |
1531 { | |
1532 dref root = get_chain_root (chain); | |
1533 bool in_lhs = (chain->type == CT_STORE_LOAD | |
1534 || chain->type == CT_COMBINATION); | |
1535 | |
1536 initialize_root_vars (loop, chain, tmp_vars); | |
1537 replace_ref_with (root->stmt, | |
1538 VEC_index (tree, chain->vars, chain->length), | |
1539 true, in_lhs); | |
1540 } | |
1541 | |
1542 /* Initializes a variable for load motion for ROOT and prepares phi nodes and | |
1543 initialization on entry to LOOP if necessary. The ssa name for the variable | |
1544 is stored in VARS. If WRITTEN is true, also a phi node to copy its value | |
1545 around the loop is created. Uid of the newly created temporary variable | |
1546 is marked in TMP_VARS. INITS is the list containing the (single) | |
1547 initializer. */ | |
1548 | |
1549 static void | |
1550 initialize_root_vars_lm (struct loop *loop, dref root, bool written, | |
1551 VEC(tree, heap) **vars, VEC(tree, heap) *inits, | |
1552 bitmap tmp_vars) | |
1553 { | |
1554 unsigned i; | |
1555 tree ref = DR_REF (root->ref), init, var, next; | |
1556 gimple_seq stmts; | |
1557 gimple phi; | |
1558 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop); | |
1559 | |
1560 /* Find the initializer for the variable, and check that it cannot | |
1561 trap. */ | |
1562 init = VEC_index (tree, inits, 0); | |
1563 | |
1564 *vars = VEC_alloc (tree, heap, written ? 2 : 1); | |
1565 var = predcom_tmp_var (ref, 0, tmp_vars); | |
1566 VEC_quick_push (tree, *vars, var); | |
1567 if (written) | |
1568 VEC_quick_push (tree, *vars, VEC_index (tree, *vars, 0)); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1569 |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1570 FOR_EACH_VEC_ELT (tree, *vars, i, var) |
0 | 1571 VEC_replace (tree, *vars, i, make_ssa_name (var, NULL)); |
1572 | |
1573 var = VEC_index (tree, *vars, 0); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1574 |
0 | 1575 init = force_gimple_operand (init, &stmts, written, NULL_TREE); |
1576 if (stmts) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1577 gsi_insert_seq_on_edge_immediate (entry, stmts); |
0 | 1578 |
1579 if (written) | |
1580 { | |
1581 next = VEC_index (tree, *vars, 1); | |
1582 phi = create_phi_node (var, loop->header); | |
1583 SSA_NAME_DEF_STMT (var) = phi; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1584 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1585 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION); |
0 | 1586 } |
1587 else | |
1588 { | |
1589 gimple init_stmt = gimple_build_assign (var, init); | |
1590 mark_virtual_ops_for_renaming (init_stmt); | |
1591 gsi_insert_on_edge_immediate (entry, init_stmt); | |
1592 } | |
1593 } | |
1594 | |
1595 | |
1596 /* Execute load motion for references in chain CHAIN. Uids of the newly | |
1597 created temporary variables are marked in TMP_VARS. */ | |
1598 | |
1599 static void | |
1600 execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars) | |
1601 { | |
1602 VEC (tree, heap) *vars; | |
1603 dref a; | |
1604 unsigned n_writes = 0, ridx, i; | |
1605 tree var; | |
1606 | |
1607 gcc_assert (chain->type == CT_INVARIANT); | |
1608 gcc_assert (!chain->combined); | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1609 FOR_EACH_VEC_ELT (dref, chain->refs, i, a) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1610 if (DR_IS_WRITE (a->ref)) |
0 | 1611 n_writes++; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1612 |
0 | 1613 /* If there are no reads in the loop, there is nothing to do. */ |
1614 if (n_writes == VEC_length (dref, chain->refs)) | |
1615 return; | |
1616 | |
1617 initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0, | |
1618 &vars, chain->inits, tmp_vars); | |
1619 | |
1620 ridx = 0; | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1621 FOR_EACH_VEC_ELT (dref, chain->refs, i, a) |
0 | 1622 { |
1623 bool is_read = DR_IS_READ (a->ref); | |
1624 mark_virtual_ops_for_renaming (a->stmt); | |
1625 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1626 if (DR_IS_WRITE (a->ref)) |
0 | 1627 { |
1628 n_writes--; | |
1629 if (n_writes) | |
1630 { | |
1631 var = VEC_index (tree, vars, 0); | |
1632 var = make_ssa_name (SSA_NAME_VAR (var), NULL); | |
1633 VEC_replace (tree, vars, 0, var); | |
1634 } | |
1635 else | |
1636 ridx = 1; | |
1637 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1638 |
0 | 1639 replace_ref_with (a->stmt, VEC_index (tree, vars, ridx), |
1640 !is_read, !is_read); | |
1641 } | |
1642 | |
1643 VEC_free (tree, heap, vars); | |
1644 } | |
1645 | |
1646 /* Returns the single statement in that NAME is used, excepting | |
1647 the looparound phi nodes contained in one of the chains. If there is no | |
1648 such statement, or more statements, NULL is returned. */ | |
1649 | |
1650 static gimple | |
1651 single_nonlooparound_use (tree name) | |
1652 { | |
1653 use_operand_p use; | |
1654 imm_use_iterator it; | |
1655 gimple stmt, ret = NULL; | |
1656 | |
1657 FOR_EACH_IMM_USE_FAST (use, it, name) | |
1658 { | |
1659 stmt = USE_STMT (use); | |
1660 | |
1661 if (gimple_code (stmt) == GIMPLE_PHI) | |
1662 { | |
1663 /* Ignore uses in looparound phi nodes. Uses in other phi nodes | |
1664 could not be processed anyway, so just fail for them. */ | |
1665 if (bitmap_bit_p (looparound_phis, | |
1666 SSA_NAME_VERSION (PHI_RESULT (stmt)))) | |
1667 continue; | |
1668 | |
1669 return NULL; | |
1670 } | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1671 else if (is_gimple_debug (stmt)) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1672 continue; |
0 | 1673 else if (ret != NULL) |
1674 return NULL; | |
1675 else | |
1676 ret = stmt; | |
1677 } | |
1678 | |
1679 return ret; | |
1680 } | |
1681 | |
1682 /* Remove statement STMT, as well as the chain of assignments in that it is | |
1683 used. */ | |
1684 | |
1685 static void | |
1686 remove_stmt (gimple stmt) | |
1687 { | |
1688 tree name; | |
1689 gimple next; | |
1690 gimple_stmt_iterator psi; | |
1691 | |
1692 if (gimple_code (stmt) == GIMPLE_PHI) | |
1693 { | |
1694 name = PHI_RESULT (stmt); | |
1695 next = single_nonlooparound_use (name); | |
1696 psi = gsi_for_stmt (stmt); | |
1697 remove_phi_node (&psi, true); | |
1698 | |
1699 if (!next | |
1700 || !gimple_assign_ssa_name_copy_p (next) | |
1701 || gimple_assign_rhs1 (next) != name) | |
1702 return; | |
1703 | |
1704 stmt = next; | |
1705 } | |
1706 | |
1707 while (1) | |
1708 { | |
1709 gimple_stmt_iterator bsi; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1710 |
0 | 1711 bsi = gsi_for_stmt (stmt); |
1712 | |
1713 name = gimple_assign_lhs (stmt); | |
1714 gcc_assert (TREE_CODE (name) == SSA_NAME); | |
1715 | |
1716 next = single_nonlooparound_use (name); | |
1717 | |
1718 mark_virtual_ops_for_renaming (stmt); | |
1719 gsi_remove (&bsi, true); | |
1720 release_defs (stmt); | |
1721 | |
1722 if (!next | |
1723 || !gimple_assign_ssa_name_copy_p (next) | |
1724 || gimple_assign_rhs1 (next) != name) | |
1725 return; | |
1726 | |
1727 stmt = next; | |
1728 } | |
1729 } | |
1730 | |
1731 /* Perform the predictive commoning optimization for a chain CHAIN. | |
1732 Uids of the newly created temporary variables are marked in TMP_VARS.*/ | |
1733 | |
1734 static void | |
1735 execute_pred_commoning_chain (struct loop *loop, chain_p chain, | |
1736 bitmap tmp_vars) | |
1737 { | |
1738 unsigned i; | |
1739 dref a, root; | |
1740 tree var; | |
1741 | |
1742 if (chain->combined) | |
1743 { | |
1744 /* For combined chains, just remove the statements that are used to | |
1745 compute the values of the expression (except for the root one). */ | |
1746 for (i = 1; VEC_iterate (dref, chain->refs, i, a); i++) | |
1747 remove_stmt (a->stmt); | |
1748 } | |
1749 else | |
1750 { | |
1751 /* For non-combined chains, set up the variables that hold its value, | |
1752 and replace the uses of the original references by these | |
1753 variables. */ | |
1754 root = get_chain_root (chain); | |
1755 mark_virtual_ops_for_renaming (root->stmt); | |
1756 | |
1757 initialize_root (loop, chain, tmp_vars); | |
1758 for (i = 1; VEC_iterate (dref, chain->refs, i, a); i++) | |
1759 { | |
1760 mark_virtual_ops_for_renaming (a->stmt); | |
1761 var = VEC_index (tree, chain->vars, chain->length - a->distance); | |
1762 replace_ref_with (a->stmt, var, false, false); | |
1763 } | |
1764 } | |
1765 } | |
1766 | |
1767 /* Determines the unroll factor necessary to remove as many temporary variable | |
1768 copies as possible. CHAINS is the list of chains that will be | |
1769 optimized. */ | |
1770 | |
1771 static unsigned | |
1772 determine_unroll_factor (VEC (chain_p, heap) *chains) | |
1773 { | |
1774 chain_p chain; | |
1775 unsigned factor = 1, af, nfactor, i; | |
1776 unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); | |
1777 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1778 FOR_EACH_VEC_ELT (chain_p, chains, i, chain) |
0 | 1779 { |
1780 if (chain->type == CT_INVARIANT || chain->combined) | |
1781 continue; | |
1782 | |
1783 /* The best unroll factor for this chain is equal to the number of | |
1784 temporary variables that we create for it. */ | |
1785 af = chain->length; | |
1786 if (chain->has_max_use_after) | |
1787 af++; | |
1788 | |
1789 nfactor = factor * af / gcd (factor, af); | |
1790 if (nfactor <= max) | |
1791 factor = nfactor; | |
1792 } | |
1793 | |
1794 return factor; | |
1795 } | |
1796 | |
1797 /* Perform the predictive commoning optimization for CHAINS. | |
1798 Uids of the newly created temporary variables are marked in TMP_VARS. */ | |
1799 | |
1800 static void | |
1801 execute_pred_commoning (struct loop *loop, VEC (chain_p, heap) *chains, | |
1802 bitmap tmp_vars) | |
1803 { | |
1804 chain_p chain; | |
1805 unsigned i; | |
1806 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1807 FOR_EACH_VEC_ELT (chain_p, chains, i, chain) |
0 | 1808 { |
1809 if (chain->type == CT_INVARIANT) | |
1810 execute_load_motion (loop, chain, tmp_vars); | |
1811 else | |
1812 execute_pred_commoning_chain (loop, chain, tmp_vars); | |
1813 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1814 |
0 | 1815 update_ssa (TODO_update_ssa_only_virtuals); |
1816 } | |
1817 | |
1818 /* For each reference in CHAINS, if its defining statement is | |
1819 phi node, record the ssa name that is defined by it. */ | |
1820 | |
1821 static void | |
1822 replace_phis_by_defined_names (VEC (chain_p, heap) *chains) | |
1823 { | |
1824 chain_p chain; | |
1825 dref a; | |
1826 unsigned i, j; | |
1827 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1828 FOR_EACH_VEC_ELT (chain_p, chains, i, chain) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1829 FOR_EACH_VEC_ELT (dref, chain->refs, j, a) |
0 | 1830 { |
1831 if (gimple_code (a->stmt) == GIMPLE_PHI) | |
1832 { | |
1833 a->name_defined_by_phi = PHI_RESULT (a->stmt); | |
1834 a->stmt = NULL; | |
1835 } | |
1836 } | |
1837 } | |
1838 | |
1839 /* For each reference in CHAINS, if name_defined_by_phi is not | |
1840 NULL, use it to set the stmt field. */ | |
1841 | |
1842 static void | |
1843 replace_names_by_phis (VEC (chain_p, heap) *chains) | |
1844 { | |
1845 chain_p chain; | |
1846 dref a; | |
1847 unsigned i, j; | |
1848 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1849 FOR_EACH_VEC_ELT (chain_p, chains, i, chain) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1850 FOR_EACH_VEC_ELT (dref, chain->refs, j, a) |
0 | 1851 if (a->stmt == NULL) |
1852 { | |
1853 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi); | |
1854 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI); | |
1855 a->name_defined_by_phi = NULL_TREE; | |
1856 } | |
1857 } | |
1858 | |
1859 /* Wrapper over execute_pred_commoning, to pass it as a callback | |
1860 to tree_transform_and_unroll_loop. */ | |
1861 | |
1862 struct epcc_data | |
1863 { | |
1864 VEC (chain_p, heap) *chains; | |
1865 bitmap tmp_vars; | |
1866 }; | |
1867 | |
1868 static void | |
1869 execute_pred_commoning_cbck (struct loop *loop, void *data) | |
1870 { | |
1871 struct epcc_data *const dta = (struct epcc_data *) data; | |
1872 | |
1873 /* Restore phi nodes that were replaced by ssa names before | |
1874 tree_transform_and_unroll_loop (see detailed description in | |
1875 tree_predictive_commoning_loop). */ | |
1876 replace_names_by_phis (dta->chains); | |
1877 execute_pred_commoning (loop, dta->chains, dta->tmp_vars); | |
1878 } | |
1879 | |
1880 /* Base NAME and all the names in the chain of phi nodes that use it | |
1881 on variable VAR. The phi nodes are recognized by being in the copies of | |
1882 the header of the LOOP. */ | |
1883 | |
1884 static void | |
1885 base_names_in_chain_on (struct loop *loop, tree name, tree var) | |
1886 { | |
1887 gimple stmt, phi; | |
1888 imm_use_iterator iter; | |
1889 | |
1890 SSA_NAME_VAR (name) = var; | |
1891 | |
1892 while (1) | |
1893 { | |
1894 phi = NULL; | |
1895 FOR_EACH_IMM_USE_STMT (stmt, iter, name) | |
1896 { | |
1897 if (gimple_code (stmt) == GIMPLE_PHI | |
1898 && flow_bb_inside_loop_p (loop, gimple_bb (stmt))) | |
1899 { | |
1900 phi = stmt; | |
1901 BREAK_FROM_IMM_USE_STMT (iter); | |
1902 } | |
1903 } | |
1904 if (!phi) | |
1905 return; | |
1906 | |
1907 name = PHI_RESULT (phi); | |
1908 SSA_NAME_VAR (name) = var; | |
1909 } | |
1910 } | |
1911 | |
1912 /* Given an unrolled LOOP after predictive commoning, remove the | |
1913 register copies arising from phi nodes by changing the base | |
1914 variables of SSA names. TMP_VARS is the set of the temporary variables | |
1915 for those we want to perform this. */ | |
1916 | |
1917 static void | |
1918 eliminate_temp_copies (struct loop *loop, bitmap tmp_vars) | |
1919 { | |
1920 edge e; | |
1921 gimple phi, stmt; | |
1922 tree name, use, var; | |
1923 gimple_stmt_iterator psi; | |
1924 | |
1925 e = loop_latch_edge (loop); | |
1926 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) | |
1927 { | |
1928 phi = gsi_stmt (psi); | |
1929 name = PHI_RESULT (phi); | |
1930 var = SSA_NAME_VAR (name); | |
1931 if (!bitmap_bit_p (tmp_vars, DECL_UID (var))) | |
1932 continue; | |
1933 use = PHI_ARG_DEF_FROM_EDGE (phi, e); | |
1934 gcc_assert (TREE_CODE (use) == SSA_NAME); | |
1935 | |
1936 /* Base all the ssa names in the ud and du chain of NAME on VAR. */ | |
1937 stmt = SSA_NAME_DEF_STMT (use); | |
1938 while (gimple_code (stmt) == GIMPLE_PHI | |
1939 /* In case we could not unroll the loop enough to eliminate | |
1940 all copies, we may reach the loop header before the defining | |
1941 statement (in that case, some register copies will be present | |
1942 in loop latch in the final code, corresponding to the newly | |
1943 created looparound phi nodes). */ | |
1944 && gimple_bb (stmt) != loop->header) | |
1945 { | |
1946 gcc_assert (single_pred_p (gimple_bb (stmt))); | |
1947 use = PHI_ARG_DEF (stmt, 0); | |
1948 stmt = SSA_NAME_DEF_STMT (use); | |
1949 } | |
1950 | |
1951 base_names_in_chain_on (loop, use, var); | |
1952 } | |
1953 } | |
1954 | |
1955 /* Returns true if CHAIN is suitable to be combined. */ | |
1956 | |
1957 static bool | |
1958 chain_can_be_combined_p (chain_p chain) | |
1959 { | |
1960 return (!chain->combined | |
1961 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION)); | |
1962 } | |
1963 | |
1964 /* Returns the modify statement that uses NAME. Skips over assignment | |
1965 statements, NAME is replaced with the actual name used in the returned | |
1966 statement. */ | |
1967 | |
1968 static gimple | |
1969 find_use_stmt (tree *name) | |
1970 { | |
1971 gimple stmt; | |
1972 tree rhs, lhs; | |
1973 | |
1974 /* Skip over assignments. */ | |
1975 while (1) | |
1976 { | |
1977 stmt = single_nonlooparound_use (*name); | |
1978 if (!stmt) | |
1979 return NULL; | |
1980 | |
1981 if (gimple_code (stmt) != GIMPLE_ASSIGN) | |
1982 return NULL; | |
1983 | |
1984 lhs = gimple_assign_lhs (stmt); | |
1985 if (TREE_CODE (lhs) != SSA_NAME) | |
1986 return NULL; | |
1987 | |
1988 if (gimple_assign_copy_p (stmt)) | |
1989 { | |
1990 rhs = gimple_assign_rhs1 (stmt); | |
1991 if (rhs != *name) | |
1992 return NULL; | |
1993 | |
1994 *name = lhs; | |
1995 } | |
1996 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)) | |
1997 == GIMPLE_BINARY_RHS) | |
1998 return stmt; | |
1999 else | |
2000 return NULL; | |
2001 } | |
2002 } | |
2003 | |
2004 /* Returns true if we may perform reassociation for operation CODE in TYPE. */ | |
2005 | |
2006 static bool | |
2007 may_reassociate_p (tree type, enum tree_code code) | |
2008 { | |
2009 if (FLOAT_TYPE_P (type) | |
2010 && !flag_unsafe_math_optimizations) | |
2011 return false; | |
2012 | |
2013 return (commutative_tree_code (code) | |
2014 && associative_tree_code (code)); | |
2015 } | |
2016 | |
2017 /* If the operation used in STMT is associative and commutative, go through the | |
2018 tree of the same operations and returns its root. Distance to the root | |
2019 is stored in DISTANCE. */ | |
2020 | |
2021 static gimple | |
2022 find_associative_operation_root (gimple stmt, unsigned *distance) | |
2023 { | |
2024 tree lhs; | |
2025 gimple next; | |
2026 enum tree_code code = gimple_assign_rhs_code (stmt); | |
2027 tree type = TREE_TYPE (gimple_assign_lhs (stmt)); | |
2028 unsigned dist = 0; | |
2029 | |
2030 if (!may_reassociate_p (type, code)) | |
2031 return NULL; | |
2032 | |
2033 while (1) | |
2034 { | |
2035 lhs = gimple_assign_lhs (stmt); | |
2036 gcc_assert (TREE_CODE (lhs) == SSA_NAME); | |
2037 | |
2038 next = find_use_stmt (&lhs); | |
2039 if (!next | |
2040 || gimple_assign_rhs_code (next) != code) | |
2041 break; | |
2042 | |
2043 stmt = next; | |
2044 dist++; | |
2045 } | |
2046 | |
2047 if (distance) | |
2048 *distance = dist; | |
2049 return stmt; | |
2050 } | |
2051 | |
2052 /* Returns the common statement in that NAME1 and NAME2 have a use. If there | |
2053 is no such statement, returns NULL_TREE. In case the operation used on | |
2054 NAME1 and NAME2 is associative and commutative, returns the root of the | |
2055 tree formed by this operation instead of the statement that uses NAME1 or | |
2056 NAME2. */ | |
2057 | |
2058 static gimple | |
2059 find_common_use_stmt (tree *name1, tree *name2) | |
2060 { | |
2061 gimple stmt1, stmt2; | |
2062 | |
2063 stmt1 = find_use_stmt (name1); | |
2064 if (!stmt1) | |
2065 return NULL; | |
2066 | |
2067 stmt2 = find_use_stmt (name2); | |
2068 if (!stmt2) | |
2069 return NULL; | |
2070 | |
2071 if (stmt1 == stmt2) | |
2072 return stmt1; | |
2073 | |
2074 stmt1 = find_associative_operation_root (stmt1, NULL); | |
2075 if (!stmt1) | |
2076 return NULL; | |
2077 stmt2 = find_associative_operation_root (stmt2, NULL); | |
2078 if (!stmt2) | |
2079 return NULL; | |
2080 | |
2081 return (stmt1 == stmt2 ? stmt1 : NULL); | |
2082 } | |
2083 | |
2084 /* Checks whether R1 and R2 are combined together using CODE, with the result | |
2085 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1 | |
2086 if it is true. If CODE is ERROR_MARK, set these values instead. */ | |
2087 | |
2088 static bool | |
2089 combinable_refs_p (dref r1, dref r2, | |
2090 enum tree_code *code, bool *swap, tree *rslt_type) | |
2091 { | |
2092 enum tree_code acode; | |
2093 bool aswap; | |
2094 tree atype; | |
2095 tree name1, name2; | |
2096 gimple stmt; | |
2097 | |
2098 name1 = name_for_ref (r1); | |
2099 name2 = name_for_ref (r2); | |
2100 gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE); | |
2101 | |
2102 stmt = find_common_use_stmt (&name1, &name2); | |
2103 | |
2104 if (!stmt) | |
2105 return false; | |
2106 | |
2107 acode = gimple_assign_rhs_code (stmt); | |
2108 aswap = (!commutative_tree_code (acode) | |
2109 && gimple_assign_rhs1 (stmt) != name1); | |
2110 atype = TREE_TYPE (gimple_assign_lhs (stmt)); | |
2111 | |
2112 if (*code == ERROR_MARK) | |
2113 { | |
2114 *code = acode; | |
2115 *swap = aswap; | |
2116 *rslt_type = atype; | |
2117 return true; | |
2118 } | |
2119 | |
2120 return (*code == acode | |
2121 && *swap == aswap | |
2122 && *rslt_type == atype); | |
2123 } | |
2124 | |
2125 /* Remove OP from the operation on rhs of STMT, and replace STMT with | |
2126 an assignment of the remaining operand. */ | |
2127 | |
2128 static void | |
2129 remove_name_from_operation (gimple stmt, tree op) | |
2130 { | |
2131 tree other_op; | |
2132 gimple_stmt_iterator si; | |
2133 | |
2134 gcc_assert (is_gimple_assign (stmt)); | |
2135 | |
2136 if (gimple_assign_rhs1 (stmt) == op) | |
2137 other_op = gimple_assign_rhs2 (stmt); | |
2138 else | |
2139 other_op = gimple_assign_rhs1 (stmt); | |
2140 | |
2141 si = gsi_for_stmt (stmt); | |
2142 gimple_assign_set_rhs_from_tree (&si, other_op); | |
2143 | |
2144 /* We should not have reallocated STMT. */ | |
2145 gcc_assert (gsi_stmt (si) == stmt); | |
2146 | |
2147 update_stmt (stmt); | |
2148 } | |
2149 | |
2150 /* Reassociates the expression in that NAME1 and NAME2 are used so that they | |
2151 are combined in a single statement, and returns this statement. */ | |
2152 | |
2153 static gimple | |
2154 reassociate_to_the_same_stmt (tree name1, tree name2) | |
2155 { | |
2156 gimple stmt1, stmt2, root1, root2, s1, s2; | |
2157 gimple new_stmt, tmp_stmt; | |
2158 tree new_name, tmp_name, var, r1, r2; | |
2159 unsigned dist1, dist2; | |
2160 enum tree_code code; | |
2161 tree type = TREE_TYPE (name1); | |
2162 gimple_stmt_iterator bsi; | |
2163 | |
2164 stmt1 = find_use_stmt (&name1); | |
2165 stmt2 = find_use_stmt (&name2); | |
2166 root1 = find_associative_operation_root (stmt1, &dist1); | |
2167 root2 = find_associative_operation_root (stmt2, &dist2); | |
2168 code = gimple_assign_rhs_code (stmt1); | |
2169 | |
2170 gcc_assert (root1 && root2 && root1 == root2 | |
2171 && code == gimple_assign_rhs_code (stmt2)); | |
2172 | |
2173 /* Find the root of the nearest expression in that both NAME1 and NAME2 | |
2174 are used. */ | |
2175 r1 = name1; | |
2176 s1 = stmt1; | |
2177 r2 = name2; | |
2178 s2 = stmt2; | |
2179 | |
2180 while (dist1 > dist2) | |
2181 { | |
2182 s1 = find_use_stmt (&r1); | |
2183 r1 = gimple_assign_lhs (s1); | |
2184 dist1--; | |
2185 } | |
2186 while (dist2 > dist1) | |
2187 { | |
2188 s2 = find_use_stmt (&r2); | |
2189 r2 = gimple_assign_lhs (s2); | |
2190 dist2--; | |
2191 } | |
2192 | |
2193 while (s1 != s2) | |
2194 { | |
2195 s1 = find_use_stmt (&r1); | |
2196 r1 = gimple_assign_lhs (s1); | |
2197 s2 = find_use_stmt (&r2); | |
2198 r2 = gimple_assign_lhs (s2); | |
2199 } | |
2200 | |
2201 /* Remove NAME1 and NAME2 from the statements in that they are used | |
2202 currently. */ | |
2203 remove_name_from_operation (stmt1, name1); | |
2204 remove_name_from_operation (stmt2, name2); | |
2205 | |
2206 /* Insert the new statement combining NAME1 and NAME2 before S1, and | |
2207 combine it with the rhs of S1. */ | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
2208 var = create_tmp_reg (type, "predreastmp"); |
0 | 2209 add_referenced_var (var); |
2210 new_name = make_ssa_name (var, NULL); | |
2211 new_stmt = gimple_build_assign_with_ops (code, new_name, name1, name2); | |
2212 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
2213 var = create_tmp_reg (type, "predreastmp"); |
0 | 2214 add_referenced_var (var); |
2215 tmp_name = make_ssa_name (var, NULL); | |
2216 | |
2217 /* Rhs of S1 may now be either a binary expression with operation | |
2218 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1, | |
2219 so that name1 or name2 was removed from it). */ | |
2220 tmp_stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (s1), | |
2221 tmp_name, | |
2222 gimple_assign_rhs1 (s1), | |
2223 gimple_assign_rhs2 (s1)); | |
2224 | |
2225 bsi = gsi_for_stmt (s1); | |
2226 gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name); | |
2227 s1 = gsi_stmt (bsi); | |
2228 update_stmt (s1); | |
2229 | |
2230 gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT); | |
2231 gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT); | |
2232 | |
2233 return new_stmt; | |
2234 } | |
2235 | |
2236 /* Returns the statement that combines references R1 and R2. In case R1 | |
2237 and R2 are not used in the same statement, but they are used with an | |
2238 associative and commutative operation in the same expression, reassociate | |
2239 the expression so that they are used in the same statement. */ | |
2240 | |
2241 static gimple | |
2242 stmt_combining_refs (dref r1, dref r2) | |
2243 { | |
2244 gimple stmt1, stmt2; | |
2245 tree name1 = name_for_ref (r1); | |
2246 tree name2 = name_for_ref (r2); | |
2247 | |
2248 stmt1 = find_use_stmt (&name1); | |
2249 stmt2 = find_use_stmt (&name2); | |
2250 if (stmt1 == stmt2) | |
2251 return stmt1; | |
2252 | |
2253 return reassociate_to_the_same_stmt (name1, name2); | |
2254 } | |
2255 | |
2256 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the | |
2257 description of the new chain is returned, otherwise we return NULL. */ | |
2258 | |
2259 static chain_p | |
2260 combine_chains (chain_p ch1, chain_p ch2) | |
2261 { | |
2262 dref r1, r2, nw; | |
2263 enum tree_code op = ERROR_MARK; | |
2264 bool swap = false; | |
2265 chain_p new_chain; | |
2266 unsigned i; | |
2267 gimple root_stmt; | |
2268 tree rslt_type = NULL_TREE; | |
2269 | |
2270 if (ch1 == ch2) | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
2271 return NULL; |
0 | 2272 if (ch1->length != ch2->length) |
2273 return NULL; | |
2274 | |
2275 if (VEC_length (dref, ch1->refs) != VEC_length (dref, ch2->refs)) | |
2276 return NULL; | |
2277 | |
2278 for (i = 0; (VEC_iterate (dref, ch1->refs, i, r1) | |
2279 && VEC_iterate (dref, ch2->refs, i, r2)); i++) | |
2280 { | |
2281 if (r1->distance != r2->distance) | |
2282 return NULL; | |
2283 | |
2284 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type)) | |
2285 return NULL; | |
2286 } | |
2287 | |
2288 if (swap) | |
2289 { | |
2290 chain_p tmp = ch1; | |
2291 ch1 = ch2; | |
2292 ch2 = tmp; | |
2293 } | |
2294 | |
2295 new_chain = XCNEW (struct chain); | |
2296 new_chain->type = CT_COMBINATION; | |
2297 new_chain->op = op; | |
2298 new_chain->ch1 = ch1; | |
2299 new_chain->ch2 = ch2; | |
2300 new_chain->rslt_type = rslt_type; | |
2301 new_chain->length = ch1->length; | |
2302 | |
2303 for (i = 0; (VEC_iterate (dref, ch1->refs, i, r1) | |
2304 && VEC_iterate (dref, ch2->refs, i, r2)); i++) | |
2305 { | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2306 nw = XCNEW (struct dref_d); |
0 | 2307 nw->stmt = stmt_combining_refs (r1, r2); |
2308 nw->distance = r1->distance; | |
2309 | |
2310 VEC_safe_push (dref, heap, new_chain->refs, nw); | |
2311 } | |
2312 | |
2313 new_chain->has_max_use_after = false; | |
2314 root_stmt = get_chain_root (new_chain)->stmt; | |
2315 for (i = 1; VEC_iterate (dref, new_chain->refs, i, nw); i++) | |
2316 { | |
2317 if (nw->distance == new_chain->length | |
2318 && !stmt_dominates_stmt_p (nw->stmt, root_stmt)) | |
2319 { | |
2320 new_chain->has_max_use_after = true; | |
2321 break; | |
2322 } | |
2323 } | |
2324 | |
2325 ch1->combined = true; | |
2326 ch2->combined = true; | |
2327 return new_chain; | |
2328 } | |
2329 | |
2330 /* Try to combine the CHAINS. */ | |
2331 | |
2332 static void | |
2333 try_combine_chains (VEC (chain_p, heap) **chains) | |
2334 { | |
2335 unsigned i, j; | |
2336 chain_p ch1, ch2, cch; | |
2337 VEC (chain_p, heap) *worklist = NULL; | |
2338 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2339 FOR_EACH_VEC_ELT (chain_p, *chains, i, ch1) |
0 | 2340 if (chain_can_be_combined_p (ch1)) |
2341 VEC_safe_push (chain_p, heap, worklist, ch1); | |
2342 | |
2343 while (!VEC_empty (chain_p, worklist)) | |
2344 { | |
2345 ch1 = VEC_pop (chain_p, worklist); | |
2346 if (!chain_can_be_combined_p (ch1)) | |
2347 continue; | |
2348 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2349 FOR_EACH_VEC_ELT (chain_p, *chains, j, ch2) |
0 | 2350 { |
2351 if (!chain_can_be_combined_p (ch2)) | |
2352 continue; | |
2353 | |
2354 cch = combine_chains (ch1, ch2); | |
2355 if (cch) | |
2356 { | |
2357 VEC_safe_push (chain_p, heap, worklist, cch); | |
2358 VEC_safe_push (chain_p, heap, *chains, cch); | |
2359 break; | |
2360 } | |
2361 } | |
2362 } | |
2363 } | |
2364 | |
2365 /* Prepare initializers for CHAIN in LOOP. Returns false if this is | |
2366 impossible because one of these initializers may trap, true otherwise. */ | |
2367 | |
2368 static bool | |
2369 prepare_initializers_chain (struct loop *loop, chain_p chain) | |
2370 { | |
2371 unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length; | |
2372 struct data_reference *dr = get_chain_root (chain)->ref; | |
2373 tree init; | |
2374 gimple_seq stmts; | |
2375 dref laref; | |
2376 edge entry = loop_preheader_edge (loop); | |
2377 | |
2378 /* Find the initializers for the variables, and check that they cannot | |
2379 trap. */ | |
2380 chain->inits = VEC_alloc (tree, heap, n); | |
2381 for (i = 0; i < n; i++) | |
2382 VEC_quick_push (tree, chain->inits, NULL_TREE); | |
2383 | |
2384 /* If we have replaced some looparound phi nodes, use their initializers | |
2385 instead of creating our own. */ | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2386 FOR_EACH_VEC_ELT (dref, chain->refs, i, laref) |
0 | 2387 { |
2388 if (gimple_code (laref->stmt) != GIMPLE_PHI) | |
2389 continue; | |
2390 | |
2391 gcc_assert (laref->distance > 0); | |
2392 VEC_replace (tree, chain->inits, n - laref->distance, | |
2393 PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry)); | |
2394 } | |
2395 | |
2396 for (i = 0; i < n; i++) | |
2397 { | |
2398 if (VEC_index (tree, chain->inits, i) != NULL_TREE) | |
2399 continue; | |
2400 | |
2401 init = ref_at_iteration (loop, DR_REF (dr), (int) i - n); | |
2402 if (!init) | |
2403 return false; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2404 |
0 | 2405 if (!chain->all_always_accessed && tree_could_trap_p (init)) |
2406 return false; | |
2407 | |
2408 init = force_gimple_operand (init, &stmts, false, NULL_TREE); | |
2409 if (stmts) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2410 gsi_insert_seq_on_edge_immediate (entry, stmts); |
0 | 2411 |
2412 VEC_replace (tree, chain->inits, i, init); | |
2413 } | |
2414 | |
2415 return true; | |
2416 } | |
2417 | |
2418 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot | |
2419 be used because the initializers might trap. */ | |
2420 | |
2421 static void | |
2422 prepare_initializers (struct loop *loop, VEC (chain_p, heap) *chains) | |
2423 { | |
2424 chain_p chain; | |
2425 unsigned i; | |
2426 | |
2427 for (i = 0; i < VEC_length (chain_p, chains); ) | |
2428 { | |
2429 chain = VEC_index (chain_p, chains, i); | |
2430 if (prepare_initializers_chain (loop, chain)) | |
2431 i++; | |
2432 else | |
2433 { | |
2434 release_chain (chain); | |
2435 VEC_unordered_remove (chain_p, chains, i); | |
2436 } | |
2437 } | |
2438 } | |
2439 | |
2440 /* Performs predictive commoning for LOOP. Returns true if LOOP was | |
2441 unrolled. */ | |
2442 | |
2443 static bool | |
2444 tree_predictive_commoning_loop (struct loop *loop) | |
2445 { | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2446 VEC (loop_p, heap) *loop_nest; |
0 | 2447 VEC (data_reference_p, heap) *datarefs; |
2448 VEC (ddr_p, heap) *dependences; | |
2449 struct component *components; | |
2450 VEC (chain_p, heap) *chains = NULL; | |
2451 unsigned unroll_factor; | |
2452 struct tree_niter_desc desc; | |
2453 bool unroll = false; | |
2454 edge exit; | |
2455 bitmap tmp_vars; | |
2456 | |
2457 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2458 fprintf (dump_file, "Processing loop %d\n", loop->num); | |
2459 | |
2460 /* Find the data references and split them into components according to their | |
2461 dependence relations. */ | |
2462 datarefs = VEC_alloc (data_reference_p, heap, 10); | |
2463 dependences = VEC_alloc (ddr_p, heap, 10); | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2464 loop_nest = VEC_alloc (loop_p, heap, 3); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2465 compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs, |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2466 &dependences); |
0 | 2467 if (dump_file && (dump_flags & TDF_DETAILS)) |
2468 dump_data_dependence_relations (dump_file, dependences); | |
2469 | |
2470 components = split_data_refs_to_components (loop, datarefs, dependences); | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2471 VEC_free (loop_p, heap, loop_nest); |
0 | 2472 free_dependence_relations (dependences); |
2473 if (!components) | |
2474 { | |
2475 free_data_refs (datarefs); | |
2476 return false; | |
2477 } | |
2478 | |
2479 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2480 { | |
2481 fprintf (dump_file, "Initial state:\n\n"); | |
2482 dump_components (dump_file, components); | |
2483 } | |
2484 | |
2485 /* Find the suitable components and split them into chains. */ | |
2486 components = filter_suitable_components (loop, components); | |
2487 | |
2488 tmp_vars = BITMAP_ALLOC (NULL); | |
2489 looparound_phis = BITMAP_ALLOC (NULL); | |
2490 determine_roots (loop, components, &chains); | |
2491 release_components (components); | |
2492 | |
2493 if (!chains) | |
2494 { | |
2495 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2496 fprintf (dump_file, | |
2497 "Predictive commoning failed: no suitable chains\n"); | |
2498 goto end; | |
2499 } | |
2500 prepare_initializers (loop, chains); | |
2501 | |
2502 /* Try to combine the chains that are always worked with together. */ | |
2503 try_combine_chains (&chains); | |
2504 | |
2505 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2506 { | |
2507 fprintf (dump_file, "Before commoning:\n\n"); | |
2508 dump_chains (dump_file, chains); | |
2509 } | |
2510 | |
2511 /* Determine the unroll factor, and if the loop should be unrolled, ensure | |
2512 that its number of iterations is divisible by the factor. */ | |
2513 unroll_factor = determine_unroll_factor (chains); | |
2514 scev_reset (); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2515 unroll = (unroll_factor > 1 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2516 && can_unroll_loop_p (loop, unroll_factor, &desc)); |
0 | 2517 exit = single_dom_exit (loop); |
2518 | |
2519 /* Execute the predictive commoning transformations, and possibly unroll the | |
2520 loop. */ | |
2521 if (unroll) | |
2522 { | |
2523 struct epcc_data dta; | |
2524 | |
2525 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2526 fprintf (dump_file, "Unrolling %u times.\n", unroll_factor); | |
2527 | |
2528 dta.chains = chains; | |
2529 dta.tmp_vars = tmp_vars; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2530 |
0 | 2531 update_ssa (TODO_update_ssa_only_virtuals); |
2532 | |
2533 /* Cfg manipulations performed in tree_transform_and_unroll_loop before | |
2534 execute_pred_commoning_cbck is called may cause phi nodes to be | |
2535 reallocated, which is a problem since CHAINS may point to these | |
2536 statements. To fix this, we store the ssa names defined by the | |
2537 phi nodes here instead of the phi nodes themselves, and restore | |
2538 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */ | |
2539 replace_phis_by_defined_names (chains); | |
2540 | |
2541 tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc, | |
2542 execute_pred_commoning_cbck, &dta); | |
2543 eliminate_temp_copies (loop, tmp_vars); | |
2544 } | |
2545 else | |
2546 { | |
2547 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2548 fprintf (dump_file, | |
2549 "Executing predictive commoning without unrolling.\n"); | |
2550 execute_pred_commoning (loop, chains, tmp_vars); | |
2551 } | |
2552 | |
2553 end: ; | |
2554 release_chains (chains); | |
2555 free_data_refs (datarefs); | |
2556 BITMAP_FREE (tmp_vars); | |
2557 BITMAP_FREE (looparound_phis); | |
2558 | |
2559 free_affine_expand_cache (&name_expansions); | |
2560 | |
2561 return unroll; | |
2562 } | |
2563 | |
2564 /* Runs predictive commoning. */ | |
2565 | |
2566 unsigned | |
2567 tree_predictive_commoning (void) | |
2568 { | |
2569 bool unrolled = false; | |
2570 struct loop *loop; | |
2571 loop_iterator li; | |
2572 unsigned ret = 0; | |
2573 | |
2574 initialize_original_copy_tables (); | |
2575 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST) | |
2576 if (optimize_loop_for_speed_p (loop)) | |
2577 { | |
2578 unrolled |= tree_predictive_commoning_loop (loop); | |
2579 } | |
2580 | |
2581 if (unrolled) | |
2582 { | |
2583 scev_reset (); | |
2584 ret = TODO_cleanup_cfg; | |
2585 } | |
2586 free_original_copy_tables (); | |
2587 | |
2588 return ret; | |
2589 } |