0
|
1 /* Predictive commoning.
|
|
2 Copyright (C) 2005, 2007, 2008 Free Software Foundation, Inc.
|
|
3
|
|
4 This file is part of GCC.
|
|
5
|
|
6 GCC is free software; you can redistribute it and/or modify it
|
|
7 under the terms of the GNU General Public License as published by the
|
|
8 Free Software Foundation; either version 3, or (at your option) any
|
|
9 later version.
|
|
10
|
|
11 GCC is distributed in the hope that it will be useful, but WITHOUT
|
|
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 for more details.
|
|
15
|
|
16 You should have received a copy of the GNU General Public License
|
|
17 along with GCC; see the file COPYING3. If not see
|
|
18 <http://www.gnu.org/licenses/>. */
|
|
19
|
|
20 /* This file implements the predictive commoning optimization. Predictive
|
|
21 commoning can be viewed as CSE around a loop, and with some improvements,
|
|
22 as generalized strength reduction-- i.e., reusing values computed in
|
|
23 earlier iterations of a loop in the later ones. So far, the pass only
|
|
24 handles the most useful case, that is, reusing values of memory references.
|
|
25 If you think this is all just a special case of PRE, you are sort of right;
|
|
26 however, concentrating on loops is simpler, and makes it possible to
|
|
27 incorporate data dependence analysis to detect the opportunities, perform
|
|
28 loop unrolling to avoid copies together with renaming immediately,
|
|
29 and if needed, we could also take register pressure into account.
|
|
30
|
|
31 Let us demonstrate what is done on an example:
|
|
32
|
|
33 for (i = 0; i < 100; i++)
|
|
34 {
|
|
35 a[i+2] = a[i] + a[i+1];
|
|
36 b[10] = b[10] + i;
|
|
37 c[i] = c[99 - i];
|
|
38 d[i] = d[i + 1];
|
|
39 }
|
|
40
|
|
41 1) We find data references in the loop, and split them to mutually
|
|
42 independent groups (i.e., we find components of a data dependence
|
|
43 graph). We ignore read-read dependences whose distance is not constant.
|
|
44 (TODO -- we could also ignore antidependences). In this example, we
|
|
45 find the following groups:
|
|
46
|
|
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
|
|
48 b[10]{read}, b[10]{write}
|
|
49 c[99 - i]{read}, c[i]{write}
|
|
50 d[i + 1]{read}, d[i]{write}
|
|
51
|
|
52 2) Inside each of the group, we verify several conditions:
|
|
53 a) all the references must differ in indices only, and the indices
|
|
54 must all have the same step
|
|
55 b) the references must dominate loop latch (and thus, they must be
|
|
56 ordered by dominance relation).
|
|
57 c) the distance of the indices must be a small multiple of the step
|
|
58 We are then able to compute the difference of the references (# of
|
|
59 iterations before they point to the same place as the first of them).
|
|
60 Also, in case there are writes in the loop, we split the groups into
|
|
61 chains whose head is the write whose values are used by the reads in
|
|
62 the same chain. The chains are then processed independently,
|
|
63 making the further transformations simpler. Also, the shorter chains
|
|
64 need the same number of registers, but may require lower unrolling
|
|
65 factor in order to get rid of the copies on the loop latch.
|
|
66
|
|
67 In our example, we get the following chains (the chain for c is invalid).
|
|
68
|
|
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
|
|
70 b[10]{read,+0}, b[10]{write,+0}
|
|
71 d[i + 1]{read,+0}, d[i]{write,+1}
|
|
72
|
|
73 3) For each read, we determine the read or write whose value it reuses,
|
|
74 together with the distance of this reuse. I.e. we take the last
|
|
75 reference before it with distance 0, or the last of the references
|
|
76 with the smallest positive distance to the read. Then, we remove
|
|
77 the references that are not used in any of these chains, discard the
|
|
78 empty groups, and propagate all the links so that they point to the
|
|
79 single root reference of the chain (adjusting their distance
|
|
80 appropriately). Some extra care needs to be taken for references with
|
|
81 step 0. In our example (the numbers indicate the distance of the
|
|
82 reuse),
|
|
83
|
|
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
|
|
85 b[10] --> (*) 1, b[10] (*)
|
|
86
|
|
87 4) The chains are combined together if possible. If the corresponding
|
|
88 elements of two chains are always combined together with the same
|
|
89 operator, we remember just the result of this combination, instead
|
|
90 of remembering the values separately. We may need to perform
|
|
91 reassociation to enable combining, for example
|
|
92
|
|
93 e[i] + f[i+1] + e[i+1] + f[i]
|
|
94
|
|
95 can be reassociated as
|
|
96
|
|
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
|
|
98
|
|
99 and we can combine the chains for e and f into one chain.
|
|
100
|
|
101 5) For each root reference (end of the chain) R, let N be maximum distance
|
|
102 of a reference reusing its value. Variables R0 upto RN are created,
|
|
103 together with phi nodes that transfer values from R1 .. RN to
|
|
104 R0 .. R(N-1).
|
|
105 Initial values are loaded to R0..R(N-1) (in case not all references
|
|
106 must necessarily be accessed and they may trap, we may fail here;
|
|
107 TODO sometimes, the loads could be guarded by a check for the number
|
|
108 of iterations). Values loaded/stored in roots are also copied to
|
|
109 RN. Other reads are replaced with the appropriate variable Ri.
|
|
110 Everything is put to SSA form.
|
|
111
|
|
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
|
|
113 the value with the maximum distance dominate the root), we can avoid
|
|
114 creating RN and use R0 instead of it.
|
|
115
|
|
116 In our example, we get (only the parts concerning a and b are shown):
|
|
117 for (i = 0; i < 100; i++)
|
|
118 {
|
|
119 f = phi (a[0], s);
|
|
120 s = phi (a[1], f);
|
|
121 x = phi (b[10], x);
|
|
122
|
|
123 f = f + s;
|
|
124 a[i+2] = f;
|
|
125 x = x + i;
|
|
126 b[10] = x;
|
|
127 }
|
|
128
|
|
129 6) Factor F for unrolling is determined as the smallest common multiple of
|
|
130 (N + 1) for each root reference (N for references for that we avoided
|
|
131 creating RN). If F and the loop is small enough, loop is unrolled F
|
|
132 times. The stores to RN (R0) in the copies of the loop body are
|
|
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
|
|
134 be coalesced and the copies can be eliminated.
|
|
135
|
|
136 TODO -- copy propagation and other optimizations may change the live
|
|
137 ranges of the temporary registers and prevent them from being coalesced;
|
|
138 this may increase the register pressure.
|
|
139
|
|
140 In our case, F = 2 and the (main loop of the) result is
|
|
141
|
|
142 for (i = 0; i < ...; i += 2)
|
|
143 {
|
|
144 f = phi (a[0], f);
|
|
145 s = phi (a[1], s);
|
|
146 x = phi (b[10], x);
|
|
147
|
|
148 f = f + s;
|
|
149 a[i+2] = f;
|
|
150 x = x + i;
|
|
151 b[10] = x;
|
|
152
|
|
153 s = s + f;
|
|
154 a[i+3] = s;
|
|
155 x = x + i;
|
|
156 b[10] = x;
|
|
157 }
|
|
158
|
|
159 TODO -- stores killing other stores can be taken into account, e.g.,
|
|
160 for (i = 0; i < n; i++)
|
|
161 {
|
|
162 a[i] = 1;
|
|
163 a[i+2] = 2;
|
|
164 }
|
|
165
|
|
166 can be replaced with
|
|
167
|
|
168 t0 = a[0];
|
|
169 t1 = a[1];
|
|
170 for (i = 0; i < n; i++)
|
|
171 {
|
|
172 a[i] = 1;
|
|
173 t2 = 2;
|
|
174 t0 = t1;
|
|
175 t1 = t2;
|
|
176 }
|
|
177 a[n] = t0;
|
|
178 a[n+1] = t1;
|
|
179
|
|
180 The interesting part is that this would generalize store motion; still, since
|
|
181 sm is performed elsewhere, it does not seem that important.
|
|
182
|
|
183 Predictive commoning can be generalized for arbitrary computations (not
|
|
184 just memory loads), and also nontrivial transfer functions (e.g., replacing
|
|
185 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
|
|
186
|
|
187 #include "config.h"
|
|
188 #include "system.h"
|
|
189 #include "coretypes.h"
|
|
190 #include "tm.h"
|
|
191 #include "tree.h"
|
|
192 #include "tm_p.h"
|
|
193 #include "cfgloop.h"
|
|
194 #include "tree-flow.h"
|
|
195 #include "ggc.h"
|
|
196 #include "tree-data-ref.h"
|
|
197 #include "tree-scalar-evolution.h"
|
|
198 #include "tree-chrec.h"
|
|
199 #include "params.h"
|
|
200 #include "diagnostic.h"
|
|
201 #include "tree-pass.h"
|
|
202 #include "tree-affine.h"
|
|
203 #include "tree-inline.h"
|
|
204
|
|
205 /* The maximum number of iterations between the considered memory
|
|
206 references. */
|
|
207
|
|
208 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
|
|
209
|
|
210 /* Data references (or phi nodes that carry data reference values across
|
|
211 loop iterations). */
|
|
212
|
|
213 typedef struct dref
|
|
214 {
|
|
215 /* The reference itself. */
|
|
216 struct data_reference *ref;
|
|
217
|
|
218 /* The statement in that the reference appears. */
|
|
219 gimple stmt;
|
|
220
|
|
221 /* In case that STMT is a phi node, this field is set to the SSA name
|
|
222 defined by it in replace_phis_by_defined_names (in order to avoid
|
|
223 pointing to phi node that got reallocated in the meantime). */
|
|
224 tree name_defined_by_phi;
|
|
225
|
|
226 /* Distance of the reference from the root of the chain (in number of
|
|
227 iterations of the loop). */
|
|
228 unsigned distance;
|
|
229
|
|
230 /* Number of iterations offset from the first reference in the component. */
|
|
231 double_int offset;
|
|
232
|
|
233 /* Number of the reference in a component, in dominance ordering. */
|
|
234 unsigned pos;
|
|
235
|
|
236 /* True if the memory reference is always accessed when the loop is
|
|
237 entered. */
|
|
238 unsigned always_accessed : 1;
|
|
239 } *dref;
|
|
240
|
|
241 DEF_VEC_P (dref);
|
|
242 DEF_VEC_ALLOC_P (dref, heap);
|
|
243
|
|
244 /* Type of the chain of the references. */
|
|
245
|
|
246 enum chain_type
|
|
247 {
|
|
248 /* The addresses of the references in the chain are constant. */
|
|
249 CT_INVARIANT,
|
|
250
|
|
251 /* There are only loads in the chain. */
|
|
252 CT_LOAD,
|
|
253
|
|
254 /* Root of the chain is store, the rest are loads. */
|
|
255 CT_STORE_LOAD,
|
|
256
|
|
257 /* A combination of two chains. */
|
|
258 CT_COMBINATION
|
|
259 };
|
|
260
|
|
261 /* Chains of data references. */
|
|
262
|
|
263 typedef struct chain
|
|
264 {
|
|
265 /* Type of the chain. */
|
|
266 enum chain_type type;
|
|
267
|
|
268 /* For combination chains, the operator and the two chains that are
|
|
269 combined, and the type of the result. */
|
|
270 enum tree_code op;
|
|
271 tree rslt_type;
|
|
272 struct chain *ch1, *ch2;
|
|
273
|
|
274 /* The references in the chain. */
|
|
275 VEC(dref,heap) *refs;
|
|
276
|
|
277 /* The maximum distance of the reference in the chain from the root. */
|
|
278 unsigned length;
|
|
279
|
|
280 /* The variables used to copy the value throughout iterations. */
|
|
281 VEC(tree,heap) *vars;
|
|
282
|
|
283 /* Initializers for the variables. */
|
|
284 VEC(tree,heap) *inits;
|
|
285
|
|
286 /* True if there is a use of a variable with the maximal distance
|
|
287 that comes after the root in the loop. */
|
|
288 unsigned has_max_use_after : 1;
|
|
289
|
|
290 /* True if all the memory references in the chain are always accessed. */
|
|
291 unsigned all_always_accessed : 1;
|
|
292
|
|
293 /* True if this chain was combined together with some other chain. */
|
|
294 unsigned combined : 1;
|
|
295 } *chain_p;
|
|
296
|
|
297 DEF_VEC_P (chain_p);
|
|
298 DEF_VEC_ALLOC_P (chain_p, heap);
|
|
299
|
|
300 /* Describes the knowledge about the step of the memory references in
|
|
301 the component. */
|
|
302
|
|
303 enum ref_step_type
|
|
304 {
|
|
305 /* The step is zero. */
|
|
306 RS_INVARIANT,
|
|
307
|
|
308 /* The step is nonzero. */
|
|
309 RS_NONZERO,
|
|
310
|
|
311 /* The step may or may not be nonzero. */
|
|
312 RS_ANY
|
|
313 };
|
|
314
|
|
315 /* Components of the data dependence graph. */
|
|
316
|
|
317 struct component
|
|
318 {
|
|
319 /* The references in the component. */
|
|
320 VEC(dref,heap) *refs;
|
|
321
|
|
322 /* What we know about the step of the references in the component. */
|
|
323 enum ref_step_type comp_step;
|
|
324
|
|
325 /* Next component in the list. */
|
|
326 struct component *next;
|
|
327 };
|
|
328
|
|
329 /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
|
|
330
|
|
331 static bitmap looparound_phis;
|
|
332
|
|
333 /* Cache used by tree_to_aff_combination_expand. */
|
|
334
|
|
335 static struct pointer_map_t *name_expansions;
|
|
336
|
|
337 /* Dumps data reference REF to FILE. */
|
|
338
|
|
339 extern void dump_dref (FILE *, dref);
|
|
340 void
|
|
341 dump_dref (FILE *file, dref ref)
|
|
342 {
|
|
343 if (ref->ref)
|
|
344 {
|
|
345 fprintf (file, " ");
|
|
346 print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
|
|
347 fprintf (file, " (id %u%s)\n", ref->pos,
|
|
348 DR_IS_READ (ref->ref) ? "" : ", write");
|
|
349
|
|
350 fprintf (file, " offset ");
|
|
351 dump_double_int (file, ref->offset, false);
|
|
352 fprintf (file, "\n");
|
|
353
|
|
354 fprintf (file, " distance %u\n", ref->distance);
|
|
355 }
|
|
356 else
|
|
357 {
|
|
358 if (gimple_code (ref->stmt) == GIMPLE_PHI)
|
|
359 fprintf (file, " looparound ref\n");
|
|
360 else
|
|
361 fprintf (file, " combination ref\n");
|
|
362 fprintf (file, " in statement ");
|
|
363 print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
|
|
364 fprintf (file, "\n");
|
|
365 fprintf (file, " distance %u\n", ref->distance);
|
|
366 }
|
|
367
|
|
368 }
|
|
369
|
|
370 /* Dumps CHAIN to FILE. */
|
|
371
|
|
372 extern void dump_chain (FILE *, chain_p);
|
|
373 void
|
|
374 dump_chain (FILE *file, chain_p chain)
|
|
375 {
|
|
376 dref a;
|
|
377 const char *chain_type;
|
|
378 unsigned i;
|
|
379 tree var;
|
|
380
|
|
381 switch (chain->type)
|
|
382 {
|
|
383 case CT_INVARIANT:
|
|
384 chain_type = "Load motion";
|
|
385 break;
|
|
386
|
|
387 case CT_LOAD:
|
|
388 chain_type = "Loads-only";
|
|
389 break;
|
|
390
|
|
391 case CT_STORE_LOAD:
|
|
392 chain_type = "Store-loads";
|
|
393 break;
|
|
394
|
|
395 case CT_COMBINATION:
|
|
396 chain_type = "Combination";
|
|
397 break;
|
|
398
|
|
399 default:
|
|
400 gcc_unreachable ();
|
|
401 }
|
|
402
|
|
403 fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
|
|
404 chain->combined ? " (combined)" : "");
|
|
405 if (chain->type != CT_INVARIANT)
|
|
406 fprintf (file, " max distance %u%s\n", chain->length,
|
|
407 chain->has_max_use_after ? "" : ", may reuse first");
|
|
408
|
|
409 if (chain->type == CT_COMBINATION)
|
|
410 {
|
|
411 fprintf (file, " equal to %p %s %p in type ",
|
|
412 (void *) chain->ch1, op_symbol_code (chain->op),
|
|
413 (void *) chain->ch2);
|
|
414 print_generic_expr (file, chain->rslt_type, TDF_SLIM);
|
|
415 fprintf (file, "\n");
|
|
416 }
|
|
417
|
|
418 if (chain->vars)
|
|
419 {
|
|
420 fprintf (file, " vars");
|
|
421 for (i = 0; VEC_iterate (tree, chain->vars, i, var); i++)
|
|
422 {
|
|
423 fprintf (file, " ");
|
|
424 print_generic_expr (file, var, TDF_SLIM);
|
|
425 }
|
|
426 fprintf (file, "\n");
|
|
427 }
|
|
428
|
|
429 if (chain->inits)
|
|
430 {
|
|
431 fprintf (file, " inits");
|
|
432 for (i = 0; VEC_iterate (tree, chain->inits, i, var); i++)
|
|
433 {
|
|
434 fprintf (file, " ");
|
|
435 print_generic_expr (file, var, TDF_SLIM);
|
|
436 }
|
|
437 fprintf (file, "\n");
|
|
438 }
|
|
439
|
|
440 fprintf (file, " references:\n");
|
|
441 for (i = 0; VEC_iterate (dref, chain->refs, i, a); i++)
|
|
442 dump_dref (file, a);
|
|
443
|
|
444 fprintf (file, "\n");
|
|
445 }
|
|
446
|
|
447 /* Dumps CHAINS to FILE. */
|
|
448
|
|
449 extern void dump_chains (FILE *, VEC (chain_p, heap) *);
|
|
450 void
|
|
451 dump_chains (FILE *file, VEC (chain_p, heap) *chains)
|
|
452 {
|
|
453 chain_p chain;
|
|
454 unsigned i;
|
|
455
|
|
456 for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
|
|
457 dump_chain (file, chain);
|
|
458 }
|
|
459
|
|
460 /* Dumps COMP to FILE. */
|
|
461
|
|
462 extern void dump_component (FILE *, struct component *);
|
|
463 void
|
|
464 dump_component (FILE *file, struct component *comp)
|
|
465 {
|
|
466 dref a;
|
|
467 unsigned i;
|
|
468
|
|
469 fprintf (file, "Component%s:\n",
|
|
470 comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
|
|
471 for (i = 0; VEC_iterate (dref, comp->refs, i, a); i++)
|
|
472 dump_dref (file, a);
|
|
473 fprintf (file, "\n");
|
|
474 }
|
|
475
|
|
476 /* Dumps COMPS to FILE. */
|
|
477
|
|
478 extern void dump_components (FILE *, struct component *);
|
|
479 void
|
|
480 dump_components (FILE *file, struct component *comps)
|
|
481 {
|
|
482 struct component *comp;
|
|
483
|
|
484 for (comp = comps; comp; comp = comp->next)
|
|
485 dump_component (file, comp);
|
|
486 }
|
|
487
|
|
488 /* Frees a chain CHAIN. */
|
|
489
|
|
490 static void
|
|
491 release_chain (chain_p chain)
|
|
492 {
|
|
493 dref ref;
|
|
494 unsigned i;
|
|
495
|
|
496 if (chain == NULL)
|
|
497 return;
|
|
498
|
|
499 for (i = 0; VEC_iterate (dref, chain->refs, i, ref); i++)
|
|
500 free (ref);
|
|
501
|
|
502 VEC_free (dref, heap, chain->refs);
|
|
503 VEC_free (tree, heap, chain->vars);
|
|
504 VEC_free (tree, heap, chain->inits);
|
|
505
|
|
506 free (chain);
|
|
507 }
|
|
508
|
|
509 /* Frees CHAINS. */
|
|
510
|
|
511 static void
|
|
512 release_chains (VEC (chain_p, heap) *chains)
|
|
513 {
|
|
514 unsigned i;
|
|
515 chain_p chain;
|
|
516
|
|
517 for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
|
|
518 release_chain (chain);
|
|
519 VEC_free (chain_p, heap, chains);
|
|
520 }
|
|
521
|
|
522 /* Frees a component COMP. */
|
|
523
|
|
524 static void
|
|
525 release_component (struct component *comp)
|
|
526 {
|
|
527 VEC_free (dref, heap, comp->refs);
|
|
528 free (comp);
|
|
529 }
|
|
530
|
|
531 /* Frees list of components COMPS. */
|
|
532
|
|
533 static void
|
|
534 release_components (struct component *comps)
|
|
535 {
|
|
536 struct component *act, *next;
|
|
537
|
|
538 for (act = comps; act; act = next)
|
|
539 {
|
|
540 next = act->next;
|
|
541 release_component (act);
|
|
542 }
|
|
543 }
|
|
544
|
|
/* Finds a root of tree given by FATHERS containing A, and performs path
   shortening (union-find "find" with full path compression: after the
   call, every node on the walked path points directly at the root).  */

static unsigned
component_of (unsigned fathers[], unsigned a)
{
  unsigned root, n;

  /* Walk up until we reach a self-parented node, i.e. the root.  */
  for (root = a; root != fathers[root]; root = fathers[root])
    continue;

  /* Second pass: redirect every node on the path to the root.  */
  for (; a != root; a = n)
    {
      n = fathers[a];
      fathers[a] = root;
    }

  return root;
}
|
|
564
|
|
/* Join operation for DFU.  FATHERS gives the tree, SIZES are sizes of the
   components, A and B are components to merge.  Union by size: the smaller
   component is attached under the root of the larger one, keeping the
   trees shallow.  */

static void
merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
{
  unsigned ca = component_of (fathers, a);
  unsigned cb = component_of (fathers, b);

  /* Already in the same component; nothing to do.  */
  if (ca == cb)
    return;

  if (sizes[ca] < sizes[cb])
    {
      sizes[cb] += sizes[ca];
      fathers[ca] = cb;
    }
  else
    {
      sizes[ca] += sizes[cb];
      fathers[cb] = ca;
    }
}
|
|
588
|
|
589 /* Returns true if A is a reference that is suitable for predictive commoning
|
|
590 in the innermost loop that contains it. REF_STEP is set according to the
|
|
591 step of the reference A. */
|
|
592
|
|
593 static bool
|
|
594 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
|
|
595 {
|
|
596 tree ref = DR_REF (a), step = DR_STEP (a);
|
|
597
|
|
598 if (!step
|
|
599 || !is_gimple_reg_type (TREE_TYPE (ref))
|
|
600 || tree_could_throw_p (ref))
|
|
601 return false;
|
|
602
|
|
603 if (integer_zerop (step))
|
|
604 *ref_step = RS_INVARIANT;
|
|
605 else if (integer_nonzerop (step))
|
|
606 *ref_step = RS_NONZERO;
|
|
607 else
|
|
608 *ref_step = RS_ANY;
|
|
609
|
|
610 return true;
|
|
611 }
|
|
612
|
|
613 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
|
|
614
|
|
615 static void
|
|
616 aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
|
|
617 {
|
|
618 aff_tree delta;
|
|
619
|
|
620 tree_to_aff_combination_expand (DR_OFFSET (dr), sizetype, offset,
|
|
621 &name_expansions);
|
|
622 aff_combination_const (&delta, sizetype, tree_to_double_int (DR_INIT (dr)));
|
|
623 aff_combination_add (offset, &delta);
|
|
624 }
|
|
625
|
|
626 /* Determines number of iterations of the innermost enclosing loop before B
|
|
627 refers to exactly the same location as A and stores it to OFF. If A and
|
|
628 B do not have the same step, they never meet, or anything else fails,
|
|
629 returns false, otherwise returns true. Both A and B are assumed to
|
|
630 satisfy suitable_reference_p. */
|
|
631
|
|
632 static bool
|
|
633 determine_offset (struct data_reference *a, struct data_reference *b,
|
|
634 double_int *off)
|
|
635 {
|
|
636 aff_tree diff, baseb, step;
|
|
637 tree typea, typeb;
|
|
638
|
|
639 /* Check that both the references access the location in the same type. */
|
|
640 typea = TREE_TYPE (DR_REF (a));
|
|
641 typeb = TREE_TYPE (DR_REF (b));
|
|
642 if (!useless_type_conversion_p (typeb, typea))
|
|
643 return false;
|
|
644
|
|
645 /* Check whether the base address and the step of both references is the
|
|
646 same. */
|
|
647 if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
|
|
648 || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
|
|
649 return false;
|
|
650
|
|
651 if (integer_zerop (DR_STEP (a)))
|
|
652 {
|
|
653 /* If the references have loop invariant address, check that they access
|
|
654 exactly the same location. */
|
|
655 *off = double_int_zero;
|
|
656 return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
|
|
657 && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
|
|
658 }
|
|
659
|
|
660 /* Compare the offsets of the addresses, and check whether the difference
|
|
661 is a multiple of step. */
|
|
662 aff_combination_dr_offset (a, &diff);
|
|
663 aff_combination_dr_offset (b, &baseb);
|
|
664 aff_combination_scale (&baseb, double_int_minus_one);
|
|
665 aff_combination_add (&diff, &baseb);
|
|
666
|
|
667 tree_to_aff_combination_expand (DR_STEP (a), sizetype,
|
|
668 &step, &name_expansions);
|
|
669 return aff_combination_constant_multiple_p (&diff, &step, off);
|
|
670 }
|
|
671
|
|
672 /* Returns the last basic block in LOOP for that we are sure that
|
|
673 it is executed whenever the loop is entered. */
|
|
674
|
|
675 static basic_block
|
|
676 last_always_executed_block (struct loop *loop)
|
|
677 {
|
|
678 unsigned i;
|
|
679 VEC (edge, heap) *exits = get_loop_exit_edges (loop);
|
|
680 edge ex;
|
|
681 basic_block last = loop->latch;
|
|
682
|
|
683 for (i = 0; VEC_iterate (edge, exits, i, ex); i++)
|
|
684 last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
|
|
685 VEC_free (edge, heap, exits);
|
|
686
|
|
687 return last;
|
|
688 }
|
|
689
|
|
690 /* Splits dependence graph on DATAREFS described by DEPENDS to components. */
|
|
691
|
|
692 static struct component *
|
|
693 split_data_refs_to_components (struct loop *loop,
|
|
694 VEC (data_reference_p, heap) *datarefs,
|
|
695 VEC (ddr_p, heap) *depends)
|
|
696 {
|
|
697 unsigned i, n = VEC_length (data_reference_p, datarefs);
|
|
698 unsigned ca, ia, ib, bad;
|
|
699 unsigned *comp_father = XNEWVEC (unsigned, n + 1);
|
|
700 unsigned *comp_size = XNEWVEC (unsigned, n + 1);
|
|
701 struct component **comps;
|
|
702 struct data_reference *dr, *dra, *drb;
|
|
703 struct data_dependence_relation *ddr;
|
|
704 struct component *comp_list = NULL, *comp;
|
|
705 dref dataref;
|
|
706 basic_block last_always_executed = last_always_executed_block (loop);
|
|
707
|
|
708 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
|
|
709 {
|
|
710 if (!DR_REF (dr))
|
|
711 {
|
|
712 /* A fake reference for call or asm_expr that may clobber memory;
|
|
713 just fail. */
|
|
714 goto end;
|
|
715 }
|
|
716 dr->aux = (void *) (size_t) i;
|
|
717 comp_father[i] = i;
|
|
718 comp_size[i] = 1;
|
|
719 }
|
|
720
|
|
721 /* A component reserved for the "bad" data references. */
|
|
722 comp_father[n] = n;
|
|
723 comp_size[n] = 1;
|
|
724
|
|
725 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
|
|
726 {
|
|
727 enum ref_step_type dummy;
|
|
728
|
|
729 if (!suitable_reference_p (dr, &dummy))
|
|
730 {
|
|
731 ia = (unsigned) (size_t) dr->aux;
|
|
732 merge_comps (comp_father, comp_size, n, ia);
|
|
733 }
|
|
734 }
|
|
735
|
|
736 for (i = 0; VEC_iterate (ddr_p, depends, i, ddr); i++)
|
|
737 {
|
|
738 double_int dummy_off;
|
|
739
|
|
740 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
|
|
741 continue;
|
|
742
|
|
743 dra = DDR_A (ddr);
|
|
744 drb = DDR_B (ddr);
|
|
745 ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
|
|
746 ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
|
|
747 if (ia == ib)
|
|
748 continue;
|
|
749
|
|
750 bad = component_of (comp_father, n);
|
|
751
|
|
752 /* If both A and B are reads, we may ignore unsuitable dependences. */
|
|
753 if (DR_IS_READ (dra) && DR_IS_READ (drb)
|
|
754 && (ia == bad || ib == bad
|
|
755 || !determine_offset (dra, drb, &dummy_off)))
|
|
756 continue;
|
|
757
|
|
758 merge_comps (comp_father, comp_size, ia, ib);
|
|
759 }
|
|
760
|
|
761 comps = XCNEWVEC (struct component *, n);
|
|
762 bad = component_of (comp_father, n);
|
|
763 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
|
|
764 {
|
|
765 ia = (unsigned) (size_t) dr->aux;
|
|
766 ca = component_of (comp_father, ia);
|
|
767 if (ca == bad)
|
|
768 continue;
|
|
769
|
|
770 comp = comps[ca];
|
|
771 if (!comp)
|
|
772 {
|
|
773 comp = XCNEW (struct component);
|
|
774 comp->refs = VEC_alloc (dref, heap, comp_size[ca]);
|
|
775 comps[ca] = comp;
|
|
776 }
|
|
777
|
|
778 dataref = XCNEW (struct dref);
|
|
779 dataref->ref = dr;
|
|
780 dataref->stmt = DR_STMT (dr);
|
|
781 dataref->offset = double_int_zero;
|
|
782 dataref->distance = 0;
|
|
783
|
|
784 dataref->always_accessed
|
|
785 = dominated_by_p (CDI_DOMINATORS, last_always_executed,
|
|
786 gimple_bb (dataref->stmt));
|
|
787 dataref->pos = VEC_length (dref, comp->refs);
|
|
788 VEC_quick_push (dref, comp->refs, dataref);
|
|
789 }
|
|
790
|
|
791 for (i = 0; i < n; i++)
|
|
792 {
|
|
793 comp = comps[i];
|
|
794 if (comp)
|
|
795 {
|
|
796 comp->next = comp_list;
|
|
797 comp_list = comp;
|
|
798 }
|
|
799 }
|
|
800 free (comps);
|
|
801
|
|
802 end:
|
|
803 free (comp_father);
|
|
804 free (comp_size);
|
|
805 return comp_list;
|
|
806 }
|
|
807
|
|
808 /* Returns true if the component COMP satisfies the conditions
|
|
809 described in 2) at the beginning of this file. LOOP is the current
|
|
810 loop. */
|
|
811
|
|
812 static bool
|
|
813 suitable_component_p (struct loop *loop, struct component *comp)
|
|
814 {
|
|
815 unsigned i;
|
|
816 dref a, first;
|
|
817 basic_block ba, bp = loop->header;
|
|
818 bool ok, has_write = false;
|
|
819
|
|
820 for (i = 0; VEC_iterate (dref, comp->refs, i, a); i++)
|
|
821 {
|
|
822 ba = gimple_bb (a->stmt);
|
|
823
|
|
824 if (!just_once_each_iteration_p (loop, ba))
|
|
825 return false;
|
|
826
|
|
827 gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
|
|
828 bp = ba;
|
|
829
|
|
830 if (!DR_IS_READ (a->ref))
|
|
831 has_write = true;
|
|
832 }
|
|
833
|
|
834 first = VEC_index (dref, comp->refs, 0);
|
|
835 ok = suitable_reference_p (first->ref, &comp->comp_step);
|
|
836 gcc_assert (ok);
|
|
837 first->offset = double_int_zero;
|
|
838
|
|
839 for (i = 1; VEC_iterate (dref, comp->refs, i, a); i++)
|
|
840 {
|
|
841 if (!determine_offset (first->ref, a->ref, &a->offset))
|
|
842 return false;
|
|
843
|
|
844 #ifdef ENABLE_CHECKING
|
|
845 {
|
|
846 enum ref_step_type a_step;
|
|
847 ok = suitable_reference_p (a->ref, &a_step);
|
|
848 gcc_assert (ok && a_step == comp->comp_step);
|
|
849 }
|
|
850 #endif
|
|
851 }
|
|
852
|
|
853 /* If there is a write inside the component, we must know whether the
|
|
854 step is nonzero or not -- we would not otherwise be able to recognize
|
|
855 whether the value accessed by reads comes from the OFFSET-th iteration
|
|
856 or the previous one. */
|
|
857 if (has_write && comp->comp_step == RS_ANY)
|
|
858 return false;
|
|
859
|
|
860 return true;
|
|
861 }
|
|
862
|
|
863 /* Check the conditions on references inside each of components COMPS,
|
|
864 and remove the unsuitable components from the list. The new list
|
|
865 of components is returned. The conditions are described in 2) at
|
|
866 the beginning of this file. LOOP is the current loop. */
|
|
867
|
|
868 static struct component *
|
|
869 filter_suitable_components (struct loop *loop, struct component *comps)
|
|
870 {
|
|
871 struct component **comp, *act;
|
|
872
|
|
873 for (comp = &comps; *comp; )
|
|
874 {
|
|
875 act = *comp;
|
|
876 if (suitable_component_p (loop, act))
|
|
877 comp = &act->next;
|
|
878 else
|
|
879 {
|
|
880 dref ref;
|
|
881 unsigned i;
|
|
882
|
|
883 *comp = act->next;
|
|
884 for (i = 0; VEC_iterate (dref, act->refs, i, ref); i++)
|
|
885 free (ref);
|
|
886 release_component (act);
|
|
887 }
|
|
888 }
|
|
889
|
|
890 return comps;
|
|
891 }
|
|
892
|
|
893 /* Compares two drefs A and B by their offset and position. Callback for
|
|
894 qsort. */
|
|
895
|
|
896 static int
|
|
897 order_drefs (const void *a, const void *b)
|
|
898 {
|
|
899 const dref *const da = (const dref *) a;
|
|
900 const dref *const db = (const dref *) b;
|
|
901 int offcmp = double_int_scmp ((*da)->offset, (*db)->offset);
|
|
902
|
|
903 if (offcmp != 0)
|
|
904 return offcmp;
|
|
905
|
|
906 return (*da)->pos - (*db)->pos;
|
|
907 }
|
|
908
|
|
909 /* Returns root of the CHAIN. */
|
|
910
|
|
911 static inline dref
|
|
912 get_chain_root (chain_p chain)
|
|
913 {
|
|
914 return VEC_index (dref, chain->refs, 0);
|
|
915 }
|
|
916
|
|
917 /* Adds REF to the chain CHAIN. */
|
|
918
|
|
919 static void
|
|
920 add_ref_to_chain (chain_p chain, dref ref)
|
|
921 {
|
|
922 dref root = get_chain_root (chain);
|
|
923 double_int dist;
|
|
924
|
|
925 gcc_assert (double_int_scmp (root->offset, ref->offset) <= 0);
|
|
926 dist = double_int_add (ref->offset, double_int_neg (root->offset));
|
|
927 if (double_int_ucmp (uhwi_to_double_int (MAX_DISTANCE), dist) <= 0)
|
|
928 {
|
|
929 free (ref);
|
|
930 return;
|
|
931 }
|
|
932 gcc_assert (double_int_fits_in_uhwi_p (dist));
|
|
933
|
|
934 VEC_safe_push (dref, heap, chain->refs, ref);
|
|
935
|
|
936 ref->distance = double_int_to_uhwi (dist);
|
|
937
|
|
938 if (ref->distance >= chain->length)
|
|
939 {
|
|
940 chain->length = ref->distance;
|
|
941 chain->has_max_use_after = false;
|
|
942 }
|
|
943
|
|
944 if (ref->distance == chain->length
|
|
945 && ref->pos > root->pos)
|
|
946 chain->has_max_use_after = true;
|
|
947
|
|
948 chain->all_always_accessed &= ref->always_accessed;
|
|
949 }
|
|
950
|
|
951 /* Returns the chain for invariant component COMP. */
|
|
952
|
|
953 static chain_p
|
|
954 make_invariant_chain (struct component *comp)
|
|
955 {
|
|
956 chain_p chain = XCNEW (struct chain);
|
|
957 unsigned i;
|
|
958 dref ref;
|
|
959
|
|
960 chain->type = CT_INVARIANT;
|
|
961
|
|
962 chain->all_always_accessed = true;
|
|
963
|
|
964 for (i = 0; VEC_iterate (dref, comp->refs, i, ref); i++)
|
|
965 {
|
|
966 VEC_safe_push (dref, heap, chain->refs, ref);
|
|
967 chain->all_always_accessed &= ref->always_accessed;
|
|
968 }
|
|
969
|
|
970 return chain;
|
|
971 }
|
|
972
|
|
973 /* Make a new chain rooted at REF. */
|
|
974
|
|
975 static chain_p
|
|
976 make_rooted_chain (dref ref)
|
|
977 {
|
|
978 chain_p chain = XCNEW (struct chain);
|
|
979
|
|
980 chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD;
|
|
981
|
|
982 VEC_safe_push (dref, heap, chain->refs, ref);
|
|
983 chain->all_always_accessed = ref->always_accessed;
|
|
984
|
|
985 ref->distance = 0;
|
|
986
|
|
987 return chain;
|
|
988 }
|
|
989
|
|
990 /* Returns true if CHAIN is not trivial. */
|
|
991
|
|
992 static bool
|
|
993 nontrivial_chain_p (chain_p chain)
|
|
994 {
|
|
995 return chain != NULL && VEC_length (dref, chain->refs) > 1;
|
|
996 }
|
|
997
|
|
998 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
|
|
999 is no such name. */
|
|
1000
|
|
1001 static tree
|
|
1002 name_for_ref (dref ref)
|
|
1003 {
|
|
1004 tree name;
|
|
1005
|
|
1006 if (is_gimple_assign (ref->stmt))
|
|
1007 {
|
|
1008 if (!ref->ref || DR_IS_READ (ref->ref))
|
|
1009 name = gimple_assign_lhs (ref->stmt);
|
|
1010 else
|
|
1011 name = gimple_assign_rhs1 (ref->stmt);
|
|
1012 }
|
|
1013 else
|
|
1014 name = PHI_RESULT (ref->stmt);
|
|
1015
|
|
1016 return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
|
|
1017 }
|
|
1018
|
|
/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
   iterations of the innermost enclosing loop).  */

static bool
valid_initializer_p (struct data_reference *ref,
		     unsigned distance, struct data_reference *root)
{
  aff_tree diff, base, step;
  double_int off;

  /* Both REF and ROOT must be accessing the same object.  */
  if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
    return false;

  /* The initializer is defined outside of loop, hence its address must be
     invariant inside the loop.  */
  gcc_assert (integer_zerop (DR_STEP (ref)));

  /* If the address of the reference is invariant, initializer must access
     exactly the same location.  */
  if (integer_zerop (DR_STEP (root)))
    return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
	    && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));

  /* Verify that this index of REF is equal to the root's index at
     -DISTANCE-th iteration.  */
  /* DIFF = offset (ROOT) - offset (REF).  */
  aff_combination_dr_offset (root, &diff);
  aff_combination_dr_offset (ref, &base);
  aff_combination_scale (&base, double_int_minus_one);
  aff_combination_add (&diff, &base);

  tree_to_aff_combination_expand (DR_STEP (root), sizetype, &step,
				  &name_expansions);
  /* The offset difference must be exactly DISTANCE times the step of
     ROOT for REF to be a valid initializer.  */
  if (!aff_combination_constant_multiple_p (&diff, &step, &off))
    return false;

  if (!double_int_equal_p (off, uhwi_to_double_int (distance)))
    return false;

  return true;
}
|
|
1060
|
|
1061 /* Finds looparound phi node of LOOP that copies the value of REF, and if its
|
|
1062 initial value is correct (equal to initial value of REF shifted by one
|
|
1063 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
|
|
1064 is the root of the current chain. */
|
|
1065
|
|
1066 static gimple
|
|
1067 find_looparound_phi (struct loop *loop, dref ref, dref root)
|
|
1068 {
|
|
1069 tree name, init, init_ref;
|
|
1070 gimple phi = NULL, init_stmt;
|
|
1071 edge latch = loop_latch_edge (loop);
|
|
1072 struct data_reference init_dr;
|
|
1073 gimple_stmt_iterator psi;
|
|
1074
|
|
1075 if (is_gimple_assign (ref->stmt))
|
|
1076 {
|
|
1077 if (DR_IS_READ (ref->ref))
|
|
1078 name = gimple_assign_lhs (ref->stmt);
|
|
1079 else
|
|
1080 name = gimple_assign_rhs1 (ref->stmt);
|
|
1081 }
|
|
1082 else
|
|
1083 name = PHI_RESULT (ref->stmt);
|
|
1084 if (!name)
|
|
1085 return NULL;
|
|
1086
|
|
1087 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
|
|
1088 {
|
|
1089 phi = gsi_stmt (psi);
|
|
1090 if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
|
|
1091 break;
|
|
1092 }
|
|
1093
|
|
1094 if (gsi_end_p (psi))
|
|
1095 return NULL;
|
|
1096
|
|
1097 init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
|
|
1098 if (TREE_CODE (init) != SSA_NAME)
|
|
1099 return NULL;
|
|
1100 init_stmt = SSA_NAME_DEF_STMT (init);
|
|
1101 if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
|
|
1102 return NULL;
|
|
1103 gcc_assert (gimple_assign_lhs (init_stmt) == init);
|
|
1104
|
|
1105 init_ref = gimple_assign_rhs1 (init_stmt);
|
|
1106 if (!REFERENCE_CLASS_P (init_ref)
|
|
1107 && !DECL_P (init_ref))
|
|
1108 return NULL;
|
|
1109
|
|
1110 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
|
|
1111 loop enclosing PHI). */
|
|
1112 memset (&init_dr, 0, sizeof (struct data_reference));
|
|
1113 DR_REF (&init_dr) = init_ref;
|
|
1114 DR_STMT (&init_dr) = phi;
|
|
1115 if (!dr_analyze_innermost (&init_dr))
|
|
1116 return NULL;
|
|
1117
|
|
1118 if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
|
|
1119 return NULL;
|
|
1120
|
|
1121 return phi;
|
|
1122 }
|
|
1123
|
|
/* Adds a reference for the looparound copy of REF in PHI to CHAIN.  */

static void
insert_looparound_copy (chain_p chain, dref ref, gimple phi)
{
  dref nw = XCNEW (struct dref), aref;
  unsigned i;

  nw->stmt = phi;
  /* The phi carries the value of REF from the previous iteration, hence
     its distance from the root is one larger than REF's.  */
  nw->distance = ref->distance + 1;
  /* Phi nodes in the loop header are executed in every iteration.  */
  nw->always_accessed = 1;

  /* Keep the references of the chain sorted by increasing distance.  */
  for (i = 0; VEC_iterate (dref, chain->refs, i, aref); i++)
    if (aref->distance >= nw->distance)
      break;
  VEC_safe_insert (dref, heap, chain->refs, i, nw);

  if (nw->distance > chain->length)
    {
      chain->length = nw->distance;
      /* The phi is at the loop header (see find_looparound_phi), hence it
	 precedes the root statement.  */
      chain->has_max_use_after = false;
    }
}
|
|
1147
|
|
/* For references in CHAIN that are copied around the LOOP (created previously
   by PRE, or by user), add the results of such copies to the chain.  This
   enables us to remove the copies by unrolling, and may need less registers
   (also, it may allow us to combine chains together).  */

static void
add_looparound_copies (struct loop *loop, chain_p chain)
{
  unsigned i;
  dref ref, root = get_chain_root (chain);
  gimple phi;

  for (i = 0; VEC_iterate (dref, chain->refs, i, ref); i++)
    {
      phi = find_looparound_phi (loop, ref, root);
      if (!phi)
	continue;

      /* Remember the phi in LOOPAROUND_PHIS so that its uses can be ignored
	 later (see single_nonlooparound_use).  */
      bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
      insert_looparound_copy (chain, ref, phi);
    }
}
|
|
1170
|
|
1171 /* Find roots of the values and determine distances in the component COMP.
|
|
1172 The references are redistributed into CHAINS. LOOP is the current
|
|
1173 loop. */
|
|
1174
|
|
1175 static void
|
|
1176 determine_roots_comp (struct loop *loop,
|
|
1177 struct component *comp,
|
|
1178 VEC (chain_p, heap) **chains)
|
|
1179 {
|
|
1180 unsigned i;
|
|
1181 dref a;
|
|
1182 chain_p chain = NULL;
|
|
1183
|
|
1184 /* Invariants are handled specially. */
|
|
1185 if (comp->comp_step == RS_INVARIANT)
|
|
1186 {
|
|
1187 chain = make_invariant_chain (comp);
|
|
1188 VEC_safe_push (chain_p, heap, *chains, chain);
|
|
1189 return;
|
|
1190 }
|
|
1191
|
|
1192 qsort (VEC_address (dref, comp->refs), VEC_length (dref, comp->refs),
|
|
1193 sizeof (dref), order_drefs);
|
|
1194
|
|
1195 for (i = 0; VEC_iterate (dref, comp->refs, i, a); i++)
|
|
1196 {
|
|
1197 if (!chain || !DR_IS_READ (a->ref))
|
|
1198 {
|
|
1199 if (nontrivial_chain_p (chain))
|
|
1200 VEC_safe_push (chain_p, heap, *chains, chain);
|
|
1201 else
|
|
1202 release_chain (chain);
|
|
1203 chain = make_rooted_chain (a);
|
|
1204 continue;
|
|
1205 }
|
|
1206
|
|
1207 add_ref_to_chain (chain, a);
|
|
1208 }
|
|
1209
|
|
1210 if (nontrivial_chain_p (chain))
|
|
1211 {
|
|
1212 add_looparound_copies (loop, chain);
|
|
1213 VEC_safe_push (chain_p, heap, *chains, chain);
|
|
1214 }
|
|
1215 else
|
|
1216 release_chain (chain);
|
|
1217 }
|
|
1218
|
|
1219 /* Find roots of the values and determine distances in components COMPS, and
|
|
1220 separates the references to CHAINS. LOOP is the current loop. */
|
|
1221
|
|
1222 static void
|
|
1223 determine_roots (struct loop *loop,
|
|
1224 struct component *comps, VEC (chain_p, heap) **chains)
|
|
1225 {
|
|
1226 struct component *comp;
|
|
1227
|
|
1228 for (comp = comps; comp; comp = comp->next)
|
|
1229 determine_roots_comp (loop, comp, chains);
|
|
1230 }
|
|
1231
|
|
/* Replace the reference in statement STMT with temporary variable
   NEW_TREE.  If SET is true, NEW_TREE is instead initialized to the value of
   the reference in the statement.  IN_LHS is true if the reference
   is in the lhs of STMT, false if it is in rhs.  */

static void
replace_ref_with (gimple stmt, tree new_tree, bool set, bool in_lhs)
{
  tree val;
  gimple new_stmt;
  gimple_stmt_iterator bsi, psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      /* A phi can only be replaced by a plain use of NEW_TREE.  */
      gcc_assert (!in_lhs && !set);

      val = PHI_RESULT (stmt);
      bsi = gsi_after_labels (gimple_bb (stmt));
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, false);

      /* Turn the phi node into GIMPLE_ASSIGN.  */
      new_stmt = gimple_build_assign (val, new_tree);
      gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
      return;
    }

  /* Since the reference is of gimple_reg type, it should only
     appear as lhs or rhs of modify statement.  */
  gcc_assert (is_gimple_assign (stmt));

  bsi = gsi_for_stmt (stmt);

  /* If we do not need to initialize NEW_TREE, just replace the use of OLD.  */
  if (!set)
    {
      gcc_assert (!in_lhs);
      gimple_assign_set_rhs_from_tree (&bsi, new_tree);
      /* Re-fetch the statement: gimple_assign_set_rhs_from_tree may have
	 replaced it in the sequence.  */
      stmt = gsi_stmt (bsi);
      update_stmt (stmt);
      return;
    }

  if (in_lhs)
    {
      /* We have statement

	 OLD = VAL

	 If OLD is a memory reference, then VAL is gimple_val, and we transform
	 this to

	 OLD = VAL
	 NEW = VAL

	 Otherwise, we are replacing a combination chain,
	 VAL is the expression that performs the combination, and OLD is an
	 SSA name.  In this case, we transform the assignment to

	 OLD = VAL
	 NEW = OLD

	 */

      val = gimple_assign_lhs (stmt);
      if (TREE_CODE (val) != SSA_NAME)
	{
	  /* OLD is a memory reference; copy from the (gimple_val) rhs.  */
	  gcc_assert (gimple_assign_copy_p (stmt));
	  val = gimple_assign_rhs1 (stmt);
	}
    }
  else
    {
      /* VAL = OLD

	 is transformed to

	 VAL = OLD
	 NEW = VAL  */

      val = gimple_assign_lhs (stmt);
    }

  new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
  gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
}
|
|
1318
|
|
/* Returns the reference to the address of REF in the ITER-th iteration of
   LOOP, or NULL if we fail to determine it (ITER may be negative).  We
   try to preserve the original shape of the reference (not rewrite it
   as an indirect ref to the address), to make tree_could_trap_p in
   prepare_initializers_chain return false more often.  */

static tree
ref_at_iteration (struct loop *loop, tree ref, int iter)
{
  tree idx, *idx_p, type, val, op0 = NULL_TREE, ret;
  affine_iv iv;
  bool ok;

  /* Recurse on the base object of a handled component first.  */
  if (handled_component_p (ref))
    {
      op0 = ref_at_iteration (loop, TREE_OPERAND (ref, 0), iter);
      if (!op0)
	return NULL_TREE;
    }
  else if (!INDIRECT_REF_P (ref))
    /* A bare decl does not vary with the iteration.  */
    return unshare_expr (ref);

  /* Determine which operand of REF evolves with the loop (IDX) and where
     it goes in the rebuilt reference (IDX_P).  */
  if (TREE_CODE (ref) == INDIRECT_REF)
    {
      ret = build1 (INDIRECT_REF, TREE_TYPE (ref), NULL_TREE);
      idx = TREE_OPERAND (ref, 0);
      idx_p = &TREE_OPERAND (ret, 0);
    }
  else if (TREE_CODE (ref) == COMPONENT_REF)
    {
      /* Check that the offset is loop invariant.  */
      if (TREE_OPERAND (ref, 2)
	  && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 2)))
	return NULL_TREE;

      return build3 (COMPONENT_REF, TREE_TYPE (ref), op0,
		     unshare_expr (TREE_OPERAND (ref, 1)),
		     unshare_expr (TREE_OPERAND (ref, 2)));
    }
  else if (TREE_CODE (ref) == ARRAY_REF)
    {
      /* Check that the lower bound and the step are loop invariant.  */
      if (TREE_OPERAND (ref, 2)
	  && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 2)))
	return NULL_TREE;
      if (TREE_OPERAND (ref, 3)
	  && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 3)))
	return NULL_TREE;

      ret = build4 (ARRAY_REF, TREE_TYPE (ref), op0, NULL_TREE,
		    unshare_expr (TREE_OPERAND (ref, 2)),
		    unshare_expr (TREE_OPERAND (ref, 3)));
      idx = TREE_OPERAND (ref, 1);
      idx_p = &TREE_OPERAND (ret, 1);
    }
  else
    return NULL_TREE;

  /* The index must be a simple induction variable of LOOP so we can
     shift it by ITER iterations.  */
  ok = simple_iv (loop, loop, idx, &iv, true);
  if (!ok)
    return NULL_TREE;
  iv.base = expand_simple_operations (iv.base);
  if (integer_zerop (iv.step))
    *idx_p = unshare_expr (iv.base);
  else
    {
      /* IDX at iteration ITER is BASE + ITER * STEP.  Pointer arithmetic
	 uses sizetype offsets and POINTER_PLUS_EXPR.  */
      type = TREE_TYPE (iv.base);
      if (POINTER_TYPE_P (type))
	{
	  val = fold_build2 (MULT_EXPR, sizetype, iv.step,
			     size_int (iter));
	  val = fold_build2 (POINTER_PLUS_EXPR, type, iv.base, val);
	}
      else
	{
	  val = fold_build2 (MULT_EXPR, type, iv.step,
			     build_int_cst_type (type, iter));
	  val = fold_build2 (PLUS_EXPR, type, iv.base, val);
	}
      *idx_p = unshare_expr (val);
    }

  return ret;
}
|
|
1403
|
|
1404 /* Get the initialization expression for the INDEX-th temporary variable
|
|
1405 of CHAIN. */
|
|
1406
|
|
1407 static tree
|
|
1408 get_init_expr (chain_p chain, unsigned index)
|
|
1409 {
|
|
1410 if (chain->type == CT_COMBINATION)
|
|
1411 {
|
|
1412 tree e1 = get_init_expr (chain->ch1, index);
|
|
1413 tree e2 = get_init_expr (chain->ch2, index);
|
|
1414
|
|
1415 return fold_build2 (chain->op, chain->rslt_type, e1, e2);
|
|
1416 }
|
|
1417 else
|
|
1418 return VEC_index (tree, chain->inits, index);
|
|
1419 }
|
|
1420
|
|
/* Marks all virtual operands of statement STMT for renaming.  */

void
mark_virtual_ops_for_renaming (gimple stmt)
{
  ssa_op_iter iter;
  tree var;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      /* For a phi node, only a virtual (non-register) result needs
	 renaming.  */
      var = PHI_RESULT (stmt);
      if (is_gimple_reg (var))
	return;

      /* mark_sym_for_renaming expects the underlying symbol, not the
	 ssa name.  */
      if (TREE_CODE (var) == SSA_NAME)
	var = SSA_NAME_VAR (var);
      mark_sym_for_renaming (var);
      return;
    }

  /* Make sure the operand caches of STMT are up to date before walking
     its virtual operands.  */
  update_stmt (stmt);

  FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_ALL_VIRTUALS)
    {
      if (TREE_CODE (var) == SSA_NAME)
	var = SSA_NAME_VAR (var);
      mark_sym_for_renaming (var);
    }
}
|
|
1450
|
|
1451 /* Calls mark_virtual_ops_for_renaming for all members of LIST. */
|
|
1452
|
|
1453 static void
|
|
1454 mark_virtual_ops_for_renaming_list (gimple_seq list)
|
|
1455 {
|
|
1456 gimple_stmt_iterator gsi;
|
|
1457
|
|
1458 for (gsi = gsi_start (list); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
1459 mark_virtual_ops_for_renaming (gsi_stmt (gsi));
|
|
1460 }
|
|
1461
|
|
/* Returns a new temporary variable used for the I-th variable carrying
   value of REF.  The variable's uid is marked in TMP_VARS.  */

static tree
predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
{
  tree type = TREE_TYPE (ref);
  /* I is included in the variable's name for debugging readability.  */
  tree var = create_tmp_var (type, get_lsm_tmp_name (ref, i));

  /* We never access the components of the temporary variable in predictive
     commoning.  */
  if (TREE_CODE (type) == COMPLEX_TYPE
      || TREE_CODE (type) == VECTOR_TYPE)
    DECL_GIMPLE_REG_P (var) = 1;

  add_referenced_var (var);
  bitmap_set_bit (tmp_vars, DECL_UID (var));
  return var;
}
|
|
1481
|
|
/* Creates the variables for CHAIN, as well as phi nodes for them and
   initialization on entry to LOOP.  Uids of the newly created
   temporary variables are marked in TMP_VARS.  */

static void
initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  unsigned i;
  unsigned n = chain->length;
  dref root = get_chain_root (chain);
  /* If no use with the maximum distance occurs after the root, the last
     variable can share storage with the first one.  */
  bool reuse_first = !chain->has_max_use_after;
  tree ref, init, var, next;
  gimple phi;
  gimple_seq stmts;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* If N == 0, then all the references are within the single iteration.  And
     since this is an nonempty chain, reuse_first cannot be true.  */
  gcc_assert (n > 0 || !reuse_first);

  chain->vars = VEC_alloc (tree, heap, n + 1);

  if (chain->type == CT_COMBINATION)
    ref = gimple_assign_lhs (root->stmt);
  else
    ref = DR_REF (root->ref);

  /* Create N + 1 variables (or N when the first one is reused as the
     last).  */
  for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
    {
      var = predcom_tmp_var (ref, i, tmp_vars);
      VEC_quick_push (tree, chain->vars, var);
    }
  if (reuse_first)
    VEC_quick_push (tree, chain->vars, VEC_index (tree, chain->vars, 0));

  for (i = 0; VEC_iterate (tree, chain->vars, i, var); i++)
    VEC_replace (tree, chain->vars, i, make_ssa_name (var, NULL));

  /* For each variable but the last, build the loop-header phi that selects
     its initializer on entry and the next variable around the latch.  */
  for (i = 0; i < n; i++)
    {
      var = VEC_index (tree, chain->vars, i);
      next = VEC_index (tree, chain->vars, i + 1);
      init = get_init_expr (chain, i);

      init = force_gimple_operand (init, &stmts, true, NULL_TREE);
      if (stmts)
	{
	  mark_virtual_ops_for_renaming_list (stmts);
	  gsi_insert_seq_on_edge_immediate (entry, stmts);
	}

      phi = create_phi_node (var, loop->header);
      SSA_NAME_DEF_STMT (var) = phi;
      add_phi_arg (phi, init, entry);
      add_phi_arg (phi, next, latch);
    }
}
|
|
1539
|
|
1540 /* Create the variables and initialization statement for root of chain
|
|
1541 CHAIN. Uids of the newly created temporary variables are marked
|
|
1542 in TMP_VARS. */
|
|
1543
|
|
1544 static void
|
|
1545 initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars)
|
|
1546 {
|
|
1547 dref root = get_chain_root (chain);
|
|
1548 bool in_lhs = (chain->type == CT_STORE_LOAD
|
|
1549 || chain->type == CT_COMBINATION);
|
|
1550
|
|
1551 initialize_root_vars (loop, chain, tmp_vars);
|
|
1552 replace_ref_with (root->stmt,
|
|
1553 VEC_index (tree, chain->vars, chain->length),
|
|
1554 true, in_lhs);
|
|
1555 }
|
|
1556
|
|
/* Initializes a variable for load motion for ROOT and prepares phi nodes and
   initialization on entry to LOOP if necessary.  The ssa name for the variable
   is stored in VARS.  If WRITTEN is true, also a phi node to copy its value
   around the loop is created.  Uid of the newly created temporary variable
   is marked in TMP_VARS.  INITS is the list containing the (single)
   initializer.  */

static void
initialize_root_vars_lm (struct loop *loop, dref root, bool written,
			 VEC(tree, heap) **vars, VEC(tree, heap) *inits,
			 bitmap tmp_vars)
{
  unsigned i;
  tree ref = DR_REF (root->ref), init, var, next;
  gimple_seq stmts;
  gimple phi;
  edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);

  /* Find the initializer for the variable, and check that it cannot
     trap.  */
  init = VEC_index (tree, inits, 0);

  /* One variable for a read-only location; two (current and next) when the
     location is also written in the loop.  */
  *vars = VEC_alloc (tree, heap, written ? 2 : 1);
  var = predcom_tmp_var (ref, 0, tmp_vars);
  VEC_quick_push (tree, *vars, var);
  if (written)
    VEC_quick_push (tree, *vars, VEC_index (tree, *vars, 0));

  for (i = 0; VEC_iterate (tree, *vars, i, var); i++)
    VEC_replace (tree, *vars, i, make_ssa_name (var, NULL));

  var = VEC_index (tree, *vars, 0);

  init = force_gimple_operand (init, &stmts, written, NULL_TREE);
  if (stmts)
    {
      mark_virtual_ops_for_renaming_list (stmts);
      gsi_insert_seq_on_edge_immediate (entry, stmts);
    }

  if (written)
    {
      /* Carry the value around the loop through a phi node.  */
      next = VEC_index (tree, *vars, 1);
      phi = create_phi_node (var, loop->header);
      SSA_NAME_DEF_STMT (var) = phi;
      add_phi_arg (phi, init, entry);
      add_phi_arg (phi, next, latch);
    }
  else
    {
      /* Read-only: a single initialization in the preheader suffices.  */
      gimple init_stmt = gimple_build_assign (var, init);
      mark_virtual_ops_for_renaming (init_stmt);
      gsi_insert_on_edge_immediate (entry, init_stmt);
    }
}
|
|
1612
|
|
1613
|
|
/* Execute load motion for references in chain CHAIN.  Uids of the newly
   created temporary variables are marked in TMP_VARS.  */

static void
execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
{
  VEC (tree, heap) *vars;
  dref a;
  unsigned n_writes = 0, ridx, i;
  tree var;

  gcc_assert (chain->type == CT_INVARIANT);
  gcc_assert (!chain->combined);
  for (i = 0; VEC_iterate (dref, chain->refs, i, a); i++)
    if (!DR_IS_READ (a->ref))
      n_writes++;

  /* If there are no reads in the loop, there is nothing to do.  */
  if (n_writes == VEC_length (dref, chain->refs))
    return;

  initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
			   &vars, chain->inits, tmp_vars);

  /* RIDX selects which variable the reads use: index 0 while writes remain
     ahead of us (value may still change), index 1 after the last write.  */
  ridx = 0;
  for (i = 0; VEC_iterate (dref, chain->refs, i, a); i++)
    {
      bool is_read = DR_IS_READ (a->ref);
      mark_virtual_ops_for_renaming (a->stmt);

      if (!DR_IS_READ (a->ref))
	{
	  n_writes--;
	  if (n_writes)
	    {
	      /* Each intermediate write defines a fresh ssa name for the
		 variable.  */
	      var = VEC_index (tree, vars, 0);
	      var = make_ssa_name (SSA_NAME_VAR (var), NULL);
	      VEC_replace (tree, vars, 0, var);
	    }
	  else
	    ridx = 1;
	}

      replace_ref_with (a->stmt, VEC_index (tree, vars, ridx),
			!is_read, !is_read);
    }

  VEC_free (tree, heap, vars);
}
|
|
1663
|
|
/* Returns the single statement in that NAME is used, excepting
   the looparound phi nodes contained in one of the chains.  If there is no
   such statement, or more statements, NULL is returned.  */

static gimple
single_nonlooparound_use (tree name)
{
  use_operand_p use;
  imm_use_iterator it;
  gimple stmt, ret = NULL;

  FOR_EACH_IMM_USE_FAST (use, it, name)
    {
      stmt = USE_STMT (use);

      if (gimple_code (stmt) == GIMPLE_PHI)
	{
	  /* Ignore uses in looparound phi nodes.  Uses in other phi nodes
	     could not be processed anyway, so just fail for them.  */
	  if (bitmap_bit_p (looparound_phis,
			    SSA_NAME_VERSION (PHI_RESULT (stmt))))
	    continue;

	  return NULL;
	}
      else if (ret != NULL)
	/* More than one non-phi use.  */
	return NULL;
      else
	ret = stmt;
    }

  return ret;
}
|
|
1697
|
|
/* Remove statement STMT, as well as the chain of assignments in that it is
   used.  */

static void
remove_stmt (gimple stmt)
{
  tree name;
  gimple next;
  gimple_stmt_iterator psi;

  if (gimple_code (stmt) == GIMPLE_PHI)
    {
      /* Find the copy fed by the phi (if any) before removing it, as the
	 phi itself cannot be removed by the loop below.  */
      name = PHI_RESULT (stmt);
      next = single_nonlooparound_use (name);
      psi = gsi_for_stmt (stmt);
      remove_phi_node (&psi, true);

      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (next) != name)
	return;

      stmt = next;
    }

  /* Walk down the chain of single-use ssa name copies, removing each.  */
  while (1)
    {
      gimple_stmt_iterator bsi;

      bsi = gsi_for_stmt (stmt);

      name = gimple_assign_lhs (stmt);
      gcc_assert (TREE_CODE (name) == SSA_NAME);

      /* Determine the successor before STMT (and its uses) are released.  */
      next = single_nonlooparound_use (name);

      mark_virtual_ops_for_renaming (stmt);
      gsi_remove (&bsi, true);
      release_defs (stmt);

      if (!next
	  || !gimple_assign_ssa_name_copy_p (next)
	  || gimple_assign_rhs1 (next) != name)
	return;

      stmt = next;
    }
}
|
|
1746
|
|
/* Perform the predictive commoning optimization for a chain CHAIN.
   Uids of the newly created temporary variables are marked in TMP_VARS.  */

static void
execute_pred_commoning_chain (struct loop *loop, chain_p chain,
			      bitmap tmp_vars)
{
  unsigned i;
  dref a, root;
  tree var;

  if (chain->combined)
    {
      /* For combined chains, just remove the statements that are used to
	 compute the values of the expression (except for the root one).  */
      for (i = 1; VEC_iterate (dref, chain->refs, i, a); i++)
	remove_stmt (a->stmt);
    }
  else
    {
      /* For non-combined chains, set up the variables that hold its value,
	 and replace the uses of the original references by these
	 variables.  */
      root = get_chain_root (chain);
      mark_virtual_ops_for_renaming (root->stmt);

      initialize_root (loop, chain, tmp_vars);
      for (i = 1; VEC_iterate (dref, chain->refs, i, a); i++)
	{
	  mark_virtual_ops_for_renaming (a->stmt);
	  /* A reference at distance D reads the value stored D iterations
	     ago, held in variable LENGTH - D.  */
	  var = VEC_index (tree, chain->vars, chain->length - a->distance);
	  replace_ref_with (a->stmt, var, false, false);
	}
    }
}
|
|
1782
|
|
/* Determines the unroll factor necessary to remove as many temporary variable
   copies as possible.  CHAINS is the list of chains that will be
   optimized.  */

static unsigned
determine_unroll_factor (VEC (chain_p, heap) *chains)
{
  chain_p chain;
  unsigned factor = 1, af, nfactor, i;
  unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);

  for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
    {
      /* Invariant and combined chains create no register copies.  */
      if (chain->type == CT_INVARIANT || chain->combined)
	continue;

      /* The best unroll factor for this chain is equal to the number of
	 temporary variables that we create for it.  */
      af = chain->length;
      if (chain->has_max_use_after)
	af++;

      /* Accumulate the least common multiple of the per-chain factors,
	 as long as it stays within the unrolling limit.  */
      nfactor = factor * af / gcd (factor, af);
      if (nfactor <= max)
	factor = nfactor;
    }

  return factor;
}
|
|
1812
|
|
1813 /* Perform the predictive commoning optimization for CHAINS.
|
|
1814 Uids of the newly created temporary variables are marked in TMP_VARS. */
|
|
1815
|
|
1816 static void
|
|
1817 execute_pred_commoning (struct loop *loop, VEC (chain_p, heap) *chains,
|
|
1818 bitmap tmp_vars)
|
|
1819 {
|
|
1820 chain_p chain;
|
|
1821 unsigned i;
|
|
1822
|
|
1823 for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
|
|
1824 {
|
|
1825 if (chain->type == CT_INVARIANT)
|
|
1826 execute_load_motion (loop, chain, tmp_vars);
|
|
1827 else
|
|
1828 execute_pred_commoning_chain (loop, chain, tmp_vars);
|
|
1829 }
|
|
1830
|
|
1831 update_ssa (TODO_update_ssa_only_virtuals);
|
|
1832 }
|
|
1833
|
|
1834 /* For each reference in CHAINS, if its defining statement is
|
|
1835 phi node, record the ssa name that is defined by it. */
|
|
1836
|
|
1837 static void
|
|
1838 replace_phis_by_defined_names (VEC (chain_p, heap) *chains)
|
|
1839 {
|
|
1840 chain_p chain;
|
|
1841 dref a;
|
|
1842 unsigned i, j;
|
|
1843
|
|
1844 for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
|
|
1845 for (j = 0; VEC_iterate (dref, chain->refs, j, a); j++)
|
|
1846 {
|
|
1847 if (gimple_code (a->stmt) == GIMPLE_PHI)
|
|
1848 {
|
|
1849 a->name_defined_by_phi = PHI_RESULT (a->stmt);
|
|
1850 a->stmt = NULL;
|
|
1851 }
|
|
1852 }
|
|
1853 }
|
|
1854
|
|
1855 /* For each reference in CHAINS, if name_defined_by_phi is not
|
|
1856 NULL, use it to set the stmt field. */
|
|
1857
|
|
1858 static void
|
|
1859 replace_names_by_phis (VEC (chain_p, heap) *chains)
|
|
1860 {
|
|
1861 chain_p chain;
|
|
1862 dref a;
|
|
1863 unsigned i, j;
|
|
1864
|
|
1865 for (i = 0; VEC_iterate (chain_p, chains, i, chain); i++)
|
|
1866 for (j = 0; VEC_iterate (dref, chain->refs, j, a); j++)
|
|
1867 if (a->stmt == NULL)
|
|
1868 {
|
|
1869 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
|
|
1870 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
|
|
1871 a->name_defined_by_phi = NULL_TREE;
|
|
1872 }
|
|
1873 }
|
|
1874
|
|
/* Wrapper over execute_pred_commoning, to pass it as a callback
   to tree_transform_and_unroll_loop.  */

struct epcc_data
{
  VEC (chain_p, heap) *chains;	/* The chains to be transformed.  */
  bitmap tmp_vars;		/* Uids of the created temporaries.  */
};
|
|
1883
|
|
/* Callback for tree_transform_and_unroll_loop: performs predictive
   commoning on LOOP using the chains and temporary-variable bitmap
   packed in DATA (a struct epcc_data).  */

static void
execute_pred_commoning_cbck (struct loop *loop, void *data)
{
  struct epcc_data *const dta = (struct epcc_data *) data;

  /* Restore phi nodes that were replaced by ssa names before
     tree_transform_and_unroll_loop (see detailed description in
     tree_predictive_commoning_loop).  */
  replace_names_by_phis (dta->chains);
  execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
}
|
|
1895
|
|
/* Returns true if we can and should unroll LOOP FACTOR times.  Number
   of iterations of the loop is returned in NITER.  */

static bool
should_unroll_loop_p (struct loop *loop, unsigned factor,
		      struct tree_niter_desc *niter)
{
  edge exit;

  /* Unrolling once is a no-op.  */
  if (factor == 1)
    return false;

  /* Check whether unrolling is possible.  We only want to unroll loops
     for that we are able to determine number of iterations.  We also
     want to split the extra iterations of the loop from its end,
     therefore we require that the loop has precisely one
     exit.  */

  exit = single_dom_exit (loop);
  if (!exit)
    return false;

  if (!number_of_iterations_exit (loop, exit, niter, false))
    return false;

  /* And of course, we must be able to duplicate the loop.  */
  if (!can_duplicate_loop_p (loop))
    return false;

  /* The final loop should be small enough.  */
  if (tree_num_loop_insns (loop, &eni_size_weights) * factor
      > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS))
    return false;

  return true;
}
|
|
1932
|
|
1933 /* Base NAME and all the names in the chain of phi nodes that use it
|
|
1934 on variable VAR. The phi nodes are recognized by being in the copies of
|
|
1935 the header of the LOOP. */
|
|
1936
|
|
1937 static void
|
|
1938 base_names_in_chain_on (struct loop *loop, tree name, tree var)
|
|
1939 {
|
|
1940 gimple stmt, phi;
|
|
1941 imm_use_iterator iter;
|
|
1942 edge e;
|
|
1943
|
|
1944 SSA_NAME_VAR (name) = var;
|
|
1945
|
|
1946 while (1)
|
|
1947 {
|
|
1948 phi = NULL;
|
|
1949 FOR_EACH_IMM_USE_STMT (stmt, iter, name)
|
|
1950 {
|
|
1951 if (gimple_code (stmt) == GIMPLE_PHI
|
|
1952 && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
|
|
1953 {
|
|
1954 phi = stmt;
|
|
1955 BREAK_FROM_IMM_USE_STMT (iter);
|
|
1956 }
|
|
1957 }
|
|
1958 if (!phi)
|
|
1959 return;
|
|
1960
|
|
1961 if (gimple_bb (phi) == loop->header)
|
|
1962 e = loop_latch_edge (loop);
|
|
1963 else
|
|
1964 e = single_pred_edge (gimple_bb (stmt));
|
|
1965
|
|
1966 name = PHI_RESULT (phi);
|
|
1967 SSA_NAME_VAR (name) = var;
|
|
1968 }
|
|
1969 }
|
|
1970
|
|
/* Given an unrolled LOOP after predictive commoning, remove the
   register copies arising from phi nodes by changing the base
   variables of SSA names.  TMP_VARS is the set of the temporary variables
   for those we want to perform this.  */

static void
eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
{
  edge e;
  gimple phi, stmt;
  tree name, use, var;
  gimple_stmt_iterator psi;

  e = loop_latch_edge (loop);
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      name = PHI_RESULT (phi);
      var = SSA_NAME_VAR (name);
      /* Only process names based on the temporaries created by predictive
	 commoning (recorded in TMP_VARS by DECL_UID).  */
      if (!bitmap_bit_p (tmp_vars, DECL_UID (var)))
	continue;
      use = PHI_ARG_DEF_FROM_EDGE (phi, e);
      gcc_assert (TREE_CODE (use) == SSA_NAME);

      /* Base all the ssa names in the ud and du chain of NAME on VAR.  */
      stmt = SSA_NAME_DEF_STMT (use);
      while (gimple_code (stmt) == GIMPLE_PHI
	     /* In case we could not unroll the loop enough to eliminate
		all copies, we may reach the loop header before the defining
		statement (in that case, some register copies will be present
		in loop latch in the final code, corresponding to the newly
		created looparound phi nodes).  */
	     && gimple_bb (stmt) != loop->header)
	{
	  gcc_assert (single_pred_p (gimple_bb (stmt)));
	  use = PHI_ARG_DEF (stmt, 0);
	  stmt = SSA_NAME_DEF_STMT (use);
	}

      base_names_in_chain_on (loop, use, var);
    }
}
|
|
2013
|
|
2014 /* Returns true if CHAIN is suitable to be combined. */
|
|
2015
|
|
2016 static bool
|
|
2017 chain_can_be_combined_p (chain_p chain)
|
|
2018 {
|
|
2019 return (!chain->combined
|
|
2020 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
|
|
2021 }
|
|
2022
|
|
2023 /* Returns the modify statement that uses NAME. Skips over assignment
|
|
2024 statements, NAME is replaced with the actual name used in the returned
|
|
2025 statement. */
|
|
2026
|
|
2027 static gimple
|
|
2028 find_use_stmt (tree *name)
|
|
2029 {
|
|
2030 gimple stmt;
|
|
2031 tree rhs, lhs;
|
|
2032
|
|
2033 /* Skip over assignments. */
|
|
2034 while (1)
|
|
2035 {
|
|
2036 stmt = single_nonlooparound_use (*name);
|
|
2037 if (!stmt)
|
|
2038 return NULL;
|
|
2039
|
|
2040 if (gimple_code (stmt) != GIMPLE_ASSIGN)
|
|
2041 return NULL;
|
|
2042
|
|
2043 lhs = gimple_assign_lhs (stmt);
|
|
2044 if (TREE_CODE (lhs) != SSA_NAME)
|
|
2045 return NULL;
|
|
2046
|
|
2047 if (gimple_assign_copy_p (stmt))
|
|
2048 {
|
|
2049 rhs = gimple_assign_rhs1 (stmt);
|
|
2050 if (rhs != *name)
|
|
2051 return NULL;
|
|
2052
|
|
2053 *name = lhs;
|
|
2054 }
|
|
2055 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
|
|
2056 == GIMPLE_BINARY_RHS)
|
|
2057 return stmt;
|
|
2058 else
|
|
2059 return NULL;
|
|
2060 }
|
|
2061 }
|
|
2062
|
|
2063 /* Returns true if we may perform reassociation for operation CODE in TYPE. */
|
|
2064
|
|
2065 static bool
|
|
2066 may_reassociate_p (tree type, enum tree_code code)
|
|
2067 {
|
|
2068 if (FLOAT_TYPE_P (type)
|
|
2069 && !flag_unsafe_math_optimizations)
|
|
2070 return false;
|
|
2071
|
|
2072 return (commutative_tree_code (code)
|
|
2073 && associative_tree_code (code));
|
|
2074 }
|
|
2075
|
|
/* If the operation used in STMT is associative and commutative, go through the
   tree of the same operations and returns its root.  Distance to the root
   is stored in DISTANCE.  */

static gimple
find_associative_operation_root (gimple stmt, unsigned *distance)
{
  tree lhs;
  gimple next;
  enum tree_code code = gimple_assign_rhs_code (stmt);
  tree type = TREE_TYPE (gimple_assign_lhs (stmt));
  unsigned dist = 0;

  if (!may_reassociate_p (type, code))
    return NULL;

  /* Climb up the chain of single uses while the same operation code is
     used; the last such statement is the root of the tree.  */
  while (1)
    {
      lhs = gimple_assign_lhs (stmt);
      gcc_assert (TREE_CODE (lhs) == SSA_NAME);

      next = find_use_stmt (&lhs);
      if (!next
	  || gimple_assign_rhs_code (next) != code)
	break;

      stmt = next;
      dist++;
    }

  /* DISTANCE may be NULL when the caller is not interested in it.  */
  if (distance)
    *distance = dist;
  return stmt;
}
|
|
2110
|
|
2111 /* Returns the common statement in that NAME1 and NAME2 have a use. If there
|
|
2112 is no such statement, returns NULL_TREE. In case the operation used on
|
|
2113 NAME1 and NAME2 is associative and commutative, returns the root of the
|
|
2114 tree formed by this operation instead of the statement that uses NAME1 or
|
|
2115 NAME2. */
|
|
2116
|
|
2117 static gimple
|
|
2118 find_common_use_stmt (tree *name1, tree *name2)
|
|
2119 {
|
|
2120 gimple stmt1, stmt2;
|
|
2121
|
|
2122 stmt1 = find_use_stmt (name1);
|
|
2123 if (!stmt1)
|
|
2124 return NULL;
|
|
2125
|
|
2126 stmt2 = find_use_stmt (name2);
|
|
2127 if (!stmt2)
|
|
2128 return NULL;
|
|
2129
|
|
2130 if (stmt1 == stmt2)
|
|
2131 return stmt1;
|
|
2132
|
|
2133 stmt1 = find_associative_operation_root (stmt1, NULL);
|
|
2134 if (!stmt1)
|
|
2135 return NULL;
|
|
2136 stmt2 = find_associative_operation_root (stmt2, NULL);
|
|
2137 if (!stmt2)
|
|
2138 return NULL;
|
|
2139
|
|
2140 return (stmt1 == stmt2 ? stmt1 : NULL);
|
|
2141 }
|
|
2142
|
|
/* Checks whether R1 and R2 are combined together using CODE, with the result
   in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
   if it is true.  If CODE is ERROR_MARK, set these values instead.  */

static bool
combinable_refs_p (dref r1, dref r2,
		   enum tree_code *code, bool *swap, tree *rslt_type)
{
  enum tree_code acode;
  bool aswap;
  tree atype;
  tree name1, name2;
  gimple stmt;

  name1 = name_for_ref (r1);
  name2 = name_for_ref (r2);
  gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);

  /* Find the single statement (possibly the root of a reassociable
     operation tree) in that both references are used.  */
  stmt = find_common_use_stmt (&name1, &name2);

  if (!stmt)
    return false;

  acode = gimple_assign_rhs_code (stmt);
  /* For noncommutative operations, record whether the operands appear in
     the opposite order to R1 CODE R2.  */
  aswap = (!commutative_tree_code (acode)
	   && gimple_assign_rhs1 (stmt) != name1);
  atype = TREE_TYPE (gimple_assign_lhs (stmt));

  /* ERROR_MARK marks the first pair of references in the chains; record
     the combination instead of comparing against it.  */
  if (*code == ERROR_MARK)
    {
      *code = acode;
      *swap = aswap;
      *rslt_type = atype;
      return true;
    }

  return (*code == acode
	  && *swap == aswap
	  && *rslt_type == atype);
}
|
|
2183
|
|
2184 /* Remove OP from the operation on rhs of STMT, and replace STMT with
|
|
2185 an assignment of the remaining operand. */
|
|
2186
|
|
2187 static void
|
|
2188 remove_name_from_operation (gimple stmt, tree op)
|
|
2189 {
|
|
2190 tree other_op;
|
|
2191 gimple_stmt_iterator si;
|
|
2192
|
|
2193 gcc_assert (is_gimple_assign (stmt));
|
|
2194
|
|
2195 if (gimple_assign_rhs1 (stmt) == op)
|
|
2196 other_op = gimple_assign_rhs2 (stmt);
|
|
2197 else
|
|
2198 other_op = gimple_assign_rhs1 (stmt);
|
|
2199
|
|
2200 si = gsi_for_stmt (stmt);
|
|
2201 gimple_assign_set_rhs_from_tree (&si, other_op);
|
|
2202
|
|
2203 /* We should not have reallocated STMT. */
|
|
2204 gcc_assert (gsi_stmt (si) == stmt);
|
|
2205
|
|
2206 update_stmt (stmt);
|
|
2207 }
|
|
2208
|
|
/* Reassociates the expression in that NAME1 and NAME2 are used so that they
   are combined in a single statement, and returns this statement.  */

static gimple
reassociate_to_the_same_stmt (tree name1, tree name2)
{
  gimple stmt1, stmt2, root1, root2, s1, s2;
  gimple new_stmt, tmp_stmt;
  tree new_name, tmp_name, var, r1, r2;
  unsigned dist1, dist2;
  enum tree_code code;
  tree type = TREE_TYPE (name1);
  gimple_stmt_iterator bsi;

  stmt1 = find_use_stmt (&name1);
  stmt2 = find_use_stmt (&name2);
  root1 = find_associative_operation_root (stmt1, &dist1);
  root2 = find_associative_operation_root (stmt2, &dist2);
  code = gimple_assign_rhs_code (stmt1);

  /* The caller guarantees both names belong to the same reassociable
     operation tree.  */
  gcc_assert (root1 && root2 && root1 == root2
	      && code == gimple_assign_rhs_code (stmt2));

  /* Find the root of the nearest expression in that both NAME1 and NAME2
     are used.  */
  r1 = name1;
  s1 = stmt1;
  r2 = name2;
  s2 = stmt2;

  /* First bring both walks to the same distance from the root ...  */
  while (dist1 > dist2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      dist1--;
    }
  while (dist2 > dist1)
    {
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
      dist2--;
    }

  /* ... then advance both in lockstep until they meet.  */
  while (s1 != s2)
    {
      s1 = find_use_stmt (&r1);
      r1 = gimple_assign_lhs (s1);
      s2 = find_use_stmt (&r2);
      r2 = gimple_assign_lhs (s2);
    }

  /* Remove NAME1 and NAME2 from the statements in that they are used
     currently.  */
  remove_name_from_operation (stmt1, name1);
  remove_name_from_operation (stmt2, name2);

  /* Insert the new statement combining NAME1 and NAME2 before S1, and
     combine it with the rhs of S1.  */
  var = create_tmp_var (type, "predreastmp");
  add_referenced_var (var);
  new_name = make_ssa_name (var, NULL);
  new_stmt = gimple_build_assign_with_ops (code, new_name, name1, name2);

  var = create_tmp_var (type, "predreastmp");
  add_referenced_var (var);
  tmp_name = make_ssa_name (var, NULL);

  /* Rhs of S1 may now be either a binary expression with operation
     CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
     so that name1 or name2 was removed from it).  */
  tmp_stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (s1),
					   tmp_name,
					   gimple_assign_rhs1 (s1),
					   gimple_assign_rhs2 (s1));

  /* Replace the rhs of S1 by NEW_NAME CODE TMP_NAME ...  */
  bsi = gsi_for_stmt (s1);
  gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
  s1 = gsi_stmt (bsi);
  update_stmt (s1);

  /* ... and emit the definitions of NEW_NAME and TMP_NAME before it.  */
  gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
  gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);

  return new_stmt;
}
|
|
2294
|
|
2295 /* Returns the statement that combines references R1 and R2. In case R1
|
|
2296 and R2 are not used in the same statement, but they are used with an
|
|
2297 associative and commutative operation in the same expression, reassociate
|
|
2298 the expression so that they are used in the same statement. */
|
|
2299
|
|
2300 static gimple
|
|
2301 stmt_combining_refs (dref r1, dref r2)
|
|
2302 {
|
|
2303 gimple stmt1, stmt2;
|
|
2304 tree name1 = name_for_ref (r1);
|
|
2305 tree name2 = name_for_ref (r2);
|
|
2306
|
|
2307 stmt1 = find_use_stmt (&name1);
|
|
2308 stmt2 = find_use_stmt (&name2);
|
|
2309 if (stmt1 == stmt2)
|
|
2310 return stmt1;
|
|
2311
|
|
2312 return reassociate_to_the_same_stmt (name1, name2);
|
|
2313 }
|
|
2314
|
|
2315 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
|
|
2316 description of the new chain is returned, otherwise we return NULL. */
|
|
2317
|
|
2318 static chain_p
|
|
2319 combine_chains (chain_p ch1, chain_p ch2)
|
|
2320 {
|
|
2321 dref r1, r2, nw;
|
|
2322 enum tree_code op = ERROR_MARK;
|
|
2323 bool swap = false;
|
|
2324 chain_p new_chain;
|
|
2325 unsigned i;
|
|
2326 gimple root_stmt;
|
|
2327 tree rslt_type = NULL_TREE;
|
|
2328
|
|
2329 if (ch1 == ch2)
|
|
2330 return false;
|
|
2331 if (ch1->length != ch2->length)
|
|
2332 return NULL;
|
|
2333
|
|
2334 if (VEC_length (dref, ch1->refs) != VEC_length (dref, ch2->refs))
|
|
2335 return NULL;
|
|
2336
|
|
2337 for (i = 0; (VEC_iterate (dref, ch1->refs, i, r1)
|
|
2338 && VEC_iterate (dref, ch2->refs, i, r2)); i++)
|
|
2339 {
|
|
2340 if (r1->distance != r2->distance)
|
|
2341 return NULL;
|
|
2342
|
|
2343 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
|
|
2344 return NULL;
|
|
2345 }
|
|
2346
|
|
2347 if (swap)
|
|
2348 {
|
|
2349 chain_p tmp = ch1;
|
|
2350 ch1 = ch2;
|
|
2351 ch2 = tmp;
|
|
2352 }
|
|
2353
|
|
2354 new_chain = XCNEW (struct chain);
|
|
2355 new_chain->type = CT_COMBINATION;
|
|
2356 new_chain->op = op;
|
|
2357 new_chain->ch1 = ch1;
|
|
2358 new_chain->ch2 = ch2;
|
|
2359 new_chain->rslt_type = rslt_type;
|
|
2360 new_chain->length = ch1->length;
|
|
2361
|
|
2362 for (i = 0; (VEC_iterate (dref, ch1->refs, i, r1)
|
|
2363 && VEC_iterate (dref, ch2->refs, i, r2)); i++)
|
|
2364 {
|
|
2365 nw = XCNEW (struct dref);
|
|
2366 nw->stmt = stmt_combining_refs (r1, r2);
|
|
2367 nw->distance = r1->distance;
|
|
2368
|
|
2369 VEC_safe_push (dref, heap, new_chain->refs, nw);
|
|
2370 }
|
|
2371
|
|
2372 new_chain->has_max_use_after = false;
|
|
2373 root_stmt = get_chain_root (new_chain)->stmt;
|
|
2374 for (i = 1; VEC_iterate (dref, new_chain->refs, i, nw); i++)
|
|
2375 {
|
|
2376 if (nw->distance == new_chain->length
|
|
2377 && !stmt_dominates_stmt_p (nw->stmt, root_stmt))
|
|
2378 {
|
|
2379 new_chain->has_max_use_after = true;
|
|
2380 break;
|
|
2381 }
|
|
2382 }
|
|
2383
|
|
2384 ch1->combined = true;
|
|
2385 ch2->combined = true;
|
|
2386 return new_chain;
|
|
2387 }
|
|
2388
|
|
2389 /* Try to combine the CHAINS. */
|
|
2390
|
|
2391 static void
|
|
2392 try_combine_chains (VEC (chain_p, heap) **chains)
|
|
2393 {
|
|
2394 unsigned i, j;
|
|
2395 chain_p ch1, ch2, cch;
|
|
2396 VEC (chain_p, heap) *worklist = NULL;
|
|
2397
|
|
2398 for (i = 0; VEC_iterate (chain_p, *chains, i, ch1); i++)
|
|
2399 if (chain_can_be_combined_p (ch1))
|
|
2400 VEC_safe_push (chain_p, heap, worklist, ch1);
|
|
2401
|
|
2402 while (!VEC_empty (chain_p, worklist))
|
|
2403 {
|
|
2404 ch1 = VEC_pop (chain_p, worklist);
|
|
2405 if (!chain_can_be_combined_p (ch1))
|
|
2406 continue;
|
|
2407
|
|
2408 for (j = 0; VEC_iterate (chain_p, *chains, j, ch2); j++)
|
|
2409 {
|
|
2410 if (!chain_can_be_combined_p (ch2))
|
|
2411 continue;
|
|
2412
|
|
2413 cch = combine_chains (ch1, ch2);
|
|
2414 if (cch)
|
|
2415 {
|
|
2416 VEC_safe_push (chain_p, heap, worklist, cch);
|
|
2417 VEC_safe_push (chain_p, heap, *chains, cch);
|
|
2418 break;
|
|
2419 }
|
|
2420 }
|
|
2421 }
|
|
2422 }
|
|
2423
|
|
/* Sets alias information based on data reference DR for REF,
   if necessary.  */

static void
set_alias_info (tree ref, struct data_reference *dr)
{
  tree var;
  tree tag = DR_SYMBOL_TAG (dr);

  gcc_assert (tag != NULL_TREE);

  /* Only pointer dereferences need alias information copied over.  */
  ref = get_base_address (ref);
  if (!ref || !INDIRECT_REF_P (ref))
    return;

  var = SSA_NAME_VAR (TREE_OPERAND (ref, 0));
  /* If the base pointer already has a memory tag, leave it alone.  */
  if (var_ann (var)->symbol_mem_tag)
    return;

  /* NOTE(review): when TAG is not itself a memory tag, a new type-based
     alias is created for VAR; otherwise TAG is reused directly — confirm
     against the alias-analysis machinery of this GCC version.  */
  if (!MTAG_P (tag))
    new_type_alias (var, tag, ref);
  else
    var_ann (var)->symbol_mem_tag = tag;
}
|
|
2448
|
|
/* Prepare initializers for CHAIN in LOOP.  Returns false if this is
   impossible because one of these initializers may trap, true otherwise.  */

static bool
prepare_initializers_chain (struct loop *loop, chain_p chain)
{
  /* Invariant chains need a single initializer; others need one per
     distance of the chain.  */
  unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
  struct data_reference *dr = get_chain_root (chain)->ref;
  tree init;
  gimple_seq stmts;
  dref laref;
  edge entry = loop_preheader_edge (loop);

  /* Find the initializers for the variables, and check that they cannot
     trap.  */
  chain->inits = VEC_alloc (tree, heap, n);
  for (i = 0; i < n; i++)
    VEC_quick_push (tree, chain->inits, NULL_TREE);

  /* If we have replaced some looparound phi nodes, use their initializers
     instead of creating our own.  */
  for (i = 0; VEC_iterate (dref, chain->refs, i, laref); i++)
    {
      if (gimple_code (laref->stmt) != GIMPLE_PHI)
	continue;

      gcc_assert (laref->distance > 0);
      VEC_replace (tree, chain->inits, n - laref->distance,
		   PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry));
    }

  /* Create the remaining initializers by evaluating the reference at the
     iterations before the loop entry.  */
  for (i = 0; i < n; i++)
    {
      if (VEC_index (tree, chain->inits, i) != NULL_TREE)
	continue;

      init = ref_at_iteration (loop, DR_REF (dr), (int) i - n);
      if (!init)
	return false;

      /* If the reference is not accessed in every iteration, evaluating
	 it before the loop could fault where the loop would not have.  */
      if (!chain->all_always_accessed && tree_could_trap_p (init))
	return false;

      init = force_gimple_operand (init, &stmts, false, NULL_TREE);
      if (stmts)
	{
	  mark_virtual_ops_for_renaming_list (stmts);
	  gsi_insert_seq_on_edge_immediate (entry, stmts);
	}
      set_alias_info (init, dr);

      VEC_replace (tree, chain->inits, i, init);
    }

  return true;
}
|
|
2505
|
|
2506 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
|
|
2507 be used because the initializers might trap. */
|
|
2508
|
|
2509 static void
|
|
2510 prepare_initializers (struct loop *loop, VEC (chain_p, heap) *chains)
|
|
2511 {
|
|
2512 chain_p chain;
|
|
2513 unsigned i;
|
|
2514
|
|
2515 for (i = 0; i < VEC_length (chain_p, chains); )
|
|
2516 {
|
|
2517 chain = VEC_index (chain_p, chains, i);
|
|
2518 if (prepare_initializers_chain (loop, chain))
|
|
2519 i++;
|
|
2520 else
|
|
2521 {
|
|
2522 release_chain (chain);
|
|
2523 VEC_unordered_remove (chain_p, chains, i);
|
|
2524 }
|
|
2525 }
|
|
2526 }
|
|
2527
|
|
/* Performs predictive commoning for LOOP.  Returns true if LOOP was
   unrolled.  */

static bool
tree_predictive_commoning_loop (struct loop *loop)
{
  VEC (data_reference_p, heap) *datarefs;
  VEC (ddr_p, heap) *dependences;
  struct component *components;
  VEC (chain_p, heap) *chains = NULL;
  unsigned unroll_factor;
  struct tree_niter_desc desc;
  bool unroll = false;
  edge exit;
  bitmap tmp_vars;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing loop %d\n", loop->num);

  /* Find the data references and split them into components according to their
     dependence relations.  */
  datarefs = VEC_alloc (data_reference_p, heap, 10);
  dependences = VEC_alloc (ddr_p, heap, 10);
  compute_data_dependences_for_loop (loop, true, &datarefs, &dependences);
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependences);

  components = split_data_refs_to_components (loop, datarefs, dependences);
  free_dependence_relations (dependences);
  if (!components)
    {
      free_data_refs (datarefs);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial state:\n\n");
      dump_components (dump_file, components);
    }

  /* Find the suitable components and split them into chains.  */
  components = filter_suitable_components (loop, components);

  tmp_vars = BITMAP_ALLOC (NULL);
  looparound_phis = BITMAP_ALLOC (NULL);
  determine_roots (loop, components, &chains);
  release_components (components);

  if (!chains)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Predictive commoning failed: no suitable chains\n");
      goto end;
    }
  prepare_initializers (loop, chains);

  /* Try to combine the chains that are always worked with together.  */
  try_combine_chains (&chains);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Before commoning:\n\n");
      dump_chains (dump_file, chains);
    }

  /* Determine the unroll factor, and if the loop should be unrolled, ensure
     that its number of iterations is divisible by the factor.  */
  unroll_factor = determine_unroll_factor (chains);
  scev_reset ();
  unroll = should_unroll_loop_p (loop, unroll_factor, &desc);
  exit = single_dom_exit (loop);

  /* Execute the predictive commoning transformations, and possibly unroll the
     loop.  */
  if (unroll)
    {
      struct epcc_data dta;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);

      dta.chains = chains;
      dta.tmp_vars = tmp_vars;

      update_ssa (TODO_update_ssa_only_virtuals);

      /* Cfg manipulations performed in tree_transform_and_unroll_loop before
	 execute_pred_commoning_cbck is called may cause phi nodes to be
	 reallocated, which is a problem since CHAINS may point to these
	 statements.  To fix this, we store the ssa names defined by the
	 phi nodes here instead of the phi nodes themselves, and restore
	 the phi nodes in execute_pred_commoning_cbck.  A bit hacky.  */
      replace_phis_by_defined_names (chains);

      tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
				      execute_pred_commoning_cbck, &dta);
      eliminate_temp_copies (loop, tmp_vars);
    }
  else
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "Executing predictive commoning without unrolling.\n");
      execute_pred_commoning (loop, chains, tmp_vars);
    }

end: ;
  /* Release all per-loop data; reached both on success and when no
     suitable chains were found.  */
  release_chains (chains);
  free_data_refs (datarefs);
  BITMAP_FREE (tmp_vars);
  BITMAP_FREE (looparound_phis);

  free_affine_expand_cache (&name_expansions);

  return unroll;
}
|
|
2646
|
|
2647 /* Runs predictive commoning. */
|
|
2648
|
|
2649 unsigned
|
|
2650 tree_predictive_commoning (void)
|
|
2651 {
|
|
2652 bool unrolled = false;
|
|
2653 struct loop *loop;
|
|
2654 loop_iterator li;
|
|
2655 unsigned ret = 0;
|
|
2656
|
|
2657 initialize_original_copy_tables ();
|
|
2658 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST)
|
|
2659 if (optimize_loop_for_speed_p (loop))
|
|
2660 {
|
|
2661 unrolled |= tree_predictive_commoning_loop (loop);
|
|
2662 }
|
|
2663
|
|
2664 if (unrolled)
|
|
2665 {
|
|
2666 scev_reset ();
|
|
2667 ret = TODO_cleanup_cfg;
|
|
2668 }
|
|
2669 free_original_copy_tables ();
|
|
2670
|
|
2671 return ret;
|
|
2672 }
|