comparison gcc/tree-predcom.c @ 16:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents f6334be47118
children 84e7813d76e9
comparing 15:561a7518be6b with 16:04ced10e8804
1 /* Predictive commoning. 1 /* Predictive commoning.
2 Copyright (C) 2005, 2007, 2008, 2009, 2010, 2011 2 Copyright (C) 2005-2017 Free Software Foundation, Inc.
3 Free Software Foundation, Inc.
4 3
5 This file is part of GCC. 4 This file is part of GCC.
6 5
7 GCC is free software; you can redistribute it and/or modify it 6 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the 7 under the terms of the GNU General Public License as published by the
98 (e[i] + f[i]) + (e[i+1] + f[i+1]) 97 (e[i] + f[i]) + (e[i+1] + f[i+1])
99 98
100 and we can combine the chains for e and f into one chain. 99 and we can combine the chains for e and f into one chain.
101 100
102 5) For each root reference (end of the chain) R, let N be maximum distance 101 5) For each root reference (end of the chain) R, let N be maximum distance
103 of a reference reusing its value. Variables R0 upto RN are created, 102 of a reference reusing its value. Variables R0 up to RN are created,
104 together with phi nodes that transfer values from R1 .. RN to 103 together with phi nodes that transfer values from R1 .. RN to
105 R0 .. R(N-1). 104 R0 .. R(N-1).
106 Initial values are loaded to R0..R(N-1) (in case not all references 105 Initial values are loaded to R0..R(N-1) (in case not all references
107 must necessarily be accessed and they may trap, we may fail here; 106 must necessarily be accessed and they may trap, we may fail here;
108 TODO sometimes, the loads could be guarded by a check for the number 107 TODO sometimes, the loads could be guarded by a check for the number
155 a[i+3] = s; 154 a[i+3] = s;
156 x = x + i; 155 x = x + i;
157 b[10] = x; 156 b[10] = x;
158 } 157 }
159 158
160 TODO -- stores killing other stores can be taken into account, e.g., 159 Apart from predictive commoning on Load-Load and Store-Load chains, we
161 for (i = 0; i < n; i++) 160 also support Store-Store chains -- stores killed by other stores can be
162 { 161 eliminated. Given the example below:
163 a[i] = 1; 162
164 a[i+2] = 2; 163 for (i = 0; i < n; i++)
165 } 164 {
166 165 a[i] = 1;
167 can be replaced with 166 a[i+2] = 2;
168 167 }
169 t0 = a[0]; 168
170 t1 = a[1]; 169 It can be replaced with:
171 for (i = 0; i < n; i++) 170
172 { 171 t0 = a[0];
173 a[i] = 1; 172 t1 = a[1];
174 t2 = 2; 173 for (i = 0; i < n; i++)
175 t0 = t1; 174 {
176 t1 = t2; 175 a[i] = 1;
177 } 176 t2 = 2;
178 a[n] = t0; 177 t0 = t1;
179 a[n+1] = t1; 178 t1 = t2;
180 179 }
181 The interesting part is that this would generalize store motion; still, since 180 a[n] = t0;
182 sm is performed elsewhere, it does not seem that important. 181 a[n+1] = t1;
182
183 If the loop runs more than one iteration, it can be further simplified into:
184
185 for (i = 0; i < n; i++)
186 {
187 a[i] = 1;
188 }
189 a[n] = 2;
190 a[n+1] = 2;
191
192 The interesting part is that this can be viewed either as general store motion
193 or as general dead store elimination, working both within and across iterations.
194
195 TODO: For now, we don't support store-store chains in multi-exit loops. We
196 also force the loop not to be unrolled when a store-store chain is present,
197 even if other chains might ask for unrolling.
183 198
184 Predictive commoning can be generalized for arbitrary computations (not 199 Predictive commoning can be generalized for arbitrary computations (not
185 just memory loads), and also nontrivial transfer functions (e.g., replacing 200 just memory loads), and also nontrivial transfer functions (e.g., replacing
186 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */ 201 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
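As an illustration of that last remark, here is a minimal sketch (not part of this file; use () and the variable ii are hypothetical) of carrying the i * i example across iterations with the transfer function mentioned above:

   long long ii = 0;                  /* equals 0 * 0 before the first iteration  */
   for (i = 0; i < n; i++)
     {
       use (ii);                      /* ii == i * i, with no multiplication  */
       ii = ii + 2 * i + 1;           /* (i + 1) * (i + 1) computed from i * i  */
     }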
187 202
188 #include "config.h" 203 #include "config.h"
189 #include "system.h" 204 #include "system.h"
190 #include "coretypes.h" 205 #include "coretypes.h"
191 #include "tm.h" 206 #include "backend.h"
207 #include "rtl.h"
192 #include "tree.h" 208 #include "tree.h"
193 #include "tm_p.h" 209 #include "gimple.h"
210 #include "predict.h"
211 #include "tree-pass.h"
212 #include "ssa.h"
213 #include "gimple-pretty-print.h"
214 #include "alias.h"
215 #include "fold-const.h"
194 #include "cfgloop.h" 216 #include "cfgloop.h"
195 #include "tree-flow.h" 217 #include "tree-eh.h"
196 #include "ggc.h" 218 #include "gimplify.h"
219 #include "gimple-iterator.h"
220 #include "gimplify-me.h"
221 #include "tree-ssa-loop-ivopts.h"
222 #include "tree-ssa-loop-manip.h"
223 #include "tree-ssa-loop-niter.h"
224 #include "tree-ssa-loop.h"
225 #include "tree-into-ssa.h"
226 #include "tree-dfa.h"
227 #include "tree-ssa.h"
197 #include "tree-data-ref.h" 228 #include "tree-data-ref.h"
198 #include "tree-scalar-evolution.h" 229 #include "tree-scalar-evolution.h"
199 #include "tree-chrec.h"
200 #include "params.h" 230 #include "params.h"
201 #include "tree-pretty-print.h"
202 #include "gimple-pretty-print.h"
203 #include "tree-pass.h"
204 #include "tree-affine.h" 231 #include "tree-affine.h"
205 #include "tree-inline.h" 232 #include "builtins.h"
206 233
207 /* The maximum number of iterations between the considered memory 234 /* The maximum number of iterations between the considered memory
208 references. */ 235 references. */
209 236
210 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8) 237 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
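For illustration (hypothetical loop; assuming MAX_DISTANCE evaluates to 4 on a register-poor target), the reuse below is close enough to be exploited, while references 4 or more iterations apart (e.g. a[i] and a[i + 4]) are not kept in the same chain:

   for (i = 0; i < n; i++)
     b[i] = a[i] + a[i + 3];          /* distance 3 < MAX_DISTANCE: chained  */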
216 { 243 {
217 /* The reference itself. */ 244 /* The reference itself. */
218 struct data_reference *ref; 245 struct data_reference *ref;
219 246
220 /* The statement in which the reference appears. */ 247 /* The statement in which the reference appears. */
221 gimple stmt; 248 gimple *stmt;
222 249
223 /* In case that STMT is a phi node, this field is set to the SSA name 250 /* In case that STMT is a phi node, this field is set to the SSA name
224 defined by it in replace_phis_by_defined_names (in order to avoid 251 defined by it in replace_phis_by_defined_names (in order to avoid
225 pointing to phi node that got reallocated in the meantime). */ 252 pointing to phi node that got reallocated in the meantime). */
226 tree name_defined_by_phi; 253 tree name_defined_by_phi;
228 /* Distance of the reference from the root of the chain (in number of 255 /* Distance of the reference from the root of the chain (in number of
229 iterations of the loop). */ 256 iterations of the loop). */
230 unsigned distance; 257 unsigned distance;
231 258
232 /* Number of iterations offset from the first reference in the component. */ 259 /* Number of iterations offset from the first reference in the component. */
233 double_int offset; 260 widest_int offset;
234 261
235 /* Number of the reference in a component, in dominance ordering. */ 262 /* Number of the reference in a component, in dominance ordering. */
236 unsigned pos; 263 unsigned pos;
237 264
238 /* True if the memory reference is always accessed when the loop is 265 /* True if the memory reference is always accessed when the loop is
239 entered. */ 266 entered. */
240 unsigned always_accessed : 1; 267 unsigned always_accessed : 1;
241 } *dref; 268 } *dref;
242 269
243 DEF_VEC_P (dref);
244 DEF_VEC_ALLOC_P (dref, heap);
245 270
246 /* Type of the chain of the references. */ 271 /* Type of the chain of the references. */
247 272
248 enum chain_type 273 enum chain_type
249 { 274 {
253 /* There are only loads in the chain. */ 278 /* There are only loads in the chain. */
254 CT_LOAD, 279 CT_LOAD,
255 280
256 /* Root of the chain is store, the rest are loads. */ 281 /* Root of the chain is store, the rest are loads. */
257 CT_STORE_LOAD, 282 CT_STORE_LOAD,
283
284 /* There are only stores in the chain. */
285 CT_STORE_STORE,
258 286
259 /* A combination of two chains. */ 287 /* A combination of two chains. */
260 CT_COMBINATION 288 CT_COMBINATION
261 }; 289 };
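Hypothetical one-line loop bodies illustrating these chain types (assuming each group of references lands in a single component):

   ... = a[i] + a[i + 2];             /* CT_LOAD: only loads  */
   a[i + 1] = x; ... = a[i];          /* CT_STORE_LOAD: the load reuses the value
                                         stored one iteration earlier  */
   a[i] = 1; a[i + 2] = 2;            /* CT_STORE_STORE: the store to a[i + 2] is
                                         killed by a[i] two iterations later  */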
262 290
272 enum tree_code op; 300 enum tree_code op;
273 tree rslt_type; 301 tree rslt_type;
274 struct chain *ch1, *ch2; 302 struct chain *ch1, *ch2;
275 303
276 /* The references in the chain. */ 304 /* The references in the chain. */
277 VEC(dref,heap) *refs; 305 vec<dref> refs;
278 306
279 /* The maximum distance of the reference in the chain from the root. */ 307 /* The maximum distance of the reference in the chain from the root. */
280 unsigned length; 308 unsigned length;
281 309
282 /* The variables used to copy the value throughout iterations. */ 310 /* The variables used to copy the value throughout iterations. */
283 VEC(tree,heap) *vars; 311 vec<tree> vars;
284 312
285 /* Initializers for the variables. */ 313 /* Initializers for the variables. */
286 VEC(tree,heap) *inits; 314 vec<tree> inits;
315
316 /* Finalizers for the eliminated stores. */
317 vec<tree> finis;
318
319 /* gimple stmts initializing the initial variables of the chain. */
320 gimple_seq init_seq;
321
322 /* gimple stmts finalizing the eliminated stores of the chain. */
323 gimple_seq fini_seq;
287 324
288 /* True if there is a use of a variable with the maximal distance 325 /* True if there is a use of a variable with the maximal distance
289 that comes after the root in the loop. */ 326 that comes after the root in the loop. */
290 unsigned has_max_use_after : 1; 327 unsigned has_max_use_after : 1;
291 328
292 /* True if all the memory references in the chain are always accessed. */ 329 /* True if all the memory references in the chain are always accessed. */
293 unsigned all_always_accessed : 1; 330 unsigned all_always_accessed : 1;
294 331
295 /* True if this chain was combined together with some other chain. */ 332 /* True if this chain was combined together with some other chain. */
296 unsigned combined : 1; 333 unsigned combined : 1;
334
335 /* True if this is a store elimination chain and the eliminated stores
336 store a loop invariant value into memory. */
337 unsigned inv_store_elimination : 1;
297 } *chain_p; 338 } *chain_p;
298 339
299 DEF_VEC_P (chain_p);
300 DEF_VEC_ALLOC_P (chain_p, heap);
301 340
302 /* Describes the knowledge about the step of the memory references in 341 /* Describes the knowledge about the step of the memory references in
303 the component. */ 342 the component. */
304 343
305 enum ref_step_type 344 enum ref_step_type
317 /* Components of the data dependence graph. */ 356 /* Components of the data dependence graph. */
318 357
319 struct component 358 struct component
320 { 359 {
321 /* The references in the component. */ 360 /* The references in the component. */
322 VEC(dref,heap) *refs; 361 vec<dref> refs;
323 362
324 /* What we know about the step of the references in the component. */ 363 /* What we know about the step of the references in the component. */
325 enum ref_step_type comp_step; 364 enum ref_step_type comp_step;
365
366 /* True if all references in the component are stores and we try to do
367 intra- and inter-iteration dead store elimination. */
368 bool eliminate_store_p;
326 369
327 /* Next component in the list. */ 370 /* Next component in the list. */
328 struct component *next; 371 struct component *next;
329 }; 372 };
330 373
332 375
333 static bitmap looparound_phis; 376 static bitmap looparound_phis;
334 377
335 /* Cache used by tree_to_aff_combination_expand. */ 378 /* Cache used by tree_to_aff_combination_expand. */
336 379
337 static struct pointer_map_t *name_expansions; 380 static hash_map<tree, name_expansion *> *name_expansions;
338 381
339 /* Dumps data reference REF to FILE. */ 382 /* Dumps data reference REF to FILE. */
340 383
341 extern void dump_dref (FILE *, dref); 384 extern void dump_dref (FILE *, dref);
342 void 385 void
348 print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM); 391 print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
349 fprintf (file, " (id %u%s)\n", ref->pos, 392 fprintf (file, " (id %u%s)\n", ref->pos,
350 DR_IS_READ (ref->ref) ? "" : ", write"); 393 DR_IS_READ (ref->ref) ? "" : ", write");
351 394
352 fprintf (file, " offset "); 395 fprintf (file, " offset ");
353 dump_double_int (file, ref->offset, false); 396 print_decs (ref->offset, file);
354 fprintf (file, "\n"); 397 fprintf (file, "\n");
355 398
356 fprintf (file, " distance %u\n", ref->distance); 399 fprintf (file, " distance %u\n", ref->distance);
357 } 400 }
358 else 401 else
392 435
393 case CT_STORE_LOAD: 436 case CT_STORE_LOAD:
394 chain_type = "Store-loads"; 437 chain_type = "Store-loads";
395 break; 438 break;
396 439
440 case CT_STORE_STORE:
441 chain_type = "Store-stores";
442 break;
443
397 case CT_COMBINATION: 444 case CT_COMBINATION:
398 chain_type = "Combination"; 445 chain_type = "Combination";
399 break; 446 break;
400 447
401 default: 448 default:
415 (void *) chain->ch2); 462 (void *) chain->ch2);
416 print_generic_expr (file, chain->rslt_type, TDF_SLIM); 463 print_generic_expr (file, chain->rslt_type, TDF_SLIM);
417 fprintf (file, "\n"); 464 fprintf (file, "\n");
418 } 465 }
419 466
420 if (chain->vars) 467 if (chain->vars.exists ())
421 { 468 {
422 fprintf (file, " vars"); 469 fprintf (file, " vars");
423 FOR_EACH_VEC_ELT (tree, chain->vars, i, var) 470 FOR_EACH_VEC_ELT (chain->vars, i, var)
424 { 471 {
425 fprintf (file, " "); 472 fprintf (file, " ");
426 print_generic_expr (file, var, TDF_SLIM); 473 print_generic_expr (file, var, TDF_SLIM);
427 } 474 }
428 fprintf (file, "\n"); 475 fprintf (file, "\n");
429 } 476 }
430 477
431 if (chain->inits) 478 if (chain->inits.exists ())
432 { 479 {
433 fprintf (file, " inits"); 480 fprintf (file, " inits");
434 FOR_EACH_VEC_ELT (tree, chain->inits, i, var) 481 FOR_EACH_VEC_ELT (chain->inits, i, var)
435 { 482 {
436 fprintf (file, " "); 483 fprintf (file, " ");
437 print_generic_expr (file, var, TDF_SLIM); 484 print_generic_expr (file, var, TDF_SLIM);
438 } 485 }
439 fprintf (file, "\n"); 486 fprintf (file, "\n");
440 } 487 }
441 488
442 fprintf (file, " references:\n"); 489 fprintf (file, " references:\n");
443 FOR_EACH_VEC_ELT (dref, chain->refs, i, a) 490 FOR_EACH_VEC_ELT (chain->refs, i, a)
444 dump_dref (file, a); 491 dump_dref (file, a);
445 492
446 fprintf (file, "\n"); 493 fprintf (file, "\n");
447 } 494 }
448 495
449 /* Dumps CHAINS to FILE. */ 496 /* Dumps CHAINS to FILE. */
450 497
451 extern void dump_chains (FILE *, VEC (chain_p, heap) *); 498 extern void dump_chains (FILE *, vec<chain_p> );
452 void 499 void
453 dump_chains (FILE *file, VEC (chain_p, heap) *chains) 500 dump_chains (FILE *file, vec<chain_p> chains)
454 { 501 {
455 chain_p chain; 502 chain_p chain;
456 unsigned i; 503 unsigned i;
457 504
458 FOR_EACH_VEC_ELT (chain_p, chains, i, chain) 505 FOR_EACH_VEC_ELT (chains, i, chain)
459 dump_chain (file, chain); 506 dump_chain (file, chain);
460 } 507 }
461 508
462 /* Dumps COMP to FILE. */ 509 /* Dumps COMP to FILE. */
463 510
468 dref a; 515 dref a;
469 unsigned i; 516 unsigned i;
470 517
471 fprintf (file, "Component%s:\n", 518 fprintf (file, "Component%s:\n",
472 comp->comp_step == RS_INVARIANT ? " (invariant)" : ""); 519 comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
473 FOR_EACH_VEC_ELT (dref, comp->refs, i, a) 520 FOR_EACH_VEC_ELT (comp->refs, i, a)
474 dump_dref (file, a); 521 dump_dref (file, a);
475 fprintf (file, "\n"); 522 fprintf (file, "\n");
476 } 523 }
477 524
478 /* Dumps COMPS to FILE. */ 525 /* Dumps COMPS to FILE. */
496 unsigned i; 543 unsigned i;
497 544
498 if (chain == NULL) 545 if (chain == NULL)
499 return; 546 return;
500 547
501 FOR_EACH_VEC_ELT (dref, chain->refs, i, ref) 548 FOR_EACH_VEC_ELT (chain->refs, i, ref)
502 free (ref); 549 free (ref);
503 550
504 VEC_free (dref, heap, chain->refs); 551 chain->refs.release ();
505 VEC_free (tree, heap, chain->vars); 552 chain->vars.release ();
506 VEC_free (tree, heap, chain->inits); 553 chain->inits.release ();
554 if (chain->init_seq)
555 gimple_seq_discard (chain->init_seq);
556
557 chain->finis.release ();
558 if (chain->fini_seq)
559 gimple_seq_discard (chain->fini_seq);
507 560
508 free (chain); 561 free (chain);
509 } 562 }
510 563
511 /* Frees CHAINS. */ 564 /* Frees CHAINS. */
512 565
513 static void 566 static void
514 release_chains (VEC (chain_p, heap) *chains) 567 release_chains (vec<chain_p> chains)
515 { 568 {
516 unsigned i; 569 unsigned i;
517 chain_p chain; 570 chain_p chain;
518 571
519 FOR_EACH_VEC_ELT (chain_p, chains, i, chain) 572 FOR_EACH_VEC_ELT (chains, i, chain)
520 release_chain (chain); 573 release_chain (chain);
521 VEC_free (chain_p, heap, chains); 574 chains.release ();
522 } 575 }
523 576
524 /* Frees a component COMP. */ 577 /* Frees a component COMP. */
525 578
526 static void 579 static void
527 release_component (struct component *comp) 580 release_component (struct component *comp)
528 { 581 {
529 VEC_free (dref, heap, comp->refs); 582 comp->refs.release ();
530 free (comp); 583 free (comp);
531 } 584 }
532 585
533 /* Frees list of components COMPS. */ 586 /* Frees list of components COMPS. */
534 587
596 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step) 649 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
597 { 650 {
598 tree ref = DR_REF (a), step = DR_STEP (a); 651 tree ref = DR_REF (a), step = DR_STEP (a);
599 652
600 if (!step 653 if (!step
654 || TREE_THIS_VOLATILE (ref)
601 || !is_gimple_reg_type (TREE_TYPE (ref)) 655 || !is_gimple_reg_type (TREE_TYPE (ref))
602 || tree_could_throw_p (ref)) 656 || tree_could_throw_p (ref))
603 return false; 657 return false;
604 658
605 if (integer_zerop (step)) 659 if (integer_zerop (step))
615 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */ 669 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
616 670
617 static void 671 static void
618 aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset) 672 aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
619 { 673 {
674 tree type = TREE_TYPE (DR_OFFSET (dr));
620 aff_tree delta; 675 aff_tree delta;
621 676
622 tree_to_aff_combination_expand (DR_OFFSET (dr), sizetype, offset, 677 tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
623 &name_expansions); 678 &name_expansions);
624 aff_combination_const (&delta, sizetype, tree_to_double_int (DR_INIT (dr))); 679 aff_combination_const (&delta, type, wi::to_widest (DR_INIT (dr)));
625 aff_combination_add (offset, &delta); 680 aff_combination_add (offset, &delta);
626 } 681 }
627 682
628 /* Determines number of iterations of the innermost enclosing loop before B 683 /* Determines number of iterations of the innermost enclosing loop before B
629 refers to exactly the same location as A and stores it to OFF. If A and 684 refers to exactly the same location as A and stores it to OFF. If A and
631 returns false, otherwise returns true. Both A and B are assumed to 686 returns false, otherwise returns true. Both A and B are assumed to
632 satisfy suitable_reference_p. */ 687 satisfy suitable_reference_p. */
633 688
634 static bool 689 static bool
635 determine_offset (struct data_reference *a, struct data_reference *b, 690 determine_offset (struct data_reference *a, struct data_reference *b,
636 double_int *off) 691 widest_int *off)
637 { 692 {
638 aff_tree diff, baseb, step; 693 aff_tree diff, baseb, step;
639 tree typea, typeb; 694 tree typea, typeb;
640 695
641 /* Check that both the references access the location in the same type. */ 696 /* Check that both the references access the location in the same type. */
652 707
653 if (integer_zerop (DR_STEP (a))) 708 if (integer_zerop (DR_STEP (a)))
654 { 709 {
655 /* If the references have loop invariant address, check that they access 710 /* If the references have loop invariant address, check that they access
656 exactly the same location. */ 711 exactly the same location. */
657 *off = double_int_zero; 712 *off = 0;
658 return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0) 713 return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
659 && operand_equal_p (DR_INIT (a), DR_INIT (b), 0)); 714 && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
660 } 715 }
661 716
662 /* Compare the offsets of the addresses, and check whether the difference 717 /* Compare the offsets of the addresses, and check whether the difference
663 is a multiple of step. */ 718 is a multiple of step. */
664 aff_combination_dr_offset (a, &diff); 719 aff_combination_dr_offset (a, &diff);
665 aff_combination_dr_offset (b, &baseb); 720 aff_combination_dr_offset (b, &baseb);
666 aff_combination_scale (&baseb, double_int_minus_one); 721 aff_combination_scale (&baseb, -1);
667 aff_combination_add (&diff, &baseb); 722 aff_combination_add (&diff, &baseb);
668 723
669 tree_to_aff_combination_expand (DR_STEP (a), sizetype, 724 tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
670 &step, &name_expansions); 725 &step, &name_expansions);
671 return aff_combination_constant_multiple_p (&diff, &step, off); 726 return aff_combination_constant_multiple_p (&diff, &step, off);
672 } 727 }
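A worked example of the computation above (hypothetical references, 4-byte elements): for A == a[i + 2] and B == a[i] in a loop incrementing i by one, the expanded offsets are i * 4 + 8 and i * 4 + 0, so DIFF becomes 8 while DR_STEP (A) is 4; 8 is the constant multiple 2 of the step, hence *OFF is set to 2 -- B refers to the location that A referred to two iterations earlier.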
673 728
674 /* Returns the last basic block in LOOP for which we are sure that 729 /* Returns the last basic block in LOOP for which we are sure that
676 731
677 static basic_block 732 static basic_block
678 last_always_executed_block (struct loop *loop) 733 last_always_executed_block (struct loop *loop)
679 { 734 {
680 unsigned i; 735 unsigned i;
681 VEC (edge, heap) *exits = get_loop_exit_edges (loop); 736 vec<edge> exits = get_loop_exit_edges (loop);
682 edge ex; 737 edge ex;
683 basic_block last = loop->latch; 738 basic_block last = loop->latch;
684 739
685 FOR_EACH_VEC_ELT (edge, exits, i, ex) 740 FOR_EACH_VEC_ELT (exits, i, ex)
686 last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src); 741 last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
687 VEC_free (edge, heap, exits); 742 exits.release ();
688 743
689 return last; 744 return last;
690 } 745 }
691 746
692 /* Splits dependence graph on DATAREFS described by DEPENDS to components. */ 747 /* Splits dependence graph on DATAREFS described by DEPENDS to components. */
693 748
694 static struct component * 749 static struct component *
695 split_data_refs_to_components (struct loop *loop, 750 split_data_refs_to_components (struct loop *loop,
696 VEC (data_reference_p, heap) *datarefs, 751 vec<data_reference_p> datarefs,
697 VEC (ddr_p, heap) *depends) 752 vec<ddr_p> depends)
698 { 753 {
699 unsigned i, n = VEC_length (data_reference_p, datarefs); 754 unsigned i, n = datarefs.length ();
700 unsigned ca, ia, ib, bad; 755 unsigned ca, ia, ib, bad;
701 unsigned *comp_father = XNEWVEC (unsigned, n + 1); 756 unsigned *comp_father = XNEWVEC (unsigned, n + 1);
702 unsigned *comp_size = XNEWVEC (unsigned, n + 1); 757 unsigned *comp_size = XNEWVEC (unsigned, n + 1);
703 struct component **comps; 758 struct component **comps;
704 struct data_reference *dr, *dra, *drb; 759 struct data_reference *dr, *dra, *drb;
705 struct data_dependence_relation *ddr; 760 struct data_dependence_relation *ddr;
706 struct component *comp_list = NULL, *comp; 761 struct component *comp_list = NULL, *comp;
707 dref dataref; 762 dref dataref;
763 /* Don't do store elimination if loop has multiple exit edges. */
764 bool eliminate_store_p = single_exit (loop) != NULL;
708 basic_block last_always_executed = last_always_executed_block (loop); 765 basic_block last_always_executed = last_always_executed_block (loop);
709 766
710 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr) 767 FOR_EACH_VEC_ELT (datarefs, i, dr)
711 { 768 {
712 if (!DR_REF (dr)) 769 if (!DR_REF (dr))
713 { 770 {
714 /* A fake reference for call or asm_expr that may clobber memory; 771 /* A fake reference for call or asm_expr that may clobber memory;
715 just fail. */ 772 just fail. */
716 goto end; 773 goto end;
717 } 774 }
775 /* predcom pass isn't prepared to handle calls with data references. */
776 if (is_gimple_call (DR_STMT (dr)))
777 goto end;
718 dr->aux = (void *) (size_t) i; 778 dr->aux = (void *) (size_t) i;
719 comp_father[i] = i; 779 comp_father[i] = i;
720 comp_size[i] = 1; 780 comp_size[i] = 1;
721 } 781 }
722 782
723 /* A component reserved for the "bad" data references. */ 783 /* A component reserved for the "bad" data references. */
724 comp_father[n] = n; 784 comp_father[n] = n;
725 comp_size[n] = 1; 785 comp_size[n] = 1;
726 786
727 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr) 787 FOR_EACH_VEC_ELT (datarefs, i, dr)
728 { 788 {
729 enum ref_step_type dummy; 789 enum ref_step_type dummy;
730 790
731 if (!suitable_reference_p (dr, &dummy)) 791 if (!suitable_reference_p (dr, &dummy))
732 { 792 {
733 ia = (unsigned) (size_t) dr->aux; 793 ia = (unsigned) (size_t) dr->aux;
734 merge_comps (comp_father, comp_size, n, ia); 794 merge_comps (comp_father, comp_size, n, ia);
735 } 795 }
736 } 796 }
737 797
738 FOR_EACH_VEC_ELT (ddr_p, depends, i, ddr) 798 FOR_EACH_VEC_ELT (depends, i, ddr)
739 { 799 {
740 double_int dummy_off; 800 widest_int dummy_off;
741 801
742 if (DDR_ARE_DEPENDENT (ddr) == chrec_known) 802 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
743 continue; 803 continue;
744 804
745 dra = DDR_A (ddr); 805 dra = DDR_A (ddr);
746 drb = DDR_B (ddr); 806 drb = DDR_B (ddr);
807
808 /* Don't do store elimination if there is any unknown dependence for
809 any store data reference. */
810 if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
811 && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
812 || DDR_NUM_DIST_VECTS (ddr) == 0))
813 eliminate_store_p = false;
814
747 ia = component_of (comp_father, (unsigned) (size_t) dra->aux); 815 ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
748 ib = component_of (comp_father, (unsigned) (size_t) drb->aux); 816 ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
749 if (ia == ib) 817 if (ia == ib)
750 continue; 818 continue;
751 819
752 bad = component_of (comp_father, n); 820 bad = component_of (comp_father, n);
753 821
754 /* If both A and B are reads, we may ignore unsuitable dependences. */ 822 /* If both A and B are reads, we may ignore unsuitable dependences. */
755 if (DR_IS_READ (dra) && DR_IS_READ (drb) 823 if (DR_IS_READ (dra) && DR_IS_READ (drb))
756 && (ia == bad || ib == bad 824 {
757 || !determine_offset (dra, drb, &dummy_off))) 825 if (ia == bad || ib == bad
758 continue; 826 || !determine_offset (dra, drb, &dummy_off))
827 continue;
828 }
829 /* If A is a read and B a write, or vice versa, and there is an unsuitable
830 dependence, instead of merging both components into a component
831 that will certainly not pass suitable_component_p, just put the
832 read into the bad component; perhaps at least the write, together with
833 all the other data refs in its component, will be optimizable. */
834 else if (DR_IS_READ (dra) && ib != bad)
835 {
836 if (ia == bad)
837 continue;
838 else if (!determine_offset (dra, drb, &dummy_off))
839 {
840 merge_comps (comp_father, comp_size, bad, ia);
841 continue;
842 }
843 }
844 else if (DR_IS_READ (drb) && ia != bad)
845 {
846 if (ib == bad)
847 continue;
848 else if (!determine_offset (dra, drb, &dummy_off))
849 {
850 merge_comps (comp_father, comp_size, bad, ib);
851 continue;
852 }
853 }
854 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)
855 && ia != bad && ib != bad
856 && !determine_offset (dra, drb, &dummy_off))
857 {
858 merge_comps (comp_father, comp_size, bad, ia);
859 merge_comps (comp_father, comp_size, bad, ib);
860 continue;
861 }
759 862
760 merge_comps (comp_father, comp_size, ia, ib); 863 merge_comps (comp_father, comp_size, ia, ib);
864 }
865
866 if (eliminate_store_p)
867 {
868 tree niters = number_of_latch_executions (loop);
869
870 /* Don't do store elimination if the niters info is unknown, because stores
871 in the last iteration can't be eliminated and we would need to recover
872 them after the loop. */
873 eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know);
761 } 874 }
762 875
763 comps = XCNEWVEC (struct component *, n); 876 comps = XCNEWVEC (struct component *, n);
764 bad = component_of (comp_father, n); 877 bad = component_of (comp_father, n);
765 FOR_EACH_VEC_ELT (data_reference_p, datarefs, i, dr) 878 FOR_EACH_VEC_ELT (datarefs, i, dr)
766 { 879 {
767 ia = (unsigned) (size_t) dr->aux; 880 ia = (unsigned) (size_t) dr->aux;
768 ca = component_of (comp_father, ia); 881 ca = component_of (comp_father, ia);
769 if (ca == bad) 882 if (ca == bad)
770 continue; 883 continue;
771 884
772 comp = comps[ca]; 885 comp = comps[ca];
773 if (!comp) 886 if (!comp)
774 { 887 {
775 comp = XCNEW (struct component); 888 comp = XCNEW (struct component);
776 comp->refs = VEC_alloc (dref, heap, comp_size[ca]); 889 comp->refs.create (comp_size[ca]);
890 comp->eliminate_store_p = eliminate_store_p;
777 comps[ca] = comp; 891 comps[ca] = comp;
778 } 892 }
779 893
780 dataref = XCNEW (struct dref_d); 894 dataref = XCNEW (struct dref_d);
781 dataref->ref = dr; 895 dataref->ref = dr;
782 dataref->stmt = DR_STMT (dr); 896 dataref->stmt = DR_STMT (dr);
783 dataref->offset = double_int_zero; 897 dataref->offset = 0;
784 dataref->distance = 0; 898 dataref->distance = 0;
785 899
786 dataref->always_accessed 900 dataref->always_accessed
787 = dominated_by_p (CDI_DOMINATORS, last_always_executed, 901 = dominated_by_p (CDI_DOMINATORS, last_always_executed,
788 gimple_bb (dataref->stmt)); 902 gimple_bb (dataref->stmt));
789 dataref->pos = VEC_length (dref, comp->refs); 903 dataref->pos = comp->refs.length ();
790 VEC_quick_push (dref, comp->refs, dataref); 904 comp->refs.quick_push (dataref);
905 if (DR_IS_READ (dr))
906 comp->eliminate_store_p = false;
791 } 907 }
792 908
793 for (i = 0; i < n; i++) 909 for (i = 0; i < n; i++)
794 { 910 {
795 comp = comps[i]; 911 comp = comps[i];
817 unsigned i; 933 unsigned i;
818 dref a, first; 934 dref a, first;
819 basic_block ba, bp = loop->header; 935 basic_block ba, bp = loop->header;
820 bool ok, has_write = false; 936 bool ok, has_write = false;
821 937
822 FOR_EACH_VEC_ELT (dref, comp->refs, i, a) 938 FOR_EACH_VEC_ELT (comp->refs, i, a)
823 { 939 {
824 ba = gimple_bb (a->stmt); 940 ba = gimple_bb (a->stmt);
825 941
826 if (!just_once_each_iteration_p (loop, ba)) 942 if (!just_once_each_iteration_p (loop, ba))
827 return false; 943 return false;
831 947
832 if (DR_IS_WRITE (a->ref)) 948 if (DR_IS_WRITE (a->ref))
833 has_write = true; 949 has_write = true;
834 } 950 }
835 951
836 first = VEC_index (dref, comp->refs, 0); 952 first = comp->refs[0];
837 ok = suitable_reference_p (first->ref, &comp->comp_step); 953 ok = suitable_reference_p (first->ref, &comp->comp_step);
838 gcc_assert (ok); 954 gcc_assert (ok);
839 first->offset = double_int_zero; 955 first->offset = 0;
840 956
841 for (i = 1; VEC_iterate (dref, comp->refs, i, a); i++) 957 for (i = 1; comp->refs.iterate (i, &a); i++)
842 { 958 {
843 if (!determine_offset (first->ref, a->ref, &a->offset)) 959 if (!determine_offset (first->ref, a->ref, &a->offset))
844 return false; 960 return false;
845 961
846 #ifdef ENABLE_CHECKING 962 enum ref_step_type a_step;
847 { 963 gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
848 enum ref_step_type a_step; 964 && a_step == comp->comp_step);
849 ok = suitable_reference_p (a->ref, &a_step);
850 gcc_assert (ok && a_step == comp->comp_step);
851 }
852 #endif
853 } 965 }
854 966
855 /* If there is a write inside the component, we must know whether the 967 /* If there is a write inside the component, we must know whether the
856 step is nonzero or not -- we would not otherwise be able to recognize 968 step is nonzero or not -- we would not otherwise be able to recognize
857 whether the value accessed by reads comes from the OFFSET-th iteration 969 whether the value accessed by reads comes from the OFFSET-th iteration
881 { 993 {
882 dref ref; 994 dref ref;
883 unsigned i; 995 unsigned i;
884 996
885 *comp = act->next; 997 *comp = act->next;
886 FOR_EACH_VEC_ELT (dref, act->refs, i, ref) 998 FOR_EACH_VEC_ELT (act->refs, i, ref)
887 free (ref); 999 free (ref);
888 release_component (act); 1000 release_component (act);
889 } 1001 }
890 } 1002 }
891 1003
898 static int 1010 static int
899 order_drefs (const void *a, const void *b) 1011 order_drefs (const void *a, const void *b)
900 { 1012 {
901 const dref *const da = (const dref *) a; 1013 const dref *const da = (const dref *) a;
902 const dref *const db = (const dref *) b; 1014 const dref *const db = (const dref *) b;
903 int offcmp = double_int_scmp ((*da)->offset, (*db)->offset); 1015 int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
904 1016
905 if (offcmp != 0) 1017 if (offcmp != 0)
906 return offcmp; 1018 return offcmp;
907 1019
908 return (*da)->pos - (*db)->pos; 1020 return (*da)->pos - (*db)->pos;
911 /* Returns root of the CHAIN. */ 1023 /* Returns root of the CHAIN. */
912 1024
913 static inline dref 1025 static inline dref
914 get_chain_root (chain_p chain) 1026 get_chain_root (chain_p chain)
915 { 1027 {
916 return VEC_index (dref, chain->refs, 0); 1028 return chain->refs[0];
1029 }
1030
1031 /* Given CHAIN, returns the last ref at DISTANCE, or NULL if it doesn't
1032 exist. */
1033
1034 static inline dref
1035 get_chain_last_ref_at (chain_p chain, unsigned distance)
1036 {
1037 unsigned i;
1038
1039 for (i = chain->refs.length (); i > 0; i--)
1040 if (distance == chain->refs[i - 1]->distance)
1041 break;
1042
1043 return (i > 0) ? chain->refs[i - 1] : NULL;
917 } 1044 }
918 1045
919 /* Adds REF to the chain CHAIN. */ 1046 /* Adds REF to the chain CHAIN. */
920 1047
921 static void 1048 static void
922 add_ref_to_chain (chain_p chain, dref ref) 1049 add_ref_to_chain (chain_p chain, dref ref)
923 { 1050 {
924 dref root = get_chain_root (chain); 1051 dref root = get_chain_root (chain);
925 double_int dist; 1052
926 1053 gcc_assert (wi::les_p (root->offset, ref->offset));
927 gcc_assert (double_int_scmp (root->offset, ref->offset) <= 0); 1054 widest_int dist = ref->offset - root->offset;
928 dist = double_int_sub (ref->offset, root->offset); 1055 if (wi::leu_p (MAX_DISTANCE, dist))
929 if (double_int_ucmp (uhwi_to_double_int (MAX_DISTANCE), dist) <= 0)
930 { 1056 {
931 free (ref); 1057 free (ref);
932 return; 1058 return;
933 } 1059 }
934 gcc_assert (double_int_fits_in_uhwi_p (dist)); 1060 gcc_assert (wi::fits_uhwi_p (dist));
935 1061
936 VEC_safe_push (dref, heap, chain->refs, ref); 1062 chain->refs.safe_push (ref);
937 1063
938 ref->distance = double_int_to_uhwi (dist); 1064 ref->distance = dist.to_uhwi ();
939 1065
940 if (ref->distance >= chain->length) 1066 if (ref->distance >= chain->length)
941 { 1067 {
942 chain->length = ref->distance; 1068 chain->length = ref->distance;
943 chain->has_max_use_after = false; 1069 chain->has_max_use_after = false;
944 } 1070 }
945 1071
946 if (ref->distance == chain->length 1072 /* Don't set the flag for store-store chain since there is no use. */
1073 if (chain->type != CT_STORE_STORE
1074 && ref->distance == chain->length
947 && ref->pos > root->pos) 1075 && ref->pos > root->pos)
948 chain->has_max_use_after = true; 1076 chain->has_max_use_after = true;
949 1077
950 chain->all_always_accessed &= ref->always_accessed; 1078 chain->all_always_accessed &= ref->always_accessed;
951 } 1079 }
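For example (hypothetical loop), in

   for (i = 0; i < n; i++)
     x += a[i] + a[i + 1] + a[i + 3];

the chain is rooted at a[i + 3]; a[i + 1] is added with distance 2 and a[i] with distance 3, so chain->length ends up as 3.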
961 1089
962 chain->type = CT_INVARIANT; 1090 chain->type = CT_INVARIANT;
963 1091
964 chain->all_always_accessed = true; 1092 chain->all_always_accessed = true;
965 1093
966 FOR_EACH_VEC_ELT (dref, comp->refs, i, ref) 1094 FOR_EACH_VEC_ELT (comp->refs, i, ref)
967 { 1095 {
968 VEC_safe_push (dref, heap, chain->refs, ref); 1096 chain->refs.safe_push (ref);
969 chain->all_always_accessed &= ref->always_accessed; 1097 chain->all_always_accessed &= ref->always_accessed;
970 } 1098 }
971 1099
1100 chain->inits = vNULL;
1101 chain->finis = vNULL;
1102
972 return chain; 1103 return chain;
973 } 1104 }
974 1105
975 /* Make a new chain rooted at REF. */ 1106 /* Make a new chain of type TYPE rooted at REF. */
976 1107
977 static chain_p 1108 static chain_p
978 make_rooted_chain (dref ref) 1109 make_rooted_chain (dref ref, enum chain_type type)
979 { 1110 {
980 chain_p chain = XCNEW (struct chain); 1111 chain_p chain = XCNEW (struct chain);
981 1112
982 chain->type = DR_IS_READ (ref->ref) ? CT_LOAD : CT_STORE_LOAD; 1113 chain->type = type;
983 1114 chain->refs.safe_push (ref);
984 VEC_safe_push (dref, heap, chain->refs, ref);
985 chain->all_always_accessed = ref->always_accessed; 1115 chain->all_always_accessed = ref->always_accessed;
986
987 ref->distance = 0; 1116 ref->distance = 0;
1117
1118 chain->inits = vNULL;
1119 chain->finis = vNULL;
988 1120
989 return chain; 1121 return chain;
990 } 1122 }
991 1123
992 /* Returns true if CHAIN is not trivial. */ 1124 /* Returns true if CHAIN is not trivial. */
993 1125
994 static bool 1126 static bool
995 nontrivial_chain_p (chain_p chain) 1127 nontrivial_chain_p (chain_p chain)
996 { 1128 {
997 return chain != NULL && VEC_length (dref, chain->refs) > 1; 1129 return chain != NULL && chain->refs.length () > 1;
998 } 1130 }
999 1131
1000 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there 1132 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1001 is no such name. */ 1133 is no such name. */
1002 1134
1024 static bool 1156 static bool
1025 valid_initializer_p (struct data_reference *ref, 1157 valid_initializer_p (struct data_reference *ref,
1026 unsigned distance, struct data_reference *root) 1158 unsigned distance, struct data_reference *root)
1027 { 1159 {
1028 aff_tree diff, base, step; 1160 aff_tree diff, base, step;
1029 double_int off; 1161 widest_int off;
1030 1162
1031 /* Both REF and ROOT must be accessing the same object. */ 1163 /* Both REF and ROOT must be accessing the same object. */
1032 if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0)) 1164 if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
1033 return false; 1165 return false;
1034 1166
1044 1176
1045 /* Verify that this index of REF is equal to the root's index at 1177 /* Verify that this index of REF is equal to the root's index at
1046 -DISTANCE-th iteration. */ 1178 -DISTANCE-th iteration. */
1047 aff_combination_dr_offset (root, &diff); 1179 aff_combination_dr_offset (root, &diff);
1048 aff_combination_dr_offset (ref, &base); 1180 aff_combination_dr_offset (ref, &base);
1049 aff_combination_scale (&base, double_int_minus_one); 1181 aff_combination_scale (&base, -1);
1050 aff_combination_add (&diff, &base); 1182 aff_combination_add (&diff, &base);
1051 1183
1052 tree_to_aff_combination_expand (DR_STEP (root), sizetype, &step, 1184 tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
1053 &name_expansions); 1185 &step, &name_expansions);
1054 if (!aff_combination_constant_multiple_p (&diff, &step, &off)) 1186 if (!aff_combination_constant_multiple_p (&diff, &step, &off))
1055 return false; 1187 return false;
1056 1188
1057 if (!double_int_equal_p (off, uhwi_to_double_int (distance))) 1189 if (off != distance)
1058 return false; 1190 return false;
1059 1191
1060 return true; 1192 return true;
1061 } 1193 }
1062 1194
1063 /* Finds looparound phi node of LOOP that copies the value of REF, and if its 1195 /* Finds looparound phi node of LOOP that copies the value of REF, and if its
1064 initial value is correct (equal to initial value of REF shifted by one 1196 initial value is correct (equal to initial value of REF shifted by one
1065 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT 1197 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
1066 is the root of the current chain. */ 1198 is the root of the current chain. */
1067 1199
1068 static gimple 1200 static gphi *
1069 find_looparound_phi (struct loop *loop, dref ref, dref root) 1201 find_looparound_phi (struct loop *loop, dref ref, dref root)
1070 { 1202 {
1071 tree name, init, init_ref; 1203 tree name, init, init_ref;
1072 gimple phi = NULL, init_stmt; 1204 gphi *phi = NULL;
1205 gimple *init_stmt;
1073 edge latch = loop_latch_edge (loop); 1206 edge latch = loop_latch_edge (loop);
1074 struct data_reference init_dr; 1207 struct data_reference init_dr;
1075 gimple_stmt_iterator psi; 1208 gphi_iterator psi;
1076 1209
1077 if (is_gimple_assign (ref->stmt)) 1210 if (is_gimple_assign (ref->stmt))
1078 { 1211 {
1079 if (DR_IS_READ (ref->ref)) 1212 if (DR_IS_READ (ref->ref))
1080 name = gimple_assign_lhs (ref->stmt); 1213 name = gimple_assign_lhs (ref->stmt);
1086 if (!name) 1219 if (!name)
1087 return NULL; 1220 return NULL;
1088 1221
1089 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) 1222 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1090 { 1223 {
1091 phi = gsi_stmt (psi); 1224 phi = psi.phi ();
1092 if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name) 1225 if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
1093 break; 1226 break;
1094 } 1227 }
1095 1228
1096 if (gsi_end_p (psi)) 1229 if (gsi_end_p (psi))
1112 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost 1245 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1113 loop enclosing PHI). */ 1246 loop enclosing PHI). */
1114 memset (&init_dr, 0, sizeof (struct data_reference)); 1247 memset (&init_dr, 0, sizeof (struct data_reference));
1115 DR_REF (&init_dr) = init_ref; 1248 DR_REF (&init_dr) = init_ref;
1116 DR_STMT (&init_dr) = phi; 1249 DR_STMT (&init_dr) = phi;
1117 if (!dr_analyze_innermost (&init_dr)) 1250 if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, loop))
1118 return NULL; 1251 return NULL;
1119 1252
1120 if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref)) 1253 if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
1121 return NULL; 1254 return NULL;
1122 1255
1124 } 1257 }
1125 1258
1126 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */ 1259 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1127 1260
1128 static void 1261 static void
1129 insert_looparound_copy (chain_p chain, dref ref, gimple phi) 1262 insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
1130 { 1263 {
1131 dref nw = XCNEW (struct dref_d), aref; 1264 dref nw = XCNEW (struct dref_d), aref;
1132 unsigned i; 1265 unsigned i;
1133 1266
1134 nw->stmt = phi; 1267 nw->stmt = phi;
1135 nw->distance = ref->distance + 1; 1268 nw->distance = ref->distance + 1;
1136 nw->always_accessed = 1; 1269 nw->always_accessed = 1;
1137 1270
1138 FOR_EACH_VEC_ELT (dref, chain->refs, i, aref) 1271 FOR_EACH_VEC_ELT (chain->refs, i, aref)
1139 if (aref->distance >= nw->distance) 1272 if (aref->distance >= nw->distance)
1140 break; 1273 break;
1141 VEC_safe_insert (dref, heap, chain->refs, i, nw); 1274 chain->refs.safe_insert (i, nw);
1142 1275
1143 if (nw->distance > chain->length) 1276 if (nw->distance > chain->length)
1144 { 1277 {
1145 chain->length = nw->distance; 1278 chain->length = nw->distance;
1146 chain->has_max_use_after = false; 1279 chain->has_max_use_after = false;
1155 static void 1288 static void
1156 add_looparound_copies (struct loop *loop, chain_p chain) 1289 add_looparound_copies (struct loop *loop, chain_p chain)
1157 { 1290 {
1158 unsigned i; 1291 unsigned i;
1159 dref ref, root = get_chain_root (chain); 1292 dref ref, root = get_chain_root (chain);
1160 gimple phi; 1293 gphi *phi;
1161 1294
1162 FOR_EACH_VEC_ELT (dref, chain->refs, i, ref) 1295 if (chain->type == CT_STORE_STORE)
1296 return;
1297
1298 FOR_EACH_VEC_ELT (chain->refs, i, ref)
1163 { 1299 {
1164 phi = find_looparound_phi (loop, ref, root); 1300 phi = find_looparound_phi (loop, ref, root);
1165 if (!phi) 1301 if (!phi)
1166 continue; 1302 continue;
1167 1303
1175 loop. */ 1311 loop. */
1176 1312
1177 static void 1313 static void
1178 determine_roots_comp (struct loop *loop, 1314 determine_roots_comp (struct loop *loop,
1179 struct component *comp, 1315 struct component *comp,
1180 VEC (chain_p, heap) **chains) 1316 vec<chain_p> *chains)
1181 { 1317 {
1182 unsigned i; 1318 unsigned i;
1183 dref a; 1319 dref a;
1184 chain_p chain = NULL; 1320 chain_p chain = NULL;
1185 double_int last_ofs = double_int_zero; 1321 widest_int last_ofs = 0;
1322 enum chain_type type;
1186 1323
1187 /* Invariants are handled specially. */ 1324 /* Invariants are handled specially. */
1188 if (comp->comp_step == RS_INVARIANT) 1325 if (comp->comp_step == RS_INVARIANT)
1189 { 1326 {
1190 chain = make_invariant_chain (comp); 1327 chain = make_invariant_chain (comp);
1191 VEC_safe_push (chain_p, heap, *chains, chain); 1328 chains->safe_push (chain);
1192 return; 1329 return;
1193 } 1330 }
1194 1331
1195 VEC_qsort (dref, comp->refs, order_drefs); 1332 /* Trivial component. */
1196 1333 if (comp->refs.length () <= 1)
1197 FOR_EACH_VEC_ELT (dref, comp->refs, i, a) 1334 return;
1198 { 1335
1199 if (!chain || DR_IS_WRITE (a->ref) 1336 comp->refs.qsort (order_drefs);
1200 || double_int_ucmp (uhwi_to_double_int (MAX_DISTANCE), 1337 FOR_EACH_VEC_ELT (comp->refs, i, a)
1201 double_int_sub (a->offset, last_ofs)) <= 0) 1338 {
1339 if (!chain
1340 || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref))
1341 || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
1202 { 1342 {
1203 if (nontrivial_chain_p (chain)) 1343 if (nontrivial_chain_p (chain))
1204 { 1344 {
1205 add_looparound_copies (loop, chain); 1345 add_looparound_copies (loop, chain);
1206 VEC_safe_push (chain_p, heap, *chains, chain); 1346 chains->safe_push (chain);
1207 } 1347 }
1208 else 1348 else
1209 release_chain (chain); 1349 release_chain (chain);
1210 chain = make_rooted_chain (a); 1350
1351 if (DR_IS_READ (a->ref))
1352 type = CT_LOAD;
1353 else
1354 type = comp->eliminate_store_p ? CT_STORE_STORE : CT_STORE_LOAD;
1355
1356 chain = make_rooted_chain (a, type);
1211 last_ofs = a->offset; 1357 last_ofs = a->offset;
1212 continue; 1358 continue;
1213 } 1359 }
1214 1360
1215 add_ref_to_chain (chain, a); 1361 add_ref_to_chain (chain, a);
1216 } 1362 }
1217 1363
1218 if (nontrivial_chain_p (chain)) 1364 if (nontrivial_chain_p (chain))
1219 { 1365 {
1220 add_looparound_copies (loop, chain); 1366 add_looparound_copies (loop, chain);
1221 VEC_safe_push (chain_p, heap, *chains, chain); 1367 chains->safe_push (chain);
1222 } 1368 }
1223 else 1369 else
1224 release_chain (chain); 1370 release_chain (chain);
1225 } 1371 }
1226 1372
1227 /* Find roots of the values and determine distances in components COMPS, and 1373 /* Find roots of the values and determine distances in components COMPS, and
1228 separates the references to CHAINS. LOOP is the current loop. */ 1374 separates the references to CHAINS. LOOP is the current loop. */
1229 1375
1230 static void 1376 static void
1231 determine_roots (struct loop *loop, 1377 determine_roots (struct loop *loop,
1232 struct component *comps, VEC (chain_p, heap) **chains) 1378 struct component *comps, vec<chain_p> *chains)
1233 { 1379 {
1234 struct component *comp; 1380 struct component *comp;
1235 1381
1236 for (comp = comps; comp; comp = comp->next) 1382 for (comp = comps; comp; comp = comp->next)
1237 determine_roots_comp (loop, comp, chains); 1383 determine_roots_comp (loop, comp, chains);
1241 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of 1387 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
1242 the reference in the statement. IN_LHS is true if the reference 1388 the reference in the statement. IN_LHS is true if the reference
1243 is in the lhs of STMT, false if it is in rhs. */ 1389 is in the lhs of STMT, false if it is in rhs. */
1244 1390
1245 static void 1391 static void
1246 replace_ref_with (gimple stmt, tree new_tree, bool set, bool in_lhs) 1392 replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
1247 { 1393 {
1248 tree val; 1394 tree val;
1249 gimple new_stmt; 1395 gassign *new_stmt;
1250 gimple_stmt_iterator bsi, psi; 1396 gimple_stmt_iterator bsi, psi;
1251 1397
1252 if (gimple_code (stmt) == GIMPLE_PHI) 1398 if (gimple_code (stmt) == GIMPLE_PHI)
1253 { 1399 {
1254 gcc_assert (!in_lhs && !set); 1400 gcc_assert (!in_lhs && !set);
1302 */ 1448 */
1303 1449
1304 val = gimple_assign_lhs (stmt); 1450 val = gimple_assign_lhs (stmt);
1305 if (TREE_CODE (val) != SSA_NAME) 1451 if (TREE_CODE (val) != SSA_NAME)
1306 { 1452 {
1307 gcc_assert (gimple_assign_copy_p (stmt));
1308 val = gimple_assign_rhs1 (stmt); 1453 val = gimple_assign_rhs1 (stmt);
1454 gcc_assert (gimple_assign_single_p (stmt));
1455 if (TREE_CLOBBER_P (val))
1456 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
1457 else
1458 gcc_assert (gimple_assign_copy_p (stmt));
1309 } 1459 }
1310 } 1460 }
1311 else 1461 else
1312 { 1462 {
1313 /* VAL = OLD 1463 /* VAL = OLD
1322 1472
1323 new_stmt = gimple_build_assign (new_tree, unshare_expr (val)); 1473 new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
1324 gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT); 1474 gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
1325 } 1475 }
1326 1476
1327 /* Returns the reference to the address of REF in the ITER-th iteration of 1477 /* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
1328 LOOP, or NULL if we fail to determine it (ITER may be negative). We 1478 of the loop it was analyzed in. Append init stmts to STMTS. */
1329 try to preserve the original shape of the reference (not rewrite it
1330 as an indirect ref to the address), to make tree_could_trap_p in
1331 prepare_initializers_chain return false more often. */
1332 1479
1333 static tree 1480 static tree
1334 ref_at_iteration (struct loop *loop, tree ref, int iter) 1481 ref_at_iteration (data_reference_p dr, int iter,
1335 { 1482 gimple_seq *stmts, tree niters = NULL_TREE)
1336 tree idx, *idx_p, type, val, op0 = NULL_TREE, ret; 1483 {
1337 affine_iv iv; 1484 tree off = DR_OFFSET (dr);
1338 bool ok; 1485 tree coff = DR_INIT (dr);
1339 1486 tree ref = DR_REF (dr);
1340 if (handled_component_p (ref)) 1487 enum tree_code ref_code = ERROR_MARK;
1341 { 1488 tree ref_type = NULL_TREE;
1342 op0 = ref_at_iteration (loop, TREE_OPERAND (ref, 0), iter); 1489 tree ref_op1 = NULL_TREE;
1343 if (!op0) 1490 tree ref_op2 = NULL_TREE;
1344 return NULL_TREE; 1491 tree new_offset;
1345 } 1492
1346 else if (!INDIRECT_REF_P (ref) 1493 if (iter != 0)
1347 && TREE_CODE (ref) != MEM_REF) 1494 {
1348 return unshare_expr (ref); 1495 new_offset = size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter));
1349 1496 if (TREE_CODE (new_offset) == INTEGER_CST)
1350 if (TREE_CODE (ref) == MEM_REF) 1497 coff = size_binop (PLUS_EXPR, coff, new_offset);
1351 {
1352 ret = unshare_expr (ref);
1353 idx = TREE_OPERAND (ref, 0);
1354 idx_p = &TREE_OPERAND (ret, 0);
1355 }
1356 else if (TREE_CODE (ref) == COMPONENT_REF)
1357 {
1358 /* Check that the offset is loop invariant. */
1359 if (TREE_OPERAND (ref, 2)
1360 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 2)))
1361 return NULL_TREE;
1362
1363 return build3 (COMPONENT_REF, TREE_TYPE (ref), op0,
1364 unshare_expr (TREE_OPERAND (ref, 1)),
1365 unshare_expr (TREE_OPERAND (ref, 2)));
1366 }
1367 else if (TREE_CODE (ref) == ARRAY_REF)
1368 {
1369 /* Check that the lower bound and the step are loop invariant. */
1370 if (TREE_OPERAND (ref, 2)
1371 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 2)))
1372 return NULL_TREE;
1373 if (TREE_OPERAND (ref, 3)
1374 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (ref, 3)))
1375 return NULL_TREE;
1376
1377 ret = build4 (ARRAY_REF, TREE_TYPE (ref), op0, NULL_TREE,
1378 unshare_expr (TREE_OPERAND (ref, 2)),
1379 unshare_expr (TREE_OPERAND (ref, 3)));
1380 idx = TREE_OPERAND (ref, 1);
1381 idx_p = &TREE_OPERAND (ret, 1);
1382 }
1383 else
1384 return NULL_TREE;
1385
1386 ok = simple_iv (loop, loop, idx, &iv, true);
1387 if (!ok)
1388 return NULL_TREE;
1389 iv.base = expand_simple_operations (iv.base);
1390 if (integer_zerop (iv.step))
1391 *idx_p = unshare_expr (iv.base);
1392 else
1393 {
1394 type = TREE_TYPE (iv.base);
1395 if (POINTER_TYPE_P (type))
1396 {
1397 val = fold_build2 (MULT_EXPR, sizetype, iv.step,
1398 size_int (iter));
1399 val = fold_build2 (POINTER_PLUS_EXPR, type, iv.base, val);
1400 }
1401 else 1498 else
1402 { 1499 off = size_binop (PLUS_EXPR, off, new_offset);
1403 val = fold_build2 (MULT_EXPR, type, iv.step, 1500 }
1404 build_int_cst_type (type, iter)); 1501
1405 val = fold_build2 (PLUS_EXPR, type, iv.base, val); 1502 if (niters != NULL_TREE)
1406 } 1503 {
1407 *idx_p = unshare_expr (val); 1504 niters = fold_convert (ssizetype, niters);
1408 } 1505 new_offset = size_binop (MULT_EXPR, DR_STEP (dr), niters);
1409 1506 if (TREE_CODE (niters) == INTEGER_CST)
1410 return ret; 1507 coff = size_binop (PLUS_EXPR, coff, new_offset);
1508 else
1509 off = size_binop (PLUS_EXPR, off, new_offset);
1510 }
1511
1512 /* While data-ref analysis punts on bit offsets it still handles
1513 bitfield accesses at byte boundaries. Cope with that. Note that
1514 if the bitfield object also starts at a byte-boundary we can simply
1515 replicate the COMPONENT_REF, but we have to subtract the component's
1516 byte-offset from the MEM_REF address first.
1517 Otherwise we simply build a BIT_FIELD_REF knowing that the bits
1518 start at offset zero. */
1519 if (TREE_CODE (ref) == COMPONENT_REF
1520 && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
1521 {
1522 unsigned HOST_WIDE_INT boff;
1523 tree field = TREE_OPERAND (ref, 1);
1524 tree offset = component_ref_field_offset (ref);
1525 ref_type = TREE_TYPE (ref);
1526 boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
1527 /* This can occur in Ada. See the comment in get_bit_range. */
1528 if (boff % BITS_PER_UNIT != 0
1529 || !tree_fits_uhwi_p (offset))
1530 {
1531 ref_code = BIT_FIELD_REF;
1532 ref_op1 = DECL_SIZE (field);
1533 ref_op2 = bitsize_zero_node;
1534 }
1535 else
1536 {
1537 boff >>= LOG2_BITS_PER_UNIT;
1538 boff += tree_to_uhwi (offset);
1539 coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
1540 ref_code = COMPONENT_REF;
1541 ref_op1 = field;
1542 ref_op2 = TREE_OPERAND (ref, 2);
1543 ref = TREE_OPERAND (ref, 0);
1544 }
1545 }
1546 tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
1547 addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
1548 is_gimple_mem_ref_addr, NULL_TREE);
1549 tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
1550 tree type = build_aligned_type (TREE_TYPE (ref),
1551 get_object_alignment (ref));
1552 ref = build2 (MEM_REF, type, addr, alias_ptr);
1553 if (ref_type)
1554 ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
1555 return ref;
1411 } 1556 }
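A rough illustration of the reference built here (hypothetical data reference, 4-byte elements): for DR == a[i] inside a simple loop over i, data-ref analysis yields approximately DR_BASE_ADDRESS == &a, DR_OFFSET == (sizetype) i * 4, DR_INIT == 0 and DR_STEP == 4. With ITER == 2 the constant part becomes 0 + 2 * 4 == 8, so the returned tree has the shape MEM_REF [&a + (sizetype) i * 4, 8], i.e. it is equivalent to a[i + 2].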
1412 1557
1413 /* Get the initialization expression for the INDEX-th temporary variable 1558 /* Get the initialization expression for the INDEX-th temporary variable
1414 of CHAIN. */ 1559 of CHAIN. */
1415 1560
1422 tree e2 = get_init_expr (chain->ch2, index); 1567 tree e2 = get_init_expr (chain->ch2, index);
1423 1568
1424 return fold_build2 (chain->op, chain->rslt_type, e1, e2); 1569 return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1425 } 1570 }
1426 else 1571 else
1427 return VEC_index (tree, chain->inits, index); 1572 return chain->inits[index];
1428 }
1429
1430 /* Marks all virtual operands of statement STMT for renaming. */
1431
1432 void
1433 mark_virtual_ops_for_renaming (gimple stmt)
1434 {
1435 tree var;
1436
1437 if (gimple_code (stmt) == GIMPLE_PHI)
1438 {
1439 var = PHI_RESULT (stmt);
1440 if (is_gimple_reg (var))
1441 return;
1442
1443 if (TREE_CODE (var) == SSA_NAME)
1444 var = SSA_NAME_VAR (var);
1445 mark_sym_for_renaming (var);
1446 return;
1447 }
1448
1449 update_stmt (stmt);
1450 if (gimple_vuse (stmt))
1451 mark_sym_for_renaming (gimple_vop (cfun));
1452 } 1573 }
1453 1574
1454 /* Returns a new temporary variable used for the I-th variable carrying 1575 /* Returns a new temporary variable used for the I-th variable carrying
1455 value of REF. The variable's uid is marked in TMP_VARS. */ 1576 value of REF. The variable's uid is marked in TMP_VARS. */
1456 1577
1459 { 1580 {
1460 tree type = TREE_TYPE (ref); 1581 tree type = TREE_TYPE (ref);
1461 /* We never access the components of the temporary variable in predictive 1582 /* We never access the components of the temporary variable in predictive
1462 commoning. */ 1583 commoning. */
1463 tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i)); 1584 tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
1464
1465 add_referenced_var (var);
1466 bitmap_set_bit (tmp_vars, DECL_UID (var)); 1585 bitmap_set_bit (tmp_vars, DECL_UID (var));
1467 return var; 1586 return var;
1468 } 1587 }
1469 1588
1470 /* Creates the variables for CHAIN, as well as phi nodes for them and 1589 /* Creates the variables for CHAIN, as well as phi nodes for them and
1477 unsigned i; 1596 unsigned i;
1478 unsigned n = chain->length; 1597 unsigned n = chain->length;
1479 dref root = get_chain_root (chain); 1598 dref root = get_chain_root (chain);
1480 bool reuse_first = !chain->has_max_use_after; 1599 bool reuse_first = !chain->has_max_use_after;
1481 tree ref, init, var, next; 1600 tree ref, init, var, next;
1482 gimple phi; 1601 gphi *phi;
1483 gimple_seq stmts; 1602 gimple_seq stmts;
1484 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop); 1603 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1485 1604
1486 /* If N == 0, then all the references are within the single iteration. And 1605 /* If N == 0, then all the references are within the single iteration. And
1487 since this is an nonempty chain, reuse_first cannot be true. */ 1606 since this is an nonempty chain, reuse_first cannot be true. */
1488 gcc_assert (n > 0 || !reuse_first); 1607 gcc_assert (n > 0 || !reuse_first);
1489 1608
1490 chain->vars = VEC_alloc (tree, heap, n + 1); 1609 chain->vars.create (n + 1);
1491 1610
1492 if (chain->type == CT_COMBINATION) 1611 if (chain->type == CT_COMBINATION)
1493 ref = gimple_assign_lhs (root->stmt); 1612 ref = gimple_assign_lhs (root->stmt);
1494 else 1613 else
1495 ref = DR_REF (root->ref); 1614 ref = DR_REF (root->ref);
1496 1615
1497 for (i = 0; i < n + (reuse_first ? 0 : 1); i++) 1616 for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1498 { 1617 {
1499 var = predcom_tmp_var (ref, i, tmp_vars); 1618 var = predcom_tmp_var (ref, i, tmp_vars);
1500 VEC_quick_push (tree, chain->vars, var); 1619 chain->vars.quick_push (var);
1501 } 1620 }
1502 if (reuse_first) 1621 if (reuse_first)
1503 VEC_quick_push (tree, chain->vars, VEC_index (tree, chain->vars, 0)); 1622 chain->vars.quick_push (chain->vars[0]);
1504 1623
1505 FOR_EACH_VEC_ELT (tree, chain->vars, i, var) 1624 FOR_EACH_VEC_ELT (chain->vars, i, var)
1506 VEC_replace (tree, chain->vars, i, make_ssa_name (var, NULL)); 1625 chain->vars[i] = make_ssa_name (var);
1507 1626
1508 for (i = 0; i < n; i++) 1627 for (i = 0; i < n; i++)
1509 { 1628 {
1510 var = VEC_index (tree, chain->vars, i); 1629 var = chain->vars[i];
1511 next = VEC_index (tree, chain->vars, i + 1); 1630 next = chain->vars[i + 1];
1512 init = get_init_expr (chain, i); 1631 init = get_init_expr (chain, i);
1513 1632
1514 init = force_gimple_operand (init, &stmts, true, NULL_TREE); 1633 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1515 if (stmts) 1634 if (stmts)
1516 gsi_insert_seq_on_edge_immediate (entry, stmts); 1635 gsi_insert_seq_on_edge_immediate (entry, stmts);
1517 1636
1518 phi = create_phi_node (var, loop->header); 1637 phi = create_phi_node (var, loop->header);
1519 SSA_NAME_DEF_STMT (var) = phi;
1520 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION); 1638 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1521 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION); 1639 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1522 } 1640 }
1523 } 1641 }
1524 1642
1525 /* Create the variables and initialization statement for root of chain 1643 /* For inter-iteration store elimination CHAIN in LOOP, returns true if
1526 CHAIN. Uids of the newly created temporary variables are marked 1644 all stores to be eliminated store loop invariant values into memory.
1527 in TMP_VARS. */ 1645 In this case, we can use these invariant values directly after LOOP. */
1646
1647 static bool
1648 is_inv_store_elimination_chain (struct loop *loop, chain_p chain)
1649 {
1650 if (chain->length == 0 || chain->type != CT_STORE_STORE)
1651 return false;
1652
1653 gcc_assert (!chain->has_max_use_after);
1654
1655 /* If the loop iterates an unknown number of times, or fewer times than chain->length,
1656 we still need to set up the root variable and propagate it with PHI nodes. */
1657 tree niters = number_of_latch_executions (loop);
1658 if (TREE_CODE (niters) != INTEGER_CST
1659 || wi::leu_p (wi::to_wide (niters), chain->length))
1660 return false;
1661
1662 /* Check the stores in the chain for elimination; they must only store
1663 loop invariant values. */
1664 for (unsigned i = 0; i < chain->length; i++)
1665 {
1666 dref a = get_chain_last_ref_at (chain, i);
1667 if (a == NULL)
1668 continue;
1669
1670 gimple *def_stmt, *stmt = a->stmt;
1671 if (!gimple_assign_single_p (stmt))
1672 return false;
1673
1674 tree val = gimple_assign_rhs1 (stmt);
1675 if (TREE_CLOBBER_P (val))
1676 return false;
1677
1678 if (CONSTANT_CLASS_P (val))
1679 continue;
1680
1681 if (TREE_CODE (val) != SSA_NAME)
1682 return false;
1683
1684 def_stmt = SSA_NAME_DEF_STMT (val);
1685 if (gimple_nop_p (def_stmt))
1686 continue;
1687
1688 if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
1689 return false;
1690 }
1691 return true;
1692 }
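
For illustration (an editorial sketch, not part of the GCC sources): a chain passes this test when the loop is known to iterate more than chain->length times and every store whose value survives the loop writes something defined outside it, for instance an SSA name x set before the loop:

    x = ...;                  /* defined outside the loop */
    for (i = 0; i < n; i++)
      {
        a[i] = x;             /* kills the a[i + 2] store of iteration i - 2 */
        a[i + 2] = x;         /* rhs is loop invariant */
      }

No root variables or PHI nodes are needed; the finalizer can simply emit a[n] = x and a[n + 1] = x after the loop.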
1693
1694 /* Creates root variables for store elimination CHAIN in which the stores to be
1695 eliminated only store loop invariant values. In this case, we neither need
1696 to load root variables before the loop nor propagate them with PHI nodes. */
1528 1697
1529 static void 1698 static void
1530 initialize_root (struct loop *loop, chain_p chain, bitmap tmp_vars) 1699 initialize_root_vars_store_elim_1 (chain_p chain)
1531 { 1700 {
1532 dref root = get_chain_root (chain); 1701 tree var;
1533 bool in_lhs = (chain->type == CT_STORE_LOAD 1702 unsigned i, n = chain->length;
1534 || chain->type == CT_COMBINATION); 1703
1535 1704 chain->vars.create (n);
1536 initialize_root_vars (loop, chain, tmp_vars); 1705 chain->vars.safe_grow_cleared (n);
1537 replace_ref_with (root->stmt, 1706
1538 VEC_index (tree, chain->vars, chain->length), 1707 /* Initialize root value for eliminated stores at each distance. */
1539 true, in_lhs); 1708 for (i = 0; i < n; i++)
1709 {
1710 dref a = get_chain_last_ref_at (chain, i);
1711 if (a == NULL)
1712 continue;
1713
1714 var = gimple_assign_rhs1 (a->stmt);
1715 chain->vars[a->distance] = var;
1716 }
1717
1718 /* We don't propagate values with PHI nodes, so manually propagate the values
1719 to the bubble positions. */
1720 var = chain->vars[0];
1721 for (i = 1; i < n; i++)
1722 {
1723 if (chain->vars[i] != NULL_TREE)
1724 {
1725 var = chain->vars[i];
1726 continue;
1727 }
1728 chain->vars[i] = var;
1729 }
1730
1731 /* Reverse the vector. */
1732 for (i = 0; i < n / 2; i++)
1733 std::swap (chain->vars[i], chain->vars[n - i - 1]);
1734 }
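
A purely mechanical trace of the three steps above (an editorial sketch, not from the sources): if the recorded right-hand sides come out as

    chain->vars = { x, NULL_TREE, y }    /* bubble at index 1 */

then the forward propagation produces { x, x, y } and the final reversal { y, x, x }, which is the element order that finalize_eliminated_stores later pairs with chain->finis (vars[i] is stored through finis[n - i - 1]).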
1735
1736 /* Creates root variables for store elimination CHAIN in which the stores to be
1737 eliminated store loop variant values. In this case, we may need to load
1738 root variables before LOOP and propagate them with PHI nodes. Uids of the
1739 newly created root variables are marked in TMP_VARS. */
1740
1741 static void
1742 initialize_root_vars_store_elim_2 (struct loop *loop,
1743 chain_p chain, bitmap tmp_vars)
1744 {
1745 unsigned i, n = chain->length;
1746 tree ref, init, var, next, val, phi_result;
1747 gimple *stmt;
1748 gimple_seq stmts;
1749
1750 chain->vars.create (n);
1751
1752 ref = DR_REF (get_chain_root (chain)->ref);
1753 for (i = 0; i < n; i++)
1754 {
1755 var = predcom_tmp_var (ref, i, tmp_vars);
1756 chain->vars.quick_push (var);
1757 }
1758
1759 FOR_EACH_VEC_ELT (chain->vars, i, var)
1760 chain->vars[i] = make_ssa_name (var);
1761
1762 /* Root values are either the rhs operands of the stores to be eliminated,
1763 or loaded from memory before the loop. */
1764 auto_vec<tree> vtemps;
1765 vtemps.safe_grow_cleared (n);
1766 for (i = 0; i < n; i++)
1767 {
1768 init = get_init_expr (chain, i);
1769 if (init == NULL_TREE)
1770 {
1771 /* Root value is the rhs operand of the store to be eliminated if
1772 it isn't loaded from memory before the loop. */
1773 dref a = get_chain_last_ref_at (chain, i);
1774 val = gimple_assign_rhs1 (a->stmt);
1775 if (TREE_CLOBBER_P (val))
1776 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var));
1777
1778 vtemps[n - i - 1] = val;
1779 }
1780 else
1781 {
1782 edge latch = loop_latch_edge (loop);
1783 edge entry = loop_preheader_edge (loop);
1784
1785 /* Root value is loaded from memory before the loop; we also need
1786 to add PHI nodes to propagate the value across iterations. */
1787 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1788 if (stmts)
1789 gsi_insert_seq_on_edge_immediate (entry, stmts);
1790
1791 next = chain->vars[n - i];
1792 phi_result = copy_ssa_name (next);
1793 gphi *phi = create_phi_node (phi_result, loop->header);
1794 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1795 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1796 vtemps[n - i - 1] = phi_result;
1797 }
1798 }
1799
1800 /* Find the insertion position. */
1801 dref last = get_chain_root (chain);
1802 for (i = 0; i < chain->refs.length (); i++)
1803 {
1804 if (chain->refs[i]->pos > last->pos)
1805 last = chain->refs[i];
1806 }
1807
1808 gimple_stmt_iterator gsi = gsi_for_stmt (last->stmt);
1809
1810 /* Insert statements copying root value to root variable. */
1811 for (i = 0; i < n; i++)
1812 {
1813 var = chain->vars[i];
1814 val = vtemps[i];
1815 stmt = gimple_build_assign (var, val);
1816 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1817 }
1818 }
1819
1820 /* Generates stores for CHAIN's eliminated stores in LOOP's last
1821 (CHAIN->length - 1) iterations. */
1822
1823 static void
1824 finalize_eliminated_stores (struct loop *loop, chain_p chain)
1825 {
1826 unsigned i, n = chain->length;
1827
1828 for (i = 0; i < n; i++)
1829 {
1830 tree var = chain->vars[i];
1831 tree fini = chain->finis[n - i - 1];
1832 gimple *stmt = gimple_build_assign (fini, var);
1833
1834 gimple_seq_add_stmt_without_update (&chain->fini_seq, stmt);
1835 }
1836
1837 if (chain->fini_seq)
1838 {
1839 gsi_insert_seq_on_edge_immediate (single_exit (loop), chain->fini_seq);
1840 chain->fini_seq = NULL;
1841 }
1540 } 1842 }
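
For concreteness (an editorial sketch of the loop above, not from the sources): with n == 3 the sequence emitted on the single exit edge is

    finis[2] = vars[0];
    finis[1] = vars[1];
    finis[0] = vars[2];

so each root variable vars[i] is written back through the finalizer reference finis[n - i - 1] prepared by prepare_finalizers_chain.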
1541 1843
1542 /* Initializes a variable for load motion for ROOT and prepares phi nodes and 1844 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
1543 initialization on entry to LOOP if necessary. The ssa name for the variable 1845 initialization on entry to LOOP if necessary. The ssa name for the variable
1544 is stored in VARS. If WRITTEN is true, also a phi node to copy its value 1846 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
1546 is marked in TMP_VARS. INITS is the list containing the (single) 1848 is marked in TMP_VARS. INITS is the list containing the (single)
1547 initializer. */ 1849 initializer. */
1548 1850
1549 static void 1851 static void
1550 initialize_root_vars_lm (struct loop *loop, dref root, bool written, 1852 initialize_root_vars_lm (struct loop *loop, dref root, bool written,
1551 VEC(tree, heap) **vars, VEC(tree, heap) *inits, 1853 vec<tree> *vars, vec<tree> inits,
1552 bitmap tmp_vars) 1854 bitmap tmp_vars)
1553 { 1855 {
1554 unsigned i; 1856 unsigned i;
1555 tree ref = DR_REF (root->ref), init, var, next; 1857 tree ref = DR_REF (root->ref), init, var, next;
1556 gimple_seq stmts; 1858 gimple_seq stmts;
1557 gimple phi; 1859 gphi *phi;
1558 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop); 1860 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1559 1861
1560 /* Find the initializer for the variable, and check that it cannot 1862 /* Find the initializer for the variable, and check that it cannot
1561 trap. */ 1863 trap. */
1562 init = VEC_index (tree, inits, 0); 1864 init = inits[0];
1563 1865
1564 *vars = VEC_alloc (tree, heap, written ? 2 : 1); 1866 vars->create (written ? 2 : 1);
1565 var = predcom_tmp_var (ref, 0, tmp_vars); 1867 var = predcom_tmp_var (ref, 0, tmp_vars);
1566 VEC_quick_push (tree, *vars, var); 1868 vars->quick_push (var);
1567 if (written) 1869 if (written)
1568 VEC_quick_push (tree, *vars, VEC_index (tree, *vars, 0)); 1870 vars->quick_push ((*vars)[0]);
1569 1871
1570 FOR_EACH_VEC_ELT (tree, *vars, i, var) 1872 FOR_EACH_VEC_ELT (*vars, i, var)
1571 VEC_replace (tree, *vars, i, make_ssa_name (var, NULL)); 1873 (*vars)[i] = make_ssa_name (var);
1572 1874
1573 var = VEC_index (tree, *vars, 0); 1875 var = (*vars)[0];
1574 1876
1575 init = force_gimple_operand (init, &stmts, written, NULL_TREE); 1877 init = force_gimple_operand (init, &stmts, written, NULL_TREE);
1576 if (stmts) 1878 if (stmts)
1577 gsi_insert_seq_on_edge_immediate (entry, stmts); 1879 gsi_insert_seq_on_edge_immediate (entry, stmts);
1578 1880
1579 if (written) 1881 if (written)
1580 { 1882 {
1581 next = VEC_index (tree, *vars, 1); 1883 next = (*vars)[1];
1582 phi = create_phi_node (var, loop->header); 1884 phi = create_phi_node (var, loop->header);
1583 SSA_NAME_DEF_STMT (var) = phi;
1584 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION); 1885 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1585 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION); 1886 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1586 } 1887 }
1587 else 1888 else
1588 { 1889 {
1589 gimple init_stmt = gimple_build_assign (var, init); 1890 gassign *init_stmt = gimple_build_assign (var, init);
1590 mark_virtual_ops_for_renaming (init_stmt);
1591 gsi_insert_on_edge_immediate (entry, init_stmt); 1891 gsi_insert_on_edge_immediate (entry, init_stmt);
1592 } 1892 }
1593 } 1893 }
1594 1894
1595 1895
1597 created temporary variables are marked in TMP_VARS. */ 1897 created temporary variables are marked in TMP_VARS. */
1598 1898
1599 static void 1899 static void
1600 execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars) 1900 execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
1601 { 1901 {
1602 VEC (tree, heap) *vars; 1902 auto_vec<tree> vars;
1603 dref a; 1903 dref a;
1604 unsigned n_writes = 0, ridx, i; 1904 unsigned n_writes = 0, ridx, i;
1605 tree var; 1905 tree var;
1606 1906
1607 gcc_assert (chain->type == CT_INVARIANT); 1907 gcc_assert (chain->type == CT_INVARIANT);
1608 gcc_assert (!chain->combined); 1908 gcc_assert (!chain->combined);
1609 FOR_EACH_VEC_ELT (dref, chain->refs, i, a) 1909 FOR_EACH_VEC_ELT (chain->refs, i, a)
1610 if (DR_IS_WRITE (a->ref)) 1910 if (DR_IS_WRITE (a->ref))
1611 n_writes++; 1911 n_writes++;
1612 1912
1613 /* If there are no reads in the loop, there is nothing to do. */ 1913 /* If there are no reads in the loop, there is nothing to do. */
1614 if (n_writes == VEC_length (dref, chain->refs)) 1914 if (n_writes == chain->refs.length ())
1615 return; 1915 return;
1616 1916
1617 initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0, 1917 initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
1618 &vars, chain->inits, tmp_vars); 1918 &vars, chain->inits, tmp_vars);
1619 1919
1620 ridx = 0; 1920 ridx = 0;
1621 FOR_EACH_VEC_ELT (dref, chain->refs, i, a) 1921 FOR_EACH_VEC_ELT (chain->refs, i, a)
1622 { 1922 {
1623 bool is_read = DR_IS_READ (a->ref); 1923 bool is_read = DR_IS_READ (a->ref);
1624 mark_virtual_ops_for_renaming (a->stmt);
1625 1924
1626 if (DR_IS_WRITE (a->ref)) 1925 if (DR_IS_WRITE (a->ref))
1627 { 1926 {
1628 n_writes--; 1927 n_writes--;
1629 if (n_writes) 1928 if (n_writes)
1630 { 1929 {
1631 var = VEC_index (tree, vars, 0); 1930 var = vars[0];
1632 var = make_ssa_name (SSA_NAME_VAR (var), NULL); 1931 var = make_ssa_name (SSA_NAME_VAR (var));
1633 VEC_replace (tree, vars, 0, var); 1932 vars[0] = var;
1634 } 1933 }
1635 else 1934 else
1636 ridx = 1; 1935 ridx = 1;
1637 } 1936 }
1638 1937
1639 replace_ref_with (a->stmt, VEC_index (tree, vars, ridx), 1938 replace_ref_with (a->stmt, vars[ridx],
1640 !is_read, !is_read); 1939 !is_read, !is_read);
1641 } 1940 }
1642
1643 VEC_free (tree, heap, vars);
1644 } 1941 }
1645 1942
1646 /* Returns the single statement in that NAME is used, excepting 1943 /* Returns the single statement in that NAME is used, excepting
1647 the looparound phi nodes contained in one of the chains. If there is no 1944 the looparound phi nodes contained in one of the chains. If there is no
1648 such statement, or more statements, NULL is returned. */ 1945 such statement, or more statements, NULL is returned. */
1649 1946
1650 static gimple 1947 static gimple *
1651 single_nonlooparound_use (tree name) 1948 single_nonlooparound_use (tree name)
1652 { 1949 {
1653 use_operand_p use; 1950 use_operand_p use;
1654 imm_use_iterator it; 1951 imm_use_iterator it;
1655 gimple stmt, ret = NULL; 1952 gimple *stmt, *ret = NULL;
1656 1953
1657 FOR_EACH_IMM_USE_FAST (use, it, name) 1954 FOR_EACH_IMM_USE_FAST (use, it, name)
1658 { 1955 {
1659 stmt = USE_STMT (use); 1956 stmt = USE_STMT (use);
1660 1957
1681 1978
1682 /* Remove statement STMT, as well as the chain of assignments in that it is 1979 /* Remove statement STMT, as well as the chain of assignments in that it is
1683 used. */ 1980 used. */
1684 1981
1685 static void 1982 static void
1686 remove_stmt (gimple stmt) 1983 remove_stmt (gimple *stmt)
1687 { 1984 {
1688 tree name; 1985 tree name;
1689 gimple next; 1986 gimple *next;
1690 gimple_stmt_iterator psi; 1987 gimple_stmt_iterator psi;
1691 1988
1692 if (gimple_code (stmt) == GIMPLE_PHI) 1989 if (gimple_code (stmt) == GIMPLE_PHI)
1693 { 1990 {
1694 name = PHI_RESULT (stmt); 1991 name = PHI_RESULT (stmt);
1695 next = single_nonlooparound_use (name); 1992 next = single_nonlooparound_use (name);
1993 reset_debug_uses (stmt);
1696 psi = gsi_for_stmt (stmt); 1994 psi = gsi_for_stmt (stmt);
1697 remove_phi_node (&psi, true); 1995 remove_phi_node (&psi, true);
1698 1996
1699 if (!next 1997 if (!next
1700 || !gimple_assign_ssa_name_copy_p (next) 1998 || !gimple_assign_ssa_name_copy_p (next)
1709 gimple_stmt_iterator bsi; 2007 gimple_stmt_iterator bsi;
1710 2008
1711 bsi = gsi_for_stmt (stmt); 2009 bsi = gsi_for_stmt (stmt);
1712 2010
1713 name = gimple_assign_lhs (stmt); 2011 name = gimple_assign_lhs (stmt);
1714 gcc_assert (TREE_CODE (name) == SSA_NAME); 2012 if (TREE_CODE (name) == SSA_NAME)
1715 2013 {
1716 next = single_nonlooparound_use (name); 2014 next = single_nonlooparound_use (name);
1717 2015 reset_debug_uses (stmt);
1718 mark_virtual_ops_for_renaming (stmt); 2016 }
2017 else
2018 {
2019 /* This is a store to be eliminated. */
2020 gcc_assert (gimple_vdef (stmt) != NULL);
2021 next = NULL;
2022 }
2023
2024 unlink_stmt_vdef (stmt);
1719 gsi_remove (&bsi, true); 2025 gsi_remove (&bsi, true);
1720 release_defs (stmt); 2026 release_defs (stmt);
1721 2027
1722 if (!next 2028 if (!next
1723 || !gimple_assign_ssa_name_copy_p (next) 2029 || !gimple_assign_ssa_name_copy_p (next)
1731 /* Perform the predictive commoning optimization for a chain CHAIN. 2037 /* Perform the predictive commoning optimization for a chain CHAIN.
1732 Uids of the newly created temporary variables are marked in TMP_VARS.*/ 2038 Uids of the newly created temporary variables are marked in TMP_VARS.*/
1733 2039
1734 static void 2040 static void
1735 execute_pred_commoning_chain (struct loop *loop, chain_p chain, 2041 execute_pred_commoning_chain (struct loop *loop, chain_p chain,
1736 bitmap tmp_vars) 2042 bitmap tmp_vars)
1737 { 2043 {
1738 unsigned i; 2044 unsigned i, n;
1739 dref a, root; 2045 dref a;
1740 tree var; 2046 tree var;
2047 bool in_lhs;
1741 2048
1742 if (chain->combined) 2049 if (chain->combined)
1743 { 2050 {
1744 /* For combined chains, just remove the statements that are used to 2051 /* For combined chains, just remove the statements that are used to
1745 compute the values of the expression (except for the root one). */ 2052 compute the values of the expression (except for the root one).
1746 for (i = 1; VEC_iterate (dref, chain->refs, i, a); i++) 2053 We delay this until after all chains are processed. */
1747 remove_stmt (a->stmt); 2054 }
2055 else if (chain->type == CT_STORE_STORE)
2056 {
2057 if (chain->length > 0)
2058 {
2059 if (chain->inv_store_elimination)
2060 {
2061 /* If dead stores in this chain only store loop invariant
2062 values, we can simply record the invariant value and use
2063 it directly after loop. */
2064 initialize_root_vars_store_elim_1 (chain);
2065 }
2066 else
2067 {
2068 /* If dead stores in this chain store loop variant values,
2069 we need to set up the variables by loading from memory
2070 before the loop and propagating them with PHI nodes. */
2071 initialize_root_vars_store_elim_2 (loop, chain, tmp_vars);
2072 }
2073
2074 /* For an inter-iteration store elimination chain, stores at each
2075 distance in the loop's last (chain->length - 1) iterations can't
2076 be eliminated, because there is no following killing store.
2077 We need to generate these stores after the loop. */
2078 finalize_eliminated_stores (loop, chain);
2079 }
2080
2081 /* Eliminate the stores killed by a following store. */
2082 n = chain->refs.length ();
2083 for (i = 0; i < n - 1; i++)
2084 remove_stmt (chain->refs[i]->stmt);
1748 } 2085 }
1749 else 2086 else
1750 { 2087 {
1751 /* For non-combined chains, set up the variables that hold its value, 2088 /* For non-combined chains, set up the variables that hold its value. */
1752 and replace the uses of the original references by these 2089 initialize_root_vars (loop, chain, tmp_vars);
1753 variables. */ 2090 a = get_chain_root (chain);
1754 root = get_chain_root (chain); 2091 in_lhs = (chain->type == CT_STORE_LOAD
1755 mark_virtual_ops_for_renaming (root->stmt); 2092 || chain->type == CT_COMBINATION);
1756 2093 replace_ref_with (a->stmt, chain->vars[chain->length], true, in_lhs);
1757 initialize_root (loop, chain, tmp_vars); 2094
1758 for (i = 1; VEC_iterate (dref, chain->refs, i, a); i++) 2095 /* Replace the uses of the original references by these variables. */
1759 { 2096 for (i = 1; chain->refs.iterate (i, &a); i++)
1760 mark_virtual_ops_for_renaming (a->stmt); 2097 {
1761 var = VEC_index (tree, chain->vars, chain->length - a->distance); 2098 var = chain->vars[chain->length - a->distance];
1762 replace_ref_with (a->stmt, var, false, false); 2099 replace_ref_with (a->stmt, var, false, false);
1763 } 2100 }
1764 } 2101 }
1765 } 2102 }
1766 2103
1767 /* Determines the unroll factor necessary to remove as many temporary variable 2104 /* Determines the unroll factor necessary to remove as many temporary variable
1768 copies as possible. CHAINS is the list of chains that will be 2105 copies as possible. CHAINS is the list of chains that will be
1769 optimized. */ 2106 optimized. */
1770 2107
1771 static unsigned 2108 static unsigned
1772 determine_unroll_factor (VEC (chain_p, heap) *chains) 2109 determine_unroll_factor (vec<chain_p> chains)
1773 { 2110 {
1774 chain_p chain; 2111 chain_p chain;
1775 unsigned factor = 1, af, nfactor, i; 2112 unsigned factor = 1, af, nfactor, i;
1776 unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); 2113 unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
1777 2114
1778 FOR_EACH_VEC_ELT (chain_p, chains, i, chain) 2115 FOR_EACH_VEC_ELT (chains, i, chain)
1779 { 2116 {
1780 if (chain->type == CT_INVARIANT || chain->combined) 2117 if (chain->type == CT_INVARIANT)
1781 continue; 2118 continue;
2119 /* For now we can't handle unrolling when eliminating stores. */
2120 else if (chain->type == CT_STORE_STORE)
2121 return 1;
2122
2123 if (chain->combined)
2124 {
2125 /* For combined chains, we can't handle unrolling if we replace
2126 looparound PHIs. */
2127 dref a;
2128 unsigned j;
2129 for (j = 1; chain->refs.iterate (j, &a); j++)
2130 if (gimple_code (a->stmt) == GIMPLE_PHI)
2131 return 1;
2132 continue;
2133 }
1782 2134
1783 /* The best unroll factor for this chain is equal to the number of 2135 /* The best unroll factor for this chain is equal to the number of
1784 temporary variables that we create for it. */ 2136 temporary variables that we create for it. */
1785 af = chain->length; 2137 af = chain->length;
1786 if (chain->has_max_use_after) 2138 if (chain->has_max_use_after)
1796 2148
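
A short worked illustration (editorial, not from the sources): a chain linking a[i] with a[i + 3] has length 3, so with has_max_use_after false it needs three rotating temporaries and af is 3; unrolling the body three times lets each copy use a fixed temporary, so the end-of-iteration copies disappear. The chosen factor is still limited by PARAM_MAX_UNROLL_TIMES.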
1797 /* Perform the predictive commoning optimization for CHAINS. 2149 /* Perform the predictive commoning optimization for CHAINS.
1798 Uids of the newly created temporary variables are marked in TMP_VARS. */ 2150 Uids of the newly created temporary variables are marked in TMP_VARS. */
1799 2151
1800 static void 2152 static void
1801 execute_pred_commoning (struct loop *loop, VEC (chain_p, heap) *chains, 2153 execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
1802 bitmap tmp_vars) 2154 bitmap tmp_vars)
1803 { 2155 {
1804 chain_p chain; 2156 chain_p chain;
1805 unsigned i; 2157 unsigned i;
1806 2158
1807 FOR_EACH_VEC_ELT (chain_p, chains, i, chain) 2159 FOR_EACH_VEC_ELT (chains, i, chain)
1808 { 2160 {
1809 if (chain->type == CT_INVARIANT) 2161 if (chain->type == CT_INVARIANT)
1810 execute_load_motion (loop, chain, tmp_vars); 2162 execute_load_motion (loop, chain, tmp_vars);
1811 else 2163 else
1812 execute_pred_commoning_chain (loop, chain, tmp_vars); 2164 execute_pred_commoning_chain (loop, chain, tmp_vars);
1813 } 2165 }
1814 2166
2167 FOR_EACH_VEC_ELT (chains, i, chain)
2168 {
2169 if (chain->type == CT_INVARIANT)
2170 ;
2171 else if (chain->combined)
2172 {
2173 /* For combined chains, just remove the statements that are used to
2174 compute the values of the expression (except for the root one). */
2175 dref a;
2176 unsigned j;
2177 for (j = 1; chain->refs.iterate (j, &a); j++)
2178 remove_stmt (a->stmt);
2179 }
2180 }
2181
1815 update_ssa (TODO_update_ssa_only_virtuals); 2182 update_ssa (TODO_update_ssa_only_virtuals);
1816 } 2183 }
1817 2184
1818 /* For each reference in CHAINS, if its defining statement is 2185 /* For each reference in CHAINS, if its defining statement is
1819 phi node, record the ssa name that is defined by it. */ 2186 phi node, record the ssa name that is defined by it. */
1820 2187
1821 static void 2188 static void
1822 replace_phis_by_defined_names (VEC (chain_p, heap) *chains) 2189 replace_phis_by_defined_names (vec<chain_p> chains)
1823 { 2190 {
1824 chain_p chain; 2191 chain_p chain;
1825 dref a; 2192 dref a;
1826 unsigned i, j; 2193 unsigned i, j;
1827 2194
1828 FOR_EACH_VEC_ELT (chain_p, chains, i, chain) 2195 FOR_EACH_VEC_ELT (chains, i, chain)
1829 FOR_EACH_VEC_ELT (dref, chain->refs, j, a) 2196 FOR_EACH_VEC_ELT (chain->refs, j, a)
1830 { 2197 {
1831 if (gimple_code (a->stmt) == GIMPLE_PHI) 2198 if (gimple_code (a->stmt) == GIMPLE_PHI)
1832 { 2199 {
1833 a->name_defined_by_phi = PHI_RESULT (a->stmt); 2200 a->name_defined_by_phi = PHI_RESULT (a->stmt);
1834 a->stmt = NULL; 2201 a->stmt = NULL;
1838 2205
1839 /* For each reference in CHAINS, if name_defined_by_phi is not 2206 /* For each reference in CHAINS, if name_defined_by_phi is not
1840 NULL, use it to set the stmt field. */ 2207 NULL, use it to set the stmt field. */
1841 2208
1842 static void 2209 static void
1843 replace_names_by_phis (VEC (chain_p, heap) *chains) 2210 replace_names_by_phis (vec<chain_p> chains)
1844 { 2211 {
1845 chain_p chain; 2212 chain_p chain;
1846 dref a; 2213 dref a;
1847 unsigned i, j; 2214 unsigned i, j;
1848 2215
1849 FOR_EACH_VEC_ELT (chain_p, chains, i, chain) 2216 FOR_EACH_VEC_ELT (chains, i, chain)
1850 FOR_EACH_VEC_ELT (dref, chain->refs, j, a) 2217 FOR_EACH_VEC_ELT (chain->refs, j, a)
1851 if (a->stmt == NULL) 2218 if (a->stmt == NULL)
1852 { 2219 {
1853 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi); 2220 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
1854 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI); 2221 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
1855 a->name_defined_by_phi = NULL_TREE; 2222 a->name_defined_by_phi = NULL_TREE;
1859 /* Wrapper over execute_pred_commoning, to pass it as a callback 2226 /* Wrapper over execute_pred_commoning, to pass it as a callback
1860 to tree_transform_and_unroll_loop. */ 2227 to tree_transform_and_unroll_loop. */
1861 2228
1862 struct epcc_data 2229 struct epcc_data
1863 { 2230 {
1864 VEC (chain_p, heap) *chains; 2231 vec<chain_p> chains;
1865 bitmap tmp_vars; 2232 bitmap tmp_vars;
1866 }; 2233 };
1867 2234
1868 static void 2235 static void
1869 execute_pred_commoning_cbck (struct loop *loop, void *data) 2236 execute_pred_commoning_cbck (struct loop *loop, void *data)
1882 the header of the LOOP. */ 2249 the header of the LOOP. */
1883 2250
1884 static void 2251 static void
1885 base_names_in_chain_on (struct loop *loop, tree name, tree var) 2252 base_names_in_chain_on (struct loop *loop, tree name, tree var)
1886 { 2253 {
1887 gimple stmt, phi; 2254 gimple *stmt, *phi;
1888 imm_use_iterator iter; 2255 imm_use_iterator iter;
1889 2256
1890 SSA_NAME_VAR (name) = var; 2257 replace_ssa_name_symbol (name, var);
1891 2258
1892 while (1) 2259 while (1)
1893 { 2260 {
1894 phi = NULL; 2261 phi = NULL;
1895 FOR_EACH_IMM_USE_STMT (stmt, iter, name) 2262 FOR_EACH_IMM_USE_STMT (stmt, iter, name)
1903 } 2270 }
1904 if (!phi) 2271 if (!phi)
1905 return; 2272 return;
1906 2273
1907 name = PHI_RESULT (phi); 2274 name = PHI_RESULT (phi);
1908 SSA_NAME_VAR (name) = var; 2275 replace_ssa_name_symbol (name, var);
1909 } 2276 }
1910 } 2277 }
1911 2278
1912 /* Given an unrolled LOOP after predictive commoning, remove the 2279 /* Given an unrolled LOOP after predictive commoning, remove the
1913 register copies arising from phi nodes by changing the base 2280 register copies arising from phi nodes by changing the base
1916 2283
1917 static void 2284 static void
1918 eliminate_temp_copies (struct loop *loop, bitmap tmp_vars) 2285 eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
1919 { 2286 {
1920 edge e; 2287 edge e;
1921 gimple phi, stmt; 2288 gphi *phi;
2289 gimple *stmt;
1922 tree name, use, var; 2290 tree name, use, var;
1923 gimple_stmt_iterator psi; 2291 gphi_iterator psi;
1924 2292
1925 e = loop_latch_edge (loop); 2293 e = loop_latch_edge (loop);
1926 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi)) 2294 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1927 { 2295 {
1928 phi = gsi_stmt (psi); 2296 phi = psi.phi ();
1929 name = PHI_RESULT (phi); 2297 name = PHI_RESULT (phi);
1930 var = SSA_NAME_VAR (name); 2298 var = SSA_NAME_VAR (name);
1931 if (!bitmap_bit_p (tmp_vars, DECL_UID (var))) 2299 if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
1932 continue; 2300 continue;
1933 use = PHI_ARG_DEF_FROM_EDGE (phi, e); 2301 use = PHI_ARG_DEF_FROM_EDGE (phi, e);
1934 gcc_assert (TREE_CODE (use) == SSA_NAME); 2302 gcc_assert (TREE_CODE (use) == SSA_NAME);
1935 2303
1936 /* Base all the ssa names in the ud and du chain of NAME on VAR. */ 2304 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
1963 2331
1964 /* Returns the modify statement that uses NAME. Skips over assignment 2332 /* Returns the modify statement that uses NAME. Skips over assignment
1965 statements, NAME is replaced with the actual name used in the returned 2333 statements, NAME is replaced with the actual name used in the returned
1966 statement. */ 2334 statement. */
1967 2335
1968 static gimple 2336 static gimple *
1969 find_use_stmt (tree *name) 2337 find_use_stmt (tree *name)
1970 { 2338 {
1971 gimple stmt; 2339 gimple *stmt;
1972 tree rhs, lhs; 2340 tree rhs, lhs;
1973 2341
1974 /* Skip over assignments. */ 2342 /* Skip over assignments. */
1975 while (1) 2343 while (1)
1976 { 2344 {
2016 2384
2017 /* If the operation used in STMT is associative and commutative, go through the 2385 /* If the operation used in STMT is associative and commutative, go through the
2018 tree of the same operations and returns its root. Distance to the root 2386 tree of the same operations and returns its root. Distance to the root
2019 is stored in DISTANCE. */ 2387 is stored in DISTANCE. */
2020 2388
2021 static gimple 2389 static gimple *
2022 find_associative_operation_root (gimple stmt, unsigned *distance) 2390 find_associative_operation_root (gimple *stmt, unsigned *distance)
2023 { 2391 {
2024 tree lhs; 2392 tree lhs;
2025 gimple next; 2393 gimple *next;
2026 enum tree_code code = gimple_assign_rhs_code (stmt); 2394 enum tree_code code = gimple_assign_rhs_code (stmt);
2027 tree type = TREE_TYPE (gimple_assign_lhs (stmt)); 2395 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2028 unsigned dist = 0; 2396 unsigned dist = 0;
2029 2397
2030 if (!may_reassociate_p (type, code)) 2398 if (!may_reassociate_p (type, code))
2053 is no such statement, returns NULL_TREE. In case the operation used on 2421 is no such statement, returns NULL_TREE. In case the operation used on
2054 NAME1 and NAME2 is associative and commutative, returns the root of the 2422 NAME1 and NAME2 is associative and commutative, returns the root of the
2055 tree formed by this operation instead of the statement that uses NAME1 or 2423 tree formed by this operation instead of the statement that uses NAME1 or
2056 NAME2. */ 2424 NAME2. */
2057 2425
2058 static gimple 2426 static gimple *
2059 find_common_use_stmt (tree *name1, tree *name2) 2427 find_common_use_stmt (tree *name1, tree *name2)
2060 { 2428 {
2061 gimple stmt1, stmt2; 2429 gimple *stmt1, *stmt2;
2062 2430
2063 stmt1 = find_use_stmt (name1); 2431 stmt1 = find_use_stmt (name1);
2064 if (!stmt1) 2432 if (!stmt1)
2065 return NULL; 2433 return NULL;
2066 2434
2091 { 2459 {
2092 enum tree_code acode; 2460 enum tree_code acode;
2093 bool aswap; 2461 bool aswap;
2094 tree atype; 2462 tree atype;
2095 tree name1, name2; 2463 tree name1, name2;
2096 gimple stmt; 2464 gimple *stmt;
2097 2465
2098 name1 = name_for_ref (r1); 2466 name1 = name_for_ref (r1);
2099 name2 = name_for_ref (r2); 2467 name2 = name_for_ref (r2);
2100 gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE); 2468 gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2101 2469
2102 stmt = find_common_use_stmt (&name1, &name2); 2470 stmt = find_common_use_stmt (&name1, &name2);
2103 2471
2104 if (!stmt) 2472 if (!stmt
2473 /* A simple post-dominance check - make sure the combination
2474 is executed under the same condition as the references. */
2475 || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2476 && gimple_bb (stmt) != gimple_bb (r2->stmt)))
2105 return false; 2477 return false;
2106 2478
2107 acode = gimple_assign_rhs_code (stmt); 2479 acode = gimple_assign_rhs_code (stmt);
2108 aswap = (!commutative_tree_code (acode) 2480 aswap = (!commutative_tree_code (acode)
2109 && gimple_assign_rhs1 (stmt) != name1); 2481 && gimple_assign_rhs1 (stmt) != name1);
2124 2496
2125 /* Remove OP from the operation on rhs of STMT, and replace STMT with 2497 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2126 an assignment of the remaining operand. */ 2498 an assignment of the remaining operand. */
2127 2499
2128 static void 2500 static void
2129 remove_name_from_operation (gimple stmt, tree op) 2501 remove_name_from_operation (gimple *stmt, tree op)
2130 { 2502 {
2131 tree other_op; 2503 tree other_op;
2132 gimple_stmt_iterator si; 2504 gimple_stmt_iterator si;
2133 2505
2134 gcc_assert (is_gimple_assign (stmt)); 2506 gcc_assert (is_gimple_assign (stmt));
2146 2518
2147 update_stmt (stmt); 2519 update_stmt (stmt);
2148 } 2520 }
2149 2521
2150 /* Reassociates the expression in that NAME1 and NAME2 are used so that they 2522 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2151 are combined in a single statement, and returns this statement. */ 2523 are combined in a single statement, and returns this statement. Note the
2152 2524 statement is inserted before INSERT_BEFORE if it's not NULL. */
2153 static gimple 2525
2154 reassociate_to_the_same_stmt (tree name1, tree name2) 2526 static gimple *
2155 { 2527 reassociate_to_the_same_stmt (tree name1, tree name2, gimple *insert_before)
2156 gimple stmt1, stmt2, root1, root2, s1, s2; 2528 {
2157 gimple new_stmt, tmp_stmt; 2529 gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
2530 gassign *new_stmt, *tmp_stmt;
2158 tree new_name, tmp_name, var, r1, r2; 2531 tree new_name, tmp_name, var, r1, r2;
2159 unsigned dist1, dist2; 2532 unsigned dist1, dist2;
2160 enum tree_code code; 2533 enum tree_code code;
2161 tree type = TREE_TYPE (name1); 2534 tree type = TREE_TYPE (name1);
2162 gimple_stmt_iterator bsi; 2535 gimple_stmt_iterator bsi;
2204 remove_name_from_operation (stmt2, name2); 2577 remove_name_from_operation (stmt2, name2);
2205 2578
2206 /* Insert the new statement combining NAME1 and NAME2 before S1, and 2579 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2207 combine it with the rhs of S1. */ 2580 combine it with the rhs of S1. */
2208 var = create_tmp_reg (type, "predreastmp"); 2581 var = create_tmp_reg (type, "predreastmp");
2209 add_referenced_var (var); 2582 new_name = make_ssa_name (var);
2210 new_name = make_ssa_name (var, NULL); 2583 new_stmt = gimple_build_assign (new_name, code, name1, name2);
2211 new_stmt = gimple_build_assign_with_ops (code, new_name, name1, name2); 2584 if (insert_before && stmt_dominates_stmt_p (insert_before, s1))
2585 bsi = gsi_for_stmt (insert_before);
2586 else
2587 bsi = gsi_for_stmt (s1);
2588
2589 gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
2212 2590
2213 var = create_tmp_reg (type, "predreastmp"); 2591 var = create_tmp_reg (type, "predreastmp");
2214 add_referenced_var (var); 2592 tmp_name = make_ssa_name (var);
2215 tmp_name = make_ssa_name (var, NULL);
2216 2593
2217 /* Rhs of S1 may now be either a binary expression with operation 2594 /* Rhs of S1 may now be either a binary expression with operation
2218 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1, 2595 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2219 so that name1 or name2 was removed from it). */ 2596 so that name1 or name2 was removed from it). */
2220 tmp_stmt = gimple_build_assign_with_ops (gimple_assign_rhs_code (s1), 2597 tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
2221 tmp_name, 2598 gimple_assign_rhs1 (s1),
2222 gimple_assign_rhs1 (s1), 2599 gimple_assign_rhs2 (s1));
2223 gimple_assign_rhs2 (s1));
2224 2600
2225 bsi = gsi_for_stmt (s1); 2601 bsi = gsi_for_stmt (s1);
2226 gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name); 2602 gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
2227 s1 = gsi_stmt (bsi); 2603 s1 = gsi_stmt (bsi);
2228 update_stmt (s1); 2604 update_stmt (s1);
2229 2605
2230 gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
2231 gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT); 2606 gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
2232 2607
2233 return new_stmt; 2608 return new_stmt;
2234 } 2609 }
2235 2610
2236 /* Returns the statement that combines references R1 and R2. In case R1 2611 /* Returns the statement that combines references R1 and R2. In case R1
2237 and R2 are not used in the same statement, but they are used with an 2612 and R2 are not used in the same statement, but they are used with an
2238 associative and commutative operation in the same expression, reassociate 2613 associative and commutative operation in the same expression, reassociate
2239 the expression so that they are used in the same statement. */ 2614 the expression so that they are used in the same statement. The combined
2240 2615 statement is inserted before INSERT_BEFORE if it's not NULL. */
2241 static gimple 2616
2242 stmt_combining_refs (dref r1, dref r2) 2617 static gimple *
2243 { 2618 stmt_combining_refs (dref r1, dref r2, gimple *insert_before)
2244 gimple stmt1, stmt2; 2619 {
2620 gimple *stmt1, *stmt2;
2245 tree name1 = name_for_ref (r1); 2621 tree name1 = name_for_ref (r1);
2246 tree name2 = name_for_ref (r2); 2622 tree name2 = name_for_ref (r2);
2247 2623
2248 stmt1 = find_use_stmt (&name1); 2624 stmt1 = find_use_stmt (&name1);
2249 stmt2 = find_use_stmt (&name2); 2625 stmt2 = find_use_stmt (&name2);
2250 if (stmt1 == stmt2) 2626 if (stmt1 == stmt2)
2251 return stmt1; 2627 return stmt1;
2252 2628
2253 return reassociate_to_the_same_stmt (name1, name2); 2629 return reassociate_to_the_same_stmt (name1, name2, insert_before);
2254 } 2630 }
2255 2631
2256 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the 2632 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2257 description of the new chain is returned, otherwise we return NULL. */ 2633 description of the new chain is returned, otherwise we return NULL. */
2258 2634
2261 { 2637 {
2262 dref r1, r2, nw; 2638 dref r1, r2, nw;
2263 enum tree_code op = ERROR_MARK; 2639 enum tree_code op = ERROR_MARK;
2264 bool swap = false; 2640 bool swap = false;
2265 chain_p new_chain; 2641 chain_p new_chain;
2266 unsigned i; 2642 int i, j, num;
2267 gimple root_stmt; 2643 gimple *root_stmt;
2268 tree rslt_type = NULL_TREE; 2644 tree rslt_type = NULL_TREE;
2269 2645
2270 if (ch1 == ch2) 2646 if (ch1 == ch2)
2271 return NULL; 2647 return NULL;
2272 if (ch1->length != ch2->length) 2648 if (ch1->length != ch2->length)
2273 return NULL; 2649 return NULL;
2274 2650
2275 if (VEC_length (dref, ch1->refs) != VEC_length (dref, ch2->refs)) 2651 if (ch1->refs.length () != ch2->refs.length ())
2276 return NULL; 2652 return NULL;
2277 2653
2278 for (i = 0; (VEC_iterate (dref, ch1->refs, i, r1) 2654 for (i = 0; (ch1->refs.iterate (i, &r1)
2279 && VEC_iterate (dref, ch2->refs, i, r2)); i++) 2655 && ch2->refs.iterate (i, &r2)); i++)
2280 { 2656 {
2281 if (r1->distance != r2->distance) 2657 if (r1->distance != r2->distance)
2282 return NULL; 2658 return NULL;
2283 2659
2284 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type)) 2660 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2285 return NULL; 2661 return NULL;
2286 } 2662 }
2287 2663
2664 ch1->combined = true;
2665 ch2->combined = true;
2666
2288 if (swap) 2667 if (swap)
2289 { 2668 std::swap (ch1, ch2);
2290 chain_p tmp = ch1;
2291 ch1 = ch2;
2292 ch2 = tmp;
2293 }
2294 2669
2295 new_chain = XCNEW (struct chain); 2670 new_chain = XCNEW (struct chain);
2296 new_chain->type = CT_COMBINATION; 2671 new_chain->type = CT_COMBINATION;
2297 new_chain->op = op; 2672 new_chain->op = op;
2298 new_chain->ch1 = ch1; 2673 new_chain->ch1 = ch1;
2299 new_chain->ch2 = ch2; 2674 new_chain->ch2 = ch2;
2300 new_chain->rslt_type = rslt_type; 2675 new_chain->rslt_type = rslt_type;
2301 new_chain->length = ch1->length; 2676 new_chain->length = ch1->length;
2302 2677
2303 for (i = 0; (VEC_iterate (dref, ch1->refs, i, r1) 2678 gimple *insert = NULL;
2304 && VEC_iterate (dref, ch2->refs, i, r2)); i++) 2679 num = ch1->refs.length ();
2305 { 2680 i = (new_chain->length == 0) ? num - 1 : 0;
2681 j = (new_chain->length == 0) ? -1 : 1;
2682 /* For a ZERO length chain, process refs in reverse order so that the dominant
2683 position is already known when we reach the root ref.
2684 For a non-ZERO length chain, process refs in order. See PR79663. */
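  /* Editorial illustration, not part of the sources: with four refs and a
     ZERO length chain the walk below visits indices 3, 2, 1, 0, so the root
     ref (index 0) is combined last, after INSERT has been set to the most
     dominant of the previously combined stmts; the refs vector is swapped
     back into 0 .. 3 order once the loop finishes.  */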
2685 for (; num > 0; num--, i += j)
2686 {
2687 r1 = ch1->refs[i];
2688 r2 = ch2->refs[i];
2306 nw = XCNEW (struct dref_d); 2689 nw = XCNEW (struct dref_d);
2307 nw->stmt = stmt_combining_refs (r1, r2);
2308 nw->distance = r1->distance; 2690 nw->distance = r1->distance;
2309 2691
2310 VEC_safe_push (dref, heap, new_chain->refs, nw); 2692 /* For ZERO length chain, insert combined stmt of root ref at dominant
2693 position. */
2694 nw->stmt = stmt_combining_refs (r1, r2, i == 0 ? insert : NULL);
2695 /* For ZERO length chain, record dominant position where combined stmt
2696 of root ref should be inserted. In this case, though all root refs
2697 dominate following ones, it's possible that combined stmt doesn't.
2698 See PR70754. */
2699 if (new_chain->length == 0
2700 && (insert == NULL || stmt_dominates_stmt_p (nw->stmt, insert)))
2701 insert = nw->stmt;
2702
2703 new_chain->refs.safe_push (nw);
2704 }
2705 if (new_chain->length == 0)
2706 {
2707 /* Restore the order for ZERO length chain's refs. */
2708 num = new_chain->refs.length () >> 1;
2709 for (i = 0, j = new_chain->refs.length () - 1; i < num; i++, j--)
2710 std::swap (new_chain->refs[i], new_chain->refs[j]);
2711
2712 /* For a ZERO length chain, has_max_use_after must be true since the root's
2713 combined stmt must dominate the others. */
2714 new_chain->has_max_use_after = true;
2715 return new_chain;
2311 } 2716 }
2312 2717
2313 new_chain->has_max_use_after = false; 2718 new_chain->has_max_use_after = false;
2314 root_stmt = get_chain_root (new_chain)->stmt; 2719 root_stmt = get_chain_root (new_chain)->stmt;
2315 for (i = 1; VEC_iterate (dref, new_chain->refs, i, nw); i++) 2720 for (i = 1; new_chain->refs.iterate (i, &nw); i++)
2316 { 2721 {
2317 if (nw->distance == new_chain->length 2722 if (nw->distance == new_chain->length
2318 && !stmt_dominates_stmt_p (nw->stmt, root_stmt)) 2723 && !stmt_dominates_stmt_p (nw->stmt, root_stmt))
2319 { 2724 {
2320 new_chain->has_max_use_after = true; 2725 new_chain->has_max_use_after = true;
2321 break; 2726 break;
2322 } 2727 }
2323 } 2728 }
2324 2729
2325 ch1->combined = true;
2326 ch2->combined = true;
2327 return new_chain; 2730 return new_chain;
2328 } 2731 }
2329 2732
2330 /* Try to combine the CHAINS. */ 2733 /* Try to combine the CHAINS. */
2331 2734
2332 static void 2735 static void
2333 try_combine_chains (VEC (chain_p, heap) **chains) 2736 try_combine_chains (vec<chain_p> *chains)
2334 { 2737 {
2335 unsigned i, j; 2738 unsigned i, j;
2336 chain_p ch1, ch2, cch; 2739 chain_p ch1, ch2, cch;
2337 VEC (chain_p, heap) *worklist = NULL; 2740 auto_vec<chain_p> worklist;
2338 2741
2339 FOR_EACH_VEC_ELT (chain_p, *chains, i, ch1) 2742 FOR_EACH_VEC_ELT (*chains, i, ch1)
2340 if (chain_can_be_combined_p (ch1)) 2743 if (chain_can_be_combined_p (ch1))
2341 VEC_safe_push (chain_p, heap, worklist, ch1); 2744 worklist.safe_push (ch1);
2342 2745
2343 while (!VEC_empty (chain_p, worklist)) 2746 while (!worklist.is_empty ())
2344 { 2747 {
2345 ch1 = VEC_pop (chain_p, worklist); 2748 ch1 = worklist.pop ();
2346 if (!chain_can_be_combined_p (ch1)) 2749 if (!chain_can_be_combined_p (ch1))
2347 continue; 2750 continue;
2348 2751
2349 FOR_EACH_VEC_ELT (chain_p, *chains, j, ch2) 2752 FOR_EACH_VEC_ELT (*chains, j, ch2)
2350 { 2753 {
2351 if (!chain_can_be_combined_p (ch2)) 2754 if (!chain_can_be_combined_p (ch2))
2352 continue; 2755 continue;
2353 2756
2354 cch = combine_chains (ch1, ch2); 2757 cch = combine_chains (ch1, ch2);
2355 if (cch) 2758 if (cch)
2356 { 2759 {
2357 VEC_safe_push (chain_p, heap, worklist, cch); 2760 worklist.safe_push (cch);
2358 VEC_safe_push (chain_p, heap, *chains, cch); 2761 chains->safe_push (cch);
2359 break; 2762 break;
2360 } 2763 }
2361 } 2764 }
2362 } 2765 }
2766 }
2767
2768 /* Prepare initializers for store elimination CHAIN in LOOP. Returns false
2769 if this is impossible because one of these initializers may trap, true
2770 otherwise. */
2771
2772 static bool
2773 prepare_initializers_chain_store_elim (struct loop *loop, chain_p chain)
2774 {
2775 unsigned i, n = chain->length;
2776
2777 /* For now we can't eliminate stores if some of them are conditionally
2778 executed. */
2779 if (!chain->all_always_accessed)
2780 return false;
2781
2782 /* Nothing to initialize for intra-iteration store elimination. */
2783 if (n == 0 && chain->type == CT_STORE_STORE)
2784 return true;
2785
2786 /* For store elimination chain, there is nothing to initialize if stores
2787 to be eliminated only store loop invariant values into memory. */
2788 if (chain->type == CT_STORE_STORE
2789 && is_inv_store_elimination_chain (loop, chain))
2790 {
2791 chain->inv_store_elimination = true;
2792 return true;
2793 }
2794
2795 chain->inits.create (n);
2796 chain->inits.safe_grow_cleared (n);
2797
2798 /* For a store elimination chain like the one below:
2799
2800 for (i = 0; i < len; i++)
2801 {
2802 a[i] = 1;
2803 // a[i + 1] = ...
2804 a[i + 2] = 3;
2805 }
2806
2807 the store to a[i + 1] is missing from the loop body; it acts like a bubble.
2808 The content of a[i + 1] remains the same if the loop iterates fewer times
2809 than chain->length. We need to set up root variables for such stores
2810 by loading from memory before the loop. Note we only need to load the
2811 bubble elements because the loop body is guaranteed to be executed at
2812 least once after the loop's preheader edge. */
2813 auto_vec<bool> bubbles;
2814 bubbles.safe_grow_cleared (n + 1);
2815 for (i = 0; i < chain->refs.length (); i++)
2816 bubbles[chain->refs[i]->distance] = true;
2817
2818 struct data_reference *dr = get_chain_root (chain)->ref;
2819 for (i = 0; i < n; i++)
2820 {
2821 if (bubbles[i])
2822 continue;
2823
2824 gimple_seq stmts = NULL;
2825
2826 tree init = ref_at_iteration (dr, (int) 0 - i, &stmts);
2827 if (stmts)
2828 gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
2829
2830 chain->inits[i] = init;
2831 }
2832
2833 return true;
2363 } 2834 }
2364 2835
2365 /* Prepare initializers for CHAIN in LOOP. Returns false if this is 2836 /* Prepare initializers for CHAIN in LOOP. Returns false if this is
2366 impossible because one of these initializers may trap, true otherwise. */ 2837 impossible because one of these initializers may trap, true otherwise. */
2367 2838
2369 prepare_initializers_chain (struct loop *loop, chain_p chain) 2840 prepare_initializers_chain (struct loop *loop, chain_p chain)
2370 { 2841 {
2371 unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length; 2842 unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
2372 struct data_reference *dr = get_chain_root (chain)->ref; 2843 struct data_reference *dr = get_chain_root (chain)->ref;
2373 tree init; 2844 tree init;
2374 gimple_seq stmts;
2375 dref laref; 2845 dref laref;
2376 edge entry = loop_preheader_edge (loop); 2846 edge entry = loop_preheader_edge (loop);
2377 2847
2848 if (chain->type == CT_STORE_STORE)
2849 return prepare_initializers_chain_store_elim (loop, chain);
2850
2378 /* Find the initializers for the variables, and check that they cannot 2851 /* Find the initializers for the variables, and check that they cannot
2379 trap. */ 2852 trap. */
2380 chain->inits = VEC_alloc (tree, heap, n); 2853 chain->inits.create (n);
2381 for (i = 0; i < n; i++) 2854 for (i = 0; i < n; i++)
2382 VEC_quick_push (tree, chain->inits, NULL_TREE); 2855 chain->inits.quick_push (NULL_TREE);
2383 2856
2384 /* If we have replaced some looparound phi nodes, use their initializers 2857 /* If we have replaced some looparound phi nodes, use their initializers
2385 instead of creating our own. */ 2858 instead of creating our own. */
2386 FOR_EACH_VEC_ELT (dref, chain->refs, i, laref) 2859 FOR_EACH_VEC_ELT (chain->refs, i, laref)
2387 { 2860 {
2388 if (gimple_code (laref->stmt) != GIMPLE_PHI) 2861 if (gimple_code (laref->stmt) != GIMPLE_PHI)
2389 continue; 2862 continue;
2390 2863
2391 gcc_assert (laref->distance > 0); 2864 gcc_assert (laref->distance > 0);
2392 VEC_replace (tree, chain->inits, n - laref->distance, 2865 chain->inits[n - laref->distance]
2393 PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry)); 2866 = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
2394 } 2867 }
2395 2868
2396 for (i = 0; i < n; i++) 2869 for (i = 0; i < n; i++)
2397 { 2870 {
2398 if (VEC_index (tree, chain->inits, i) != NULL_TREE) 2871 gimple_seq stmts = NULL;
2872
2873 if (chain->inits[i] != NULL_TREE)
2399 continue; 2874 continue;
2400 2875
2401 init = ref_at_iteration (loop, DR_REF (dr), (int) i - n); 2876 init = ref_at_iteration (dr, (int) i - n, &stmts);
2402 if (!init)
2403 return false;
2404
2405 if (!chain->all_always_accessed && tree_could_trap_p (init)) 2877 if (!chain->all_always_accessed && tree_could_trap_p (init))
2406 return false; 2878 {
2407 2879 gimple_seq_discard (stmts);
2408 init = force_gimple_operand (init, &stmts, false, NULL_TREE); 2880 return false;
2881 }
2882
2409 if (stmts) 2883 if (stmts)
2410 gsi_insert_seq_on_edge_immediate (entry, stmts); 2884 gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
2411 2885
2412 VEC_replace (tree, chain->inits, i, init); 2886 chain->inits[i] = init;
2413 } 2887 }
2414 2888
2415 return true; 2889 return true;
2416 } 2890 }
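
One concrete case the trap check above guards against (an editorial sketch, not from the sources):

    for (i = 0; i < n; i++)
      if (p)
        ... = a[i] + a[i + 1];

Here all_always_accessed is false, and an initializer loaded from a on the preheader edge could trap even though the original program may never touch a at all, so prepare_initializers releases the whole chain instead of speculating the load.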
2417 2891
2418 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot 2892 /* Prepare initializers for CHAINS in LOOP, and free chains that cannot
2419 be used because the initializers might trap. */ 2893 be used because the initializers might trap. */
2420 2894
2421 static void 2895 static void
2422 prepare_initializers (struct loop *loop, VEC (chain_p, heap) *chains) 2896 prepare_initializers (struct loop *loop, vec<chain_p> chains)
2423 { 2897 {
2424 chain_p chain; 2898 chain_p chain;
2425 unsigned i; 2899 unsigned i;
2426 2900
2427 for (i = 0; i < VEC_length (chain_p, chains); ) 2901 for (i = 0; i < chains.length (); )
2428 { 2902 {
2429 chain = VEC_index (chain_p, chains, i); 2903 chain = chains[i];
2430 if (prepare_initializers_chain (loop, chain)) 2904 if (prepare_initializers_chain (loop, chain))
2431 i++; 2905 i++;
2432 else 2906 else
2433 { 2907 {
2434 release_chain (chain); 2908 release_chain (chain);
2435 VEC_unordered_remove (chain_p, chains, i); 2909 chains.unordered_remove (i);
2436 } 2910 }
2437 } 2911 }
2438 } 2912 }
2439 2913
2440 /* Performs predictive commoning for LOOP. Returns true if LOOP was 2914 /* Generates finalizer memory references for CHAIN in LOOP. Returns true
2441 unrolled. */ 2915 if finalizer code for CHAIN can be generated, otherwise false. */
2442 2916
2443 static bool 2917 static bool
2918 prepare_finalizers_chain (struct loop *loop, chain_p chain)
2919 {
2920 unsigned i, n = chain->length;
2921 struct data_reference *dr = get_chain_root (chain)->ref;
2922 tree fini, niters = number_of_latch_executions (loop);
2923
2924 /* For now we can't eliminate stores if some of them are conditionally
2925 executed. */
2926 if (!chain->all_always_accessed)
2927 return false;
2928
2929 chain->finis.create (n);
2930 for (i = 0; i < n; i++)
2931 chain->finis.quick_push (NULL_TREE);
2932
2933 /* We never use looparound phi nodes for store elimination chains. */
2934
2935 /* Find the finalizers for the variables, and check that they cannot
2936 trap. */
2937 for (i = 0; i < n; i++)
2938 {
2939 gimple_seq stmts = NULL;
2940 gcc_assert (chain->finis[i] == NULL_TREE);
2941
2942 if (TREE_CODE (niters) != INTEGER_CST && TREE_CODE (niters) != SSA_NAME)
2943 {
2944 niters = unshare_expr (niters);
2945 niters = force_gimple_operand (niters, &stmts, true, NULL);
2946 if (stmts)
2947 {
2948 gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
2949 stmts = NULL;
2950 }
2951 }
2952 fini = ref_at_iteration (dr, (int) 0 - i, &stmts, niters);
2953 if (stmts)
2954 gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
2955
2956 chain->finis[i] = fini;
2957 }
2958
2959 return true;
2960 }
2961
2962 /* Generates finalizer memory reference for CHAINS in LOOP. Returns true
2963 if finalizer code generation for CHAINS breaks loop closed ssa form. */
2964
2965 static bool
2966 prepare_finalizers (struct loop *loop, vec<chain_p> chains)
2967 {
2968 chain_p chain;
2969 unsigned i;
2970 bool loop_closed_ssa = false;
2971
2972 for (i = 0; i < chains.length ();)
2973 {
2974 chain = chains[i];
2975
2976 /* Finalizer is only necessary for inter-iteration store elimination
2977 chains. */
2978 if (chain->length == 0 || chain->type != CT_STORE_STORE)
2979 {
2980 i++;
2981 continue;
2982 }
2983
2984 if (prepare_finalizers_chain (loop, chain))
2985 {
2986 i++;
2987 /* Be conservative: assume loop closed ssa form is corrupted
2988 by the store-store chain, though that is not always the case
2989 if the eliminated stores only store loop invariant values
2990 into memory. */
2991 loop_closed_ssa = true;
2992 }
2993 else
2994 {
2995 release_chain (chain);
2996 chains.unordered_remove (i);
2997 }
2998 }
2999 return loop_closed_ssa;
3000 }
3001
3002 /* Insert all initializing gimple stmts into loop's entry edge. */
3003
3004 static void
3005 insert_init_seqs (struct loop *loop, vec<chain_p> chains)
3006 {
3007 unsigned i;
3008 edge entry = loop_preheader_edge (loop);
3009
3010 for (i = 0; i < chains.length (); ++i)
3011 if (chains[i]->init_seq)
3012 {
3013 gsi_insert_seq_on_edge_immediate (entry, chains[i]->init_seq);
3014 chains[i]->init_seq = NULL;
3015 }
3016 }
3017
3018 /* Performs predictive commoning for LOOP. Sets bit 1<<0 of the return value
3019 if LOOP was unrolled; sets bit 1<<1 of the return value if loop closed ssa
3020 form was corrupted. */
3021
3022 static unsigned
2444 tree_predictive_commoning_loop (struct loop *loop) 3023 tree_predictive_commoning_loop (struct loop *loop)
2445 { 3024 {
2446 VEC (loop_p, heap) *loop_nest; 3025 vec<data_reference_p> datarefs;
2447 VEC (data_reference_p, heap) *datarefs; 3026 vec<ddr_p> dependences;
2448 VEC (ddr_p, heap) *dependences;
2449 struct component *components; 3027 struct component *components;
2450 VEC (chain_p, heap) *chains = NULL; 3028 vec<chain_p> chains = vNULL;
2451 unsigned unroll_factor; 3029 unsigned unroll_factor;
2452 struct tree_niter_desc desc; 3030 struct tree_niter_desc desc;
2453 bool unroll = false; 3031 bool unroll = false, loop_closed_ssa = false;
2454 edge exit; 3032 edge exit;
2455 bitmap tmp_vars;
2456 3033
2457 if (dump_file && (dump_flags & TDF_DETAILS)) 3034 if (dump_file && (dump_flags & TDF_DETAILS))
2458 fprintf (dump_file, "Processing loop %d\n", loop->num); 3035 fprintf (dump_file, "Processing loop %d\n", loop->num);
2459 3036
3037 /* Nothing for predictive commoning if the loop only iterates 1 time. */
3038 if (get_max_loop_iterations_int (loop) == 0)
3039 {
3040 if (dump_file && (dump_flags & TDF_DETAILS))
3041 fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n");
3042
3043 return 0;
3044 }
3045
2460 /* Find the data references and split them into components according to their 3046 /* Find the data references and split them into components according to their
2461 dependence relations. */ 3047 dependence relations. */
2462 datarefs = VEC_alloc (data_reference_p, heap, 10); 3048 auto_vec<loop_p, 3> loop_nest;
2463 dependences = VEC_alloc (ddr_p, heap, 10); 3049 dependences.create (10);
2464 loop_nest = VEC_alloc (loop_p, heap, 3); 3050 datarefs.create (10);
2465 compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs, 3051 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
2466 &dependences); 3052 &dependences))
3053 {
3054 if (dump_file && (dump_flags & TDF_DETAILS))
3055 fprintf (dump_file, "Cannot analyze data dependencies\n");
3056 free_data_refs (datarefs);
3057 free_dependence_relations (dependences);
3058 return 0;
3059 }
3060
2467 if (dump_file && (dump_flags & TDF_DETAILS)) 3061 if (dump_file && (dump_flags & TDF_DETAILS))
2468 dump_data_dependence_relations (dump_file, dependences); 3062 dump_data_dependence_relations (dump_file, dependences);
2469 3063
2470 components = split_data_refs_to_components (loop, datarefs, dependences); 3064 components = split_data_refs_to_components (loop, datarefs, dependences);
2471 VEC_free (loop_p, heap, loop_nest); 3065 loop_nest.release ();
2472 free_dependence_relations (dependences); 3066 free_dependence_relations (dependences);
2473 if (!components) 3067 if (!components)
2474 { 3068 {
2475 free_data_refs (datarefs); 3069 free_data_refs (datarefs);
2476 return false; 3070 free_affine_expand_cache (&name_expansions);
3071 return 0;
2477 } 3072 }
2478 3073
2479 if (dump_file && (dump_flags & TDF_DETAILS)) 3074 if (dump_file && (dump_flags & TDF_DETAILS))
2480 { 3075 {
2481 fprintf (dump_file, "Initial state:\n\n"); 3076 fprintf (dump_file, "Initial state:\n\n");
2483 } 3078 }
2484 3079
2485 /* Find the suitable components and split them into chains. */ 3080 /* Find the suitable components and split them into chains. */
2486 components = filter_suitable_components (loop, components); 3081 components = filter_suitable_components (loop, components);
2487 3082
2488 tmp_vars = BITMAP_ALLOC (NULL); 3083 auto_bitmap tmp_vars;
2489 looparound_phis = BITMAP_ALLOC (NULL); 3084 looparound_phis = BITMAP_ALLOC (NULL);
2490 determine_roots (loop, components, &chains); 3085 determine_roots (loop, components, &chains);
2491 release_components (components); 3086 release_components (components);
2492 3087
2493 if (!chains) 3088 if (!chains.exists ())
2494 { 3089 {
2495 if (dump_file && (dump_flags & TDF_DETAILS)) 3090 if (dump_file && (dump_flags & TDF_DETAILS))
2496 fprintf (dump_file, 3091 fprintf (dump_file,
2497 "Predictive commoning failed: no suitable chains\n"); 3092 "Predictive commoning failed: no suitable chains\n");
2498 goto end; 3093 goto end;
2499 } 3094 }
2500 prepare_initializers (loop, chains); 3095 prepare_initializers (loop, chains);
3096 loop_closed_ssa = prepare_finalizers (loop, chains);
2501 3097
2502 /* Try to combine the chains that are always used together. */ 3098 /* Try to combine the chains that are always used together. */
2503 try_combine_chains (&chains); 3099 try_combine_chains (&chains);
3100
3101 insert_init_seqs (loop, chains);
2504 3102
2505 if (dump_file && (dump_flags & TDF_DETAILS)) 3103 if (dump_file && (dump_flags & TDF_DETAILS))
2506 { 3104 {
2507 fprintf (dump_file, "Before commoning:\n\n"); 3105 fprintf (dump_file, "Before commoning:\n\n");
2508 dump_chains (dump_file, chains); 3106 dump_chains (dump_file, chains);
2551 } 3149 }
2552 3150
2553 end: ; 3151 end: ;
2554 release_chains (chains); 3152 release_chains (chains);
2555 free_data_refs (datarefs); 3153 free_data_refs (datarefs);
2556 BITMAP_FREE (tmp_vars);
2557 BITMAP_FREE (looparound_phis); 3154 BITMAP_FREE (looparound_phis);
2558 3155
2559 free_affine_expand_cache (&name_expansions); 3156 free_affine_expand_cache (&name_expansions);
2560 3157
2561 return unroll; 3158 return (unroll ? 1 : 0) | (loop_closed_ssa ? 2 : 0);
2562 } 3159 }
2563 3160
2564 /* Runs predictive commoning. */ 3161 /* Runs predictive commoning. */
2565 3162
2566 unsigned 3163 unsigned
2567 tree_predictive_commoning (void) 3164 tree_predictive_commoning (void)
2568 { 3165 {
2569 bool unrolled = false;
2570 struct loop *loop; 3166 struct loop *loop;
2571 loop_iterator li; 3167 unsigned ret = 0, changed = 0;
2572 unsigned ret = 0;
2573 3168
2574 initialize_original_copy_tables (); 3169 initialize_original_copy_tables ();
2575 FOR_EACH_LOOP (li, loop, LI_ONLY_INNERMOST) 3170 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
2576 if (optimize_loop_for_speed_p (loop)) 3171 if (optimize_loop_for_speed_p (loop))
2577 { 3172 {
2578 unrolled |= tree_predictive_commoning_loop (loop); 3173 changed |= tree_predictive_commoning_loop (loop);
2579 } 3174 }
2580 3175 free_original_copy_tables ();
2581 if (unrolled) 3176
3177 if (changed > 0)
2582 { 3178 {
2583 scev_reset (); 3179 scev_reset ();
3180
3181 if (changed > 1)
3182 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3183
2584 ret = TODO_cleanup_cfg; 3184 ret = TODO_cleanup_cfg;
2585 } 3185 }
2586 free_original_copy_tables ();
2587 3186
2588 return ret; 3187 return ret;
2589 } 3188 }
3189
3190 /* Predictive commoning pass. */
3191
3192 static unsigned
3193 run_tree_predictive_commoning (struct function *fun)
3194 {
3195 if (number_of_loops (fun) <= 1)
3196 return 0;
3197
3198 return tree_predictive_commoning ();
3199 }
3200
3201 namespace {
3202
3203 const pass_data pass_data_predcom =
3204 {
3205 GIMPLE_PASS, /* type */
3206 "pcom", /* name */
3207 OPTGROUP_LOOP, /* optinfo_flags */
3208 TV_PREDCOM, /* tv_id */
3209 PROP_cfg, /* properties_required */
3210 0, /* properties_provided */
3211 0, /* properties_destroyed */
3212 0, /* todo_flags_start */
3213 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
3214 };
3215
3216 class pass_predcom : public gimple_opt_pass
3217 {
3218 public:
3219 pass_predcom (gcc::context *ctxt)
3220 : gimple_opt_pass (pass_data_predcom, ctxt)
3221 {}
3222
3223 /* opt_pass methods: */
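   /* Editorial note: flag_predictive_commoning corresponds to the
      -fpredictive-commoning option, which is enabled by default at -O3.  */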
3224 virtual bool gate (function *) { return flag_predictive_commoning != 0; }
3225 virtual unsigned int execute (function *fun)
3226 {
3227 return run_tree_predictive_commoning (fun);
3228 }
3229
3230 }; // class pass_predcom
3231
3232 } // anon namespace
3233
3234 gimple_opt_pass *
3235 make_pass_predcom (gcc::context *ctxt)
3236 {
3237 return new pass_predcom (ctxt);
3238 }
3239
3240