comparison gcc/tree-loop-distribution.c @ 67:f6334be47118

update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
author nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date Tue, 22 Mar 2011 17:18:12 +0900
parents b7f97abdc517
children 04ced10e8804
comparison
equal deleted inserted replaced
65:65488c3d617d 67:f6334be47118
43 generates the new loops. */ 43 generates the new loops. */
44 44
45 #include "config.h" 45 #include "config.h"
46 #include "system.h" 46 #include "system.h"
47 #include "coretypes.h" 47 #include "coretypes.h"
48 #include "tm.h"
49 #include "tree.h"
50 #include "basic-block.h"
51 #include "diagnostic.h"
52 #include "tree-flow.h" 48 #include "tree-flow.h"
53 #include "tree-dump.h"
54 #include "timevar.h"
55 #include "cfgloop.h" 49 #include "cfgloop.h"
56 #include "expr.h"
57 #include "optabs.h"
58 #include "tree-chrec.h" 50 #include "tree-chrec.h"
59 #include "tree-data-ref.h" 51 #include "tree-data-ref.h"
60 #include "tree-scalar-evolution.h" 52 #include "tree-scalar-evolution.h"
61 #include "tree-pass.h" 53 #include "tree-pass.h"
62 #include "lambda.h"
63 #include "langhooks.h"
64 #include "tree-vectorizer.h"
65 54
66 /* If bit I is not set, it means that this node represents an 55 /* If bit I is not set, it means that this node represents an
67 operation that has already been performed, and that should not be 56 operation that has already been performed, and that should not be
68 performed again. This is the subgraph of remaining important 57 performed again. This is the subgraph of remaining important
69 computations that is passed to the DFS algorithm for avoiding to 58 computations that is passed to the DFS algorithm for avoiding to
196 { 185 {
197 basic_block bb = bbs[i]; 186 basic_block bb = bbs[i];
198 187
199 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);) 188 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);)
200 if (!bitmap_bit_p (partition, x++)) 189 if (!bitmap_bit_p (partition, x++))
201 remove_phi_node (&bsi, true); 190 {
191 gimple phi = gsi_stmt (bsi);
192 if (!is_gimple_reg (gimple_phi_result (phi)))
193 mark_virtual_phi_result_for_renaming (phi);
194 remove_phi_node (&bsi, true);
195 }
202 else 196 else
203 gsi_next (&bsi); 197 gsi_next (&bsi);
204 198
205 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);) 199 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);)
206 if (gimple_code (gsi_stmt (bsi)) != GIMPLE_LABEL 200 {
207 && !bitmap_bit_p (partition, x++)) 201 gimple stmt = gsi_stmt (bsi);
208 gsi_remove (&bsi, false); 202 if (gimple_code (gsi_stmt (bsi)) != GIMPLE_LABEL
209 else 203 && !bitmap_bit_p (partition, x++))
210 gsi_next (&bsi); 204 {
211 205 unlink_stmt_vdef (stmt);
212 mark_virtual_ops_in_bb (bb); 206 gsi_remove (&bsi, true);
207 release_defs (stmt);
208 }
209 else
210 gsi_next (&bsi);
211 }
213 } 212 }
214 213
215 free (bbs); 214 free (bbs);
216 return true; 215 return true;
217 } 216 }
232 return x; 231 return x;
233 } 232 }
234 233
235 /* Generate a call to memset. Return true when the operation succeeded. */ 234 /* Generate a call to memset. Return true when the operation succeeded. */
236 235
237 static bool 236 static void
238 generate_memset_zero (gimple stmt, tree op0, tree nb_iter, 237 generate_memset_zero (gimple stmt, tree op0, tree nb_iter,
239 gimple_stmt_iterator bsi) 238 gimple_stmt_iterator bsi)
240 { 239 {
241 tree addr_base, nb_bytes; 240 tree addr_base, nb_bytes;
242 bool res = false; 241 bool res = false;
243 gimple_seq stmt_list = NULL, stmts; 242 gimple_seq stmt_list = NULL, stmts;
244 gimple fn_call; 243 gimple fn_call;
245 tree mem, fn; 244 tree mem, fn;
246 gimple_stmt_iterator i;
247 struct data_reference *dr = XCNEW (struct data_reference); 245 struct data_reference *dr = XCNEW (struct data_reference);
248 location_t loc = gimple_location (stmt); 246 location_t loc = gimple_location (stmt);
249 247
250 DR_STMT (dr) = stmt; 248 DR_STMT (dr) = stmt;
251 DR_REF (dr) = op0; 249 DR_REF (dr) = op0;
252 if (!dr_analyze_innermost (dr)) 250 res = dr_analyze_innermost (dr);
253 goto end; 251 gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0)));
254 252
255 /* Test for a positive stride, iterating over every element. */ 253 nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
256 if (integer_zerop (size_binop (MINUS_EXPR, 254 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
257 fold_convert (sizetype, DR_STEP (dr)), 255 addr_base = fold_convert_loc (loc, sizetype, addr_base);
258 TYPE_SIZE_UNIT (TREE_TYPE (op0)))))
259 {
260 addr_base = fold_convert_loc (loc, sizetype,
261 size_binop_loc (loc, PLUS_EXPR,
262 DR_OFFSET (dr),
263 DR_INIT (dr)));
264 addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR,
265 TREE_TYPE (DR_BASE_ADDRESS (dr)),
266 DR_BASE_ADDRESS (dr), addr_base);
267
268 nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
269 }
270 256
271 /* Test for a negative stride, iterating over every element. */ 257 /* Test for a negative stride, iterating over every element. */
272 else if (integer_zerop (size_binop (PLUS_EXPR, 258 if (integer_zerop (size_binop (PLUS_EXPR,
273 TYPE_SIZE_UNIT (TREE_TYPE (op0)), 259 TYPE_SIZE_UNIT (TREE_TYPE (op0)),
274 fold_convert (sizetype, DR_STEP (dr))))) 260 fold_convert (sizetype, DR_STEP (dr)))))
275 { 261 {
276 nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list);
277
278 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
279 addr_base = fold_convert_loc (loc, sizetype, addr_base);
280 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base, 262 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
281 fold_convert_loc (loc, sizetype, nb_bytes)); 263 fold_convert_loc (loc, sizetype, nb_bytes));
282 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base, 264 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
283 TYPE_SIZE_UNIT (TREE_TYPE (op0))); 265 TYPE_SIZE_UNIT (TREE_TYPE (op0)));
284 addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR, 266 }
285 TREE_TYPE (DR_BASE_ADDRESS (dr)), 267
286 DR_BASE_ADDRESS (dr), addr_base); 268 addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR,
287 } 269 TREE_TYPE (DR_BASE_ADDRESS (dr)),
288 else 270 DR_BASE_ADDRESS (dr), addr_base);
289 goto end;
290
291 mem = force_gimple_operand (addr_base, &stmts, true, NULL); 271 mem = force_gimple_operand (addr_base, &stmts, true, NULL);
292 gimple_seq_add_seq (&stmt_list, stmts); 272 gimple_seq_add_seq (&stmt_list, stmts);
293 273
294 fn = build_fold_addr_expr (implicit_built_in_decls [BUILT_IN_MEMSET]); 274 fn = build_fold_addr_expr (implicit_built_in_decls [BUILT_IN_MEMSET]);
295 fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes); 275 fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes);
296 gimple_seq_add_stmt (&stmt_list, fn_call); 276 gimple_seq_add_stmt (&stmt_list, fn_call);
297
298 for (i = gsi_start (stmt_list); !gsi_end_p (i); gsi_next (&i))
299 {
300 gimple s = gsi_stmt (i);
301 update_stmt_if_modified (s);
302 }
303
304 gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING); 277 gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING);
305 res = true;
306 278
307 if (dump_file && (dump_flags & TDF_DETAILS)) 279 if (dump_file && (dump_flags & TDF_DETAILS))
308 fprintf (dump_file, "generated memset zero\n"); 280 fprintf (dump_file, "generated memset zero\n");
309 281
310 end:
311 free_data_ref (dr); 282 free_data_ref (dr);
312 return res;
313 }
314
315 /* Propagate phis in BB b to their uses and remove them. */
316
317 static void
318 prop_phis (basic_block b)
319 {
320 gimple_stmt_iterator psi;
321 gimple_seq phis = phi_nodes (b);
322
323 for (psi = gsi_start (phis); !gsi_end_p (psi); )
324 {
325 gimple phi = gsi_stmt (psi);
326 tree def = gimple_phi_result (phi), use = gimple_phi_arg_def (phi, 0);
327
328 gcc_assert (gimple_phi_num_args (phi) == 1);
329
330 if (!is_gimple_reg (def))
331 {
332 imm_use_iterator iter;
333 use_operand_p use_p;
334 gimple stmt;
335
336 FOR_EACH_IMM_USE_STMT (stmt, iter, def)
337 FOR_EACH_IMM_USE_ON_STMT (use_p, iter)
338 SET_USE (use_p, use);
339 }
340 else
341 replace_uses_by (def, use);
342
343 remove_phi_node (&psi, true);
344 }
345 } 283 }
346 284
347 /* Tries to generate a builtin function for the instructions of LOOP 285 /* Tries to generate a builtin function for the instructions of LOOP
348 pointed to by the bits set in PARTITION. Returns true when the 286 pointed to by the bits set in PARTITION. Returns true when the
349 operation succeeded. */ 287 operation succeeded. */
353 { 291 {
354 bool res = false; 292 bool res = false;
355 unsigned i, x = 0; 293 unsigned i, x = 0;
356 basic_block *bbs; 294 basic_block *bbs;
357 gimple write = NULL; 295 gimple write = NULL;
358 tree op0, op1;
359 gimple_stmt_iterator bsi; 296 gimple_stmt_iterator bsi;
360 tree nb_iter = number_of_exit_cond_executions (loop); 297 tree nb_iter = number_of_exit_cond_executions (loop);
361 298
362 if (!nb_iter || nb_iter == chrec_dont_know) 299 if (!nb_iter || nb_iter == chrec_dont_know)
363 return false; 300 return false;
389 nb_iter = number_of_latch_executions (loop); 326 nb_iter = number_of_latch_executions (loop);
390 } 327 }
391 } 328 }
392 } 329 }
393 330
394 if (!write) 331 if (!stmt_with_adjacent_zero_store_dr_p (write))
395 goto end;
396
397 op0 = gimple_assign_lhs (write);
398 op1 = gimple_assign_rhs1 (write);
399
400 if (!(TREE_CODE (op0) == ARRAY_REF
401 || TREE_CODE (op0) == INDIRECT_REF))
402 goto end; 332 goto end;
403 333
404 /* The new statements will be placed before LOOP. */ 334 /* The new statements will be placed before LOOP. */
405 bsi = gsi_last_bb (loop_preheader_edge (loop)->src); 335 bsi = gsi_last_bb (loop_preheader_edge (loop)->src);
406 336 generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi);
407 if (gimple_assign_rhs_code (write) == INTEGER_CST 337 res = true;
408 && (integer_zerop (op1) || real_zerop (op1)))
409 res = generate_memset_zero (write, op0, nb_iter, bsi);
410 338
411 /* If this is the last partition for which we generate code, we have 339 /* If this is the last partition for which we generate code, we have
412 to destroy the loop. */ 340 to destroy the loop. */
413 if (res && !copy_p) 341 if (!copy_p)
414 { 342 {
415 unsigned nbbs = loop->num_nodes; 343 unsigned nbbs = loop->num_nodes;
416 basic_block src = loop_preheader_edge (loop)->src; 344 edge exit = single_exit (loop);
417 basic_block dest = single_exit (loop)->dest; 345 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
418 prop_phis (dest); 346 redirect_edge_pred (exit, src);
419 make_edge (src, dest, EDGE_FALLTHRU); 347 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
348 exit->flags |= EDGE_FALLTHRU;
420 cancel_loop_tree (loop); 349 cancel_loop_tree (loop);
350 rescan_loop_exit (exit, false, true);
421 351
422 for (i = 0; i < nbbs; i++) 352 for (i = 0; i < nbbs; i++)
423 delete_basic_block (bbs[i]); 353 delete_basic_block (bbs[i]);
424 354
425 set_immediate_dominator (CDI_DOMINATORS, dest, 355 set_immediate_dominator (CDI_DOMINATORS, dest,
516 unsigned i; 446 unsigned i;
517 VEC (int, heap) *nodes = VEC_alloc (int, heap, 3); 447 VEC (int, heap) *nodes = VEC_alloc (int, heap, 3);
518 448
519 graphds_dfs (rdg, &v, 1, &nodes, false, NULL); 449 graphds_dfs (rdg, &v, 1, &nodes, false, NULL);
520 450
521 for (i = 0; VEC_iterate (int, nodes, i, x); i++) 451 FOR_EACH_VEC_ELT (int, nodes, i, x)
522 { 452 {
523 if (bitmap_bit_p (seen, x)) 453 if (!bitmap_set_bit (seen, x))
524 continue; 454 continue;
525
526 bitmap_set_bit (seen, x);
527 455
528 if (RDG_MEM_WRITE_STMT (rdg, x) 456 if (RDG_MEM_WRITE_STMT (rdg, x)
529 || predecessor_has_mem_write (rdg, &(rdg->vertices[x])) 457 || predecessor_has_mem_write (rdg, &(rdg->vertices[x]))
530 /* In anti dependences the read should occur before 458 /* In anti dependences the read should occur before
531 the write, this is why both the read and the write 459 the write, this is why both the read and the write
550 } 478 }
551 479
552 static void rdg_flag_vertex_and_dependent (struct graph *, int, bitmap, bitmap, 480 static void rdg_flag_vertex_and_dependent (struct graph *, int, bitmap, bitmap,
553 bitmap, bool *); 481 bitmap, bool *);
554 482
555 /* Flag all the uses of U. */
556
557 static void
558 rdg_flag_all_uses (struct graph *rdg, int u, bitmap partition, bitmap loops,
559 bitmap processed, bool *part_has_writes)
560 {
561 struct graph_edge *e;
562
563 for (e = rdg->vertices[u].succ; e; e = e->succ_next)
564 if (!bitmap_bit_p (processed, e->dest))
565 {
566 rdg_flag_vertex_and_dependent (rdg, e->dest, partition, loops,
567 processed, part_has_writes);
568 rdg_flag_all_uses (rdg, e->dest, partition, loops, processed,
569 part_has_writes);
570 }
571 }
572
573 /* Flag the uses of U stopping following the information from 483 /* Flag the uses of U stopping following the information from
574 upstream_mem_writes. */ 484 upstream_mem_writes. */
575 485
576 static void 486 static void
577 rdg_flag_uses (struct graph *rdg, int u, bitmap partition, bitmap loops, 487 rdg_flag_uses (struct graph *rdg, int u, bitmap partition, bitmap loops,
643 rdg_flag_vertex (struct graph *rdg, int v, bitmap partition, bitmap loops, 553 rdg_flag_vertex (struct graph *rdg, int v, bitmap partition, bitmap loops,
644 bool *part_has_writes) 554 bool *part_has_writes)
645 { 555 {
646 struct loop *loop; 556 struct loop *loop;
647 557
648 if (bitmap_bit_p (partition, v)) 558 if (!bitmap_set_bit (partition, v))
649 return; 559 return;
650 560
651 loop = loop_containing_stmt (RDG_STMT (rdg, v)); 561 loop = loop_containing_stmt (RDG_STMT (rdg, v));
652 bitmap_set_bit (loops, loop->num); 562 bitmap_set_bit (loops, loop->num);
653 bitmap_set_bit (partition, v);
654 563
655 if (rdg_cannot_recompute_vertex_p (rdg, v)) 564 if (rdg_cannot_recompute_vertex_p (rdg, v))
656 { 565 {
657 *part_has_writes = true; 566 *part_has_writes = true;
658 bitmap_clear_bit (remaining_stmts, v); 567 bitmap_clear_bit (remaining_stmts, v);
674 bitmap_set_bit (processed, v); 583 bitmap_set_bit (processed, v);
675 rdg_flag_uses (rdg, v, partition, loops, processed, part_has_writes); 584 rdg_flag_uses (rdg, v, partition, loops, processed, part_has_writes);
676 graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts); 585 graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts);
677 rdg_flag_vertex (rdg, v, partition, loops, part_has_writes); 586 rdg_flag_vertex (rdg, v, partition, loops, part_has_writes);
678 587
679 for (i = 0; VEC_iterate (int, nodes, i, x); i++) 588 FOR_EACH_VEC_ELT (int, nodes, i, x)
680 if (!already_processed_vertex_p (processed, x)) 589 if (!already_processed_vertex_p (processed, x))
681 rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed, 590 rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed,
682 part_has_writes); 591 part_has_writes);
683 592
684 VEC_free (int, heap, nodes); 593 VEC_free (int, heap, nodes);
692 { 601 {
693 unsigned i; 602 unsigned i;
694 edge e; 603 edge e;
695 VEC (edge, heap) *exits = get_loop_exit_edges (loop); 604 VEC (edge, heap) *exits = get_loop_exit_edges (loop);
696 605
697 for (i = 0; VEC_iterate (edge, exits, i, e); i++) 606 FOR_EACH_VEC_ELT (edge, exits, i, e)
698 { 607 {
699 gimple cond = last_stmt (e->src); 608 gimple cond = last_stmt (e->src);
700 609
701 if (cond) 610 if (cond)
702 VEC_safe_push (gimple, heap, *conds, cond); 611 VEC_safe_push (gimple, heap, *conds, cond);
729 if (!already_processed_vertex_p (processed, v)) 638 if (!already_processed_vertex_p (processed, v))
730 rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed, 639 rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed,
731 part_has_writes); 640 part_has_writes);
732 641
733 EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi) 642 EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi)
734 if (!bitmap_bit_p (loops, i)) 643 if (bitmap_set_bit (loops, i))
735 { 644 collect_condition_stmts (get_loop (i), &conds);
736 bitmap_set_bit (loops, i);
737 collect_condition_stmts (get_loop (i), &conds);
738 }
739 645
740 BITMAP_FREE (new_loops); 646 BITMAP_FREE (new_loops);
741 } 647 }
742 } 648
743 649 VEC_free (gimple, heap, conds);
744 /* Flag all the nodes of RDG containing memory accesses that could
745 potentially belong to arrays already accessed in the current
746 PARTITION. */
747
748 static void
749 rdg_flag_similar_memory_accesses (struct graph *rdg, bitmap partition,
750 bitmap loops, bitmap processed,
751 VEC (int, heap) **other_stores)
752 {
753 bool foo;
754 unsigned i, n;
755 int j, k, kk;
756 bitmap_iterator ii;
757 struct graph_edge *e;
758
759 EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, ii)
760 if (RDG_MEM_WRITE_STMT (rdg, i)
761 || RDG_MEM_READS_STMT (rdg, i))
762 {
763 for (j = 0; j < rdg->n_vertices; j++)
764 if (!bitmap_bit_p (processed, j)
765 && (RDG_MEM_WRITE_STMT (rdg, j)
766 || RDG_MEM_READS_STMT (rdg, j))
767 && rdg_has_similar_memory_accesses (rdg, i, j))
768 {
769 /* Flag first the node J itself, and all the nodes that
770 are needed to compute J. */
771 rdg_flag_vertex_and_dependent (rdg, j, partition, loops,
772 processed, &foo);
773
774 /* When J is a read, we want to coalesce in the same
775 PARTITION all the nodes that are using J: this is
776 needed for better cache locality. */
777 rdg_flag_all_uses (rdg, j, partition, loops, processed, &foo);
778
779 /* Remove from OTHER_STORES the vertex that we flagged. */
780 if (RDG_MEM_WRITE_STMT (rdg, j))
781 for (k = 0; VEC_iterate (int, *other_stores, k, kk); k++)
782 if (kk == j)
783 {
784 VEC_unordered_remove (int, *other_stores, k);
785 break;
786 }
787 }
788
789 /* If the node I has two uses, then keep these together in the
790 same PARTITION. */
791 for (n = 0, e = rdg->vertices[i].succ; e; e = e->succ_next, n++);
792
793 if (n > 1)
794 rdg_flag_all_uses (rdg, i, partition, loops, processed, &foo);
795 }
796 } 650 }
797 651
798 /* Returns a bitmap in which all the statements needed for computing 652 /* Returns a bitmap in which all the statements needed for computing
799 the strongly connected component C of the RDG are flagged, also 653 the strongly connected component C of the RDG are flagged, also
800 including the loop exit conditions. */ 654 including the loop exit conditions. */
801 655
802 static bitmap 656 static bitmap
803 build_rdg_partition_for_component (struct graph *rdg, rdgc c, 657 build_rdg_partition_for_component (struct graph *rdg, rdgc c,
804 bool *part_has_writes, 658 bool *part_has_writes)
805 VEC (int, heap) **other_stores)
806 { 659 {
807 int i, v; 660 int i, v;
808 bitmap partition = BITMAP_ALLOC (NULL); 661 bitmap partition = BITMAP_ALLOC (NULL);
809 bitmap loops = BITMAP_ALLOC (NULL); 662 bitmap loops = BITMAP_ALLOC (NULL);
810 bitmap processed = BITMAP_ALLOC (NULL); 663 bitmap processed = BITMAP_ALLOC (NULL);
811 664
812 for (i = 0; VEC_iterate (int, c->vertices, i, v); i++) 665 FOR_EACH_VEC_ELT (int, c->vertices, i, v)
813 if (!already_processed_vertex_p (processed, v)) 666 if (!already_processed_vertex_p (processed, v))
814 rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed, 667 rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed,
815 part_has_writes); 668 part_has_writes);
816 669
817 /* Also iterate on the array of stores not in the starting vertices,
818 and determine those vertices that have some memory affinity with
819 the current nodes in the component: these are stores to the same
820 arrays, i.e. we're taking care of cache locality. */
821 rdg_flag_similar_memory_accesses (rdg, partition, loops, processed,
822 other_stores);
823
824 rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes); 670 rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes);
825 671
826 BITMAP_FREE (processed); 672 BITMAP_FREE (processed);
827 BITMAP_FREE (loops); 673 BITMAP_FREE (loops);
828 return partition; 674 return partition;
834 free_rdg_components (VEC (rdgc, heap) *components) 680 free_rdg_components (VEC (rdgc, heap) *components)
835 { 681 {
836 int i; 682 int i;
837 rdgc x; 683 rdgc x;
838 684
839 for (i = 0; VEC_iterate (rdgc, components, i, x); i++) 685 FOR_EACH_VEC_ELT (rdgc, components, i, x)
840 { 686 {
841 VEC_free (int, heap, x->vertices); 687 VEC_free (int, heap, x->vertices);
842 free (x); 688 free (x);
843 } 689 }
690
691 VEC_free (rdgc, heap, components);
844 } 692 }
845 693
846 /* Build the COMPONENTS vector with the strongly connected components 694 /* Build the COMPONENTS vector with the strongly connected components
847 of RDG in which the STARTING_VERTICES occur. */ 695 of RDG in which the STARTING_VERTICES occur. */
848 696
859 all_components[i] = VEC_alloc (int, heap, 3); 707 all_components[i] = VEC_alloc (int, heap, 3);
860 708
861 for (i = 0; i < rdg->n_vertices; i++) 709 for (i = 0; i < rdg->n_vertices; i++)
862 VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i); 710 VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i);
863 711
864 for (i = 0; VEC_iterate (int, starting_vertices, i, v); i++) 712 FOR_EACH_VEC_ELT (int, starting_vertices, i, v)
865 { 713 {
866 int c = rdg->vertices[v].component; 714 int c = rdg->vertices[v].component;
867 715
868 if (!bitmap_bit_p (saved_components, c)) 716 if (bitmap_set_bit (saved_components, c))
869 { 717 {
870 rdgc x = XCNEW (struct rdg_component); 718 rdgc x = XCNEW (struct rdg_component);
871 x->num = c; 719 x->num = c;
872 x->vertices = all_components[c]; 720 x->vertices = all_components[c];
873 721
874 VEC_safe_push (rdgc, heap, *components, x); 722 VEC_safe_push (rdgc, heap, *components, x);
875 bitmap_set_bit (saved_components, c);
876 } 723 }
877 } 724 }
878 725
879 for (i = 0; i < n_components; i++) 726 for (i = 0; i < n_components; i++)
880 if (!bitmap_bit_p (saved_components, i)) 727 if (!bitmap_bit_p (saved_components, i))
882 729
883 free (all_components); 730 free (all_components);
884 BITMAP_FREE (saved_components); 731 BITMAP_FREE (saved_components);
885 } 732 }
886 733
734 /* Returns true when it is possible to generate a builtin pattern for
735 the PARTITION of RDG. For the moment we detect only the memset
736 zero pattern. */
737
738 static bool
739 can_generate_builtin (struct graph *rdg, bitmap partition)
740 {
741 unsigned i;
742 bitmap_iterator bi;
743 int nb_reads = 0;
744 int nb_writes = 0;
745 int stores_zero = 0;
746
747 EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, bi)
748 if (RDG_MEM_READS_STMT (rdg, i))
749 nb_reads++;
750 else if (RDG_MEM_WRITE_STMT (rdg, i))
751 {
752 nb_writes++;
753 if (stmt_with_adjacent_zero_store_dr_p (RDG_STMT (rdg, i)))
754 stores_zero++;
755 }
756
757 return stores_zero == 1 && nb_writes == 1 && nb_reads == 0;
758 }
759
760 /* Returns true when PARTITION1 and PARTITION2 have similar memory
761 accesses in RDG. */
762
763 static bool
764 similar_memory_accesses (struct graph *rdg, bitmap partition1,
765 bitmap partition2)
766 {
767 unsigned i, j;
768 bitmap_iterator bi, bj;
769
770 EXECUTE_IF_SET_IN_BITMAP (partition1, 0, i, bi)
771 if (RDG_MEM_WRITE_STMT (rdg, i)
772 || RDG_MEM_READS_STMT (rdg, i))
773 EXECUTE_IF_SET_IN_BITMAP (partition2, 0, j, bj)
774 if (RDG_MEM_WRITE_STMT (rdg, j)
775 || RDG_MEM_READS_STMT (rdg, j))
776 if (rdg_has_similar_memory_accesses (rdg, i, j))
777 return true;
778
779 return false;
780 }
781
782 /* Fuse all the partitions from PARTITIONS that contain similar memory
783 references, i.e., we're taking care of cache locality. This
784 function does not fuse those partitions that contain patterns that
785 can be code generated with builtins. */
786
787 static void
788 fuse_partitions_with_similar_memory_accesses (struct graph *rdg,
789 VEC (bitmap, heap) **partitions)
790 {
791 int p1, p2;
792 bitmap partition1, partition2;
793
794 FOR_EACH_VEC_ELT (bitmap, *partitions, p1, partition1)
795 if (!can_generate_builtin (rdg, partition1))
796 FOR_EACH_VEC_ELT (bitmap, *partitions, p2, partition2)
797 if (p1 != p2
798 && !can_generate_builtin (rdg, partition2)
799 && similar_memory_accesses (rdg, partition1, partition2))
800 {
801 bitmap_ior_into (partition1, partition2);
802 VEC_ordered_remove (bitmap, *partitions, p2);
803 p2--;
804 }
805 }
806
807 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after
808 the LOOP. */
809
810 static bool
811 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
812 {
813 imm_use_iterator imm_iter;
814 use_operand_p use_p;
815
816 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
817 if (loop != loop_containing_stmt (USE_STMT (use_p)))
818 return true;
819
820 return false;
821 }
822
823 /* Returns true when STMT defines a scalar variable used after the
824 loop. */
825
826 static bool
827 stmt_has_scalar_dependences_outside_loop (gimple stmt)
828 {
829 tree name;
830
831 switch (gimple_code (stmt))
832 {
833 case GIMPLE_ASSIGN:
834 name = gimple_assign_lhs (stmt);
835 break;
836
837 case GIMPLE_PHI:
838 name = gimple_phi_result (stmt);
839 break;
840
841 default:
842 return false;
843 }
844
845 return TREE_CODE (name) == SSA_NAME
846 && ssa_name_has_uses_outside_loop_p (name, loop_containing_stmt (stmt));
847 }
848
849 /* Returns true when STMT will be code generated in a partition of RDG
850 different than PART and that will not be code generated as a
851 builtin. */
852
853 static bool
854 stmt_generated_in_another_partition (struct graph *rdg, gimple stmt, int part,
855 VEC (bitmap, heap) *partitions)
856 {
857 int p;
858 bitmap pp;
859 unsigned i;
860 bitmap_iterator bi;
861
862 FOR_EACH_VEC_ELT (bitmap, partitions, p, pp)
863 if (p != part
864 && !can_generate_builtin (rdg, pp))
865 EXECUTE_IF_SET_IN_BITMAP (pp, 0, i, bi)
866 if (stmt == RDG_STMT (rdg, i))
867 return true;
868
869 return false;
870 }
871
872 /* For each partition in PARTITIONS that will be code generated using
873 a builtin, add its scalar computations used after the loop to
874 PARTITION. */
875
876 static void
877 add_scalar_computations_to_partition (struct graph *rdg,
878 VEC (bitmap, heap) *partitions,
879 bitmap partition)
880 {
881 int p;
882 bitmap pp;
883 unsigned i;
884 bitmap_iterator bi;
885 bitmap l = BITMAP_ALLOC (NULL);
886 bitmap pr = BITMAP_ALLOC (NULL);
887 bool f = false;
888
889 FOR_EACH_VEC_ELT (bitmap, partitions, p, pp)
890 if (can_generate_builtin (rdg, pp))
891 EXECUTE_IF_SET_IN_BITMAP (pp, 0, i, bi)
892 if (stmt_has_scalar_dependences_outside_loop (RDG_STMT (rdg, i))
893 && !stmt_generated_in_another_partition (rdg, RDG_STMT (rdg, i), p,
894 partitions))
895 rdg_flag_vertex_and_dependent (rdg, i, partition, l, pr, &f);
896
897 rdg_flag_loop_exits (rdg, l, partition, pr, &f);
898
899 BITMAP_FREE (pr);
900 BITMAP_FREE (l);
901 }
902
887 /* Aggregate several components into a useful partition that is 903 /* Aggregate several components into a useful partition that is
888 registered in the PARTITIONS vector. Partitions will be 904 registered in the PARTITIONS vector. Partitions will be
889 distributed in different loops. */ 905 distributed in different loops. */
890 906
891 static void 907 static void
895 { 911 {
896 int i; 912 int i;
897 rdgc x; 913 rdgc x;
898 bitmap partition = BITMAP_ALLOC (NULL); 914 bitmap partition = BITMAP_ALLOC (NULL);
899 915
900 for (i = 0; VEC_iterate (rdgc, components, i, x); i++) 916 FOR_EACH_VEC_ELT (rdgc, components, i, x)
901 { 917 {
902 bitmap np; 918 bitmap np;
903 bool part_has_writes = false; 919 bool part_has_writes = false;
904 int v = VEC_index (int, x->vertices, 0); 920 int v = VEC_index (int, x->vertices, 0);
905 921
906 if (bitmap_bit_p (processed, v)) 922 if (bitmap_bit_p (processed, v))
907 continue; 923 continue;
908 924
909 np = build_rdg_partition_for_component (rdg, x, &part_has_writes, 925 np = build_rdg_partition_for_component (rdg, x, &part_has_writes);
910 other_stores);
911 bitmap_ior_into (partition, np); 926 bitmap_ior_into (partition, np);
912 bitmap_ior_into (processed, np); 927 bitmap_ior_into (processed, np);
913 BITMAP_FREE (np); 928 BITMAP_FREE (np);
914 929
915 if (part_has_writes) 930 if (part_has_writes)
946 961
947 VEC_free (int, heap, foo); 962 VEC_free (int, heap, foo);
948 free_rdg_components (comps); 963 free_rdg_components (comps);
949 } 964 }
950 965
966 add_scalar_computations_to_partition (rdg, *partitions, partition);
967
951 /* If there is something left in the last partition, save it. */ 968 /* If there is something left in the last partition, save it. */
952 if (bitmap_count_bits (partition) > 0) 969 if (bitmap_count_bits (partition) > 0)
953 VEC_safe_push (bitmap, heap, *partitions, partition); 970 VEC_safe_push (bitmap, heap, *partitions, partition);
954 else 971 else
955 BITMAP_FREE (partition); 972 BITMAP_FREE (partition);
973
974 fuse_partitions_with_similar_memory_accesses (rdg, partitions);
956 } 975 }
957 976
958 /* Dump to FILE the PARTITIONS. */ 977 /* Dump to FILE the PARTITIONS. */
959 978
960 static void 979 static void
961 dump_rdg_partitions (FILE *file, VEC (bitmap, heap) *partitions) 980 dump_rdg_partitions (FILE *file, VEC (bitmap, heap) *partitions)
962 { 981 {
963 int i; 982 int i;
964 bitmap partition; 983 bitmap partition;
965 984
966 for (i = 0; VEC_iterate (bitmap, partitions, i, partition); i++) 985 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
967 debug_bitmap_file (file, partition); 986 debug_bitmap_file (file, partition);
968 } 987 }
969 988
970 /* Debug PARTITIONS. */ 989 /* Debug PARTITIONS. */
971 extern void debug_rdg_partitions (VEC (bitmap, heap) *); 990 extern void debug_rdg_partitions (VEC (bitmap, heap) *);
972 991
973 void 992 DEBUG_FUNCTION void
974 debug_rdg_partitions (VEC (bitmap, heap) *partitions) 993 debug_rdg_partitions (VEC (bitmap, heap) *partitions)
975 { 994 {
976 dump_rdg_partitions (stderr, partitions); 995 dump_rdg_partitions (stderr, partitions);
977 } 996 }
978 997
1025 { 1044 {
1026 int i; 1045 int i;
1027 bitmap partition; 1046 bitmap partition;
1028 int nrw = number_of_rw_in_rdg (rdg); 1047 int nrw = number_of_rw_in_rdg (rdg);
1029 1048
1030 for (i = 0; VEC_iterate (bitmap, partitions, i, partition); i++) 1049 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1031 if (nrw == number_of_rw_in_partition (rdg, partition)) 1050 if (nrw == number_of_rw_in_partition (rdg, partition))
1032 return true; 1051 return true;
1033 1052
1034 return false; 1053 return false;
1035 } 1054 }
1060 { 1079 {
1061 int v; 1080 int v;
1062 unsigned j; 1081 unsigned j;
1063 bool found = false; 1082 bool found = false;
1064 1083
1065 for (j = 0; VEC_iterate (int, starting_vertices, j, v); j++) 1084 FOR_EACH_VEC_ELT (int, starting_vertices, j, v)
1066 if (i == v) 1085 if (i == v)
1067 { 1086 {
1068 found = true; 1087 found = true;
1069 break; 1088 break;
1070 } 1089 }
1086 goto ldist_done; 1105 goto ldist_done;
1087 1106
1088 if (dump_file && (dump_flags & TDF_DETAILS)) 1107 if (dump_file && (dump_flags & TDF_DETAILS))
1089 dump_rdg_partitions (dump_file, partitions); 1108 dump_rdg_partitions (dump_file, partitions);
1090 1109
1091 for (i = 0; VEC_iterate (bitmap, partitions, i, partition); i++) 1110 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1092 if (!generate_code_for_partition (loop, partition, i < nbp - 1)) 1111 if (!generate_code_for_partition (loop, partition, i < nbp - 1))
1093 goto ldist_done; 1112 goto ldist_done;
1094 1113
1095 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); 1114 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
1096 update_ssa (TODO_update_ssa_only_virtuals | TODO_update_ssa); 1115 update_ssa (TODO_update_ssa_only_virtuals | TODO_update_ssa);
1098 ldist_done: 1117 ldist_done:
1099 1118
1100 BITMAP_FREE (remaining_stmts); 1119 BITMAP_FREE (remaining_stmts);
1101 BITMAP_FREE (upstream_mem_writes); 1120 BITMAP_FREE (upstream_mem_writes);
1102 1121
1103 for (i = 0; VEC_iterate (bitmap, partitions, i, partition); i++) 1122 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition)
1104 BITMAP_FREE (partition); 1123 BITMAP_FREE (partition);
1105 1124
1106 VEC_free (int, heap, other_stores); 1125 VEC_free (int, heap, other_stores);
1107 VEC_free (bitmap, heap, partitions); 1126 VEC_free (bitmap, heap, partitions);
1108 free_rdg_components (components); 1127 free_rdg_components (components);
1121 int res = 0; 1140 int res = 0;
1122 struct graph *rdg; 1141 struct graph *rdg;
1123 gimple s; 1142 gimple s;
1124 unsigned i; 1143 unsigned i;
1125 VEC (int, heap) *vertices; 1144 VEC (int, heap) *vertices;
1145 VEC (ddr_p, heap) *dependence_relations;
1146 VEC (data_reference_p, heap) *datarefs;
1147 VEC (loop_p, heap) *loop_nest;
1126 1148
1127 if (loop->num_nodes > 2) 1149 if (loop->num_nodes > 2)
1128 { 1150 {
1129 if (dump_file && (dump_flags & TDF_DETAILS)) 1151 if (dump_file && (dump_flags & TDF_DETAILS))
1130 fprintf (dump_file, 1152 fprintf (dump_file,
1132 loop->num); 1154 loop->num);
1133 1155
1134 return res; 1156 return res;
1135 } 1157 }
1136 1158
1137 rdg = build_rdg (loop); 1159 datarefs = VEC_alloc (data_reference_p, heap, 10);
1160 dependence_relations = VEC_alloc (ddr_p, heap, 100);
1161 loop_nest = VEC_alloc (loop_p, heap, 3);
1162 rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs);
1138 1163
1139 if (!rdg) 1164 if (!rdg)
1140 { 1165 {
1141 if (dump_file && (dump_flags & TDF_DETAILS)) 1166 if (dump_file && (dump_flags & TDF_DETAILS))
1142 fprintf (dump_file, 1167 fprintf (dump_file,
1143 "FIXME: Loop %d not distributed: failed to build the RDG.\n", 1168 "FIXME: Loop %d not distributed: failed to build the RDG.\n",
1144 loop->num); 1169 loop->num);
1145 1170
1171 free_dependence_relations (dependence_relations);
1172 free_data_refs (datarefs);
1173 VEC_free (loop_p, heap, loop_nest);
1146 return res; 1174 return res;
1147 } 1175 }
1148 1176
1149 vertices = VEC_alloc (int, heap, 3); 1177 vertices = VEC_alloc (int, heap, 3);
1150 1178
1151 if (dump_file && (dump_flags & TDF_DETAILS)) 1179 if (dump_file && (dump_flags & TDF_DETAILS))
1152 dump_rdg (dump_file, rdg); 1180 dump_rdg (dump_file, rdg);
1153 1181
1154 for (i = 0; VEC_iterate (gimple, stmts, i, s); i++) 1182 FOR_EACH_VEC_ELT (gimple, stmts, i, s)
1155 { 1183 {
1156 int v = rdg_vertex_for_stmt (rdg, s); 1184 int v = rdg_vertex_for_stmt (rdg, s);
1157 1185
1158 if (v >= 0) 1186 if (v >= 0)
1159 { 1187 {
1166 } 1194 }
1167 1195
1168 res = ldist_gen (loop, rdg, vertices); 1196 res = ldist_gen (loop, rdg, vertices);
1169 VEC_free (int, heap, vertices); 1197 VEC_free (int, heap, vertices);
1170 free_rdg (rdg); 1198 free_rdg (rdg);
1171 1199 free_dependence_relations (dependence_relations);
1200 free_data_refs (datarefs);
1201 VEC_free (loop_p, heap, loop_nest);
1172 return res; 1202 return res;
1173 } 1203 }
1174 1204
1175 /* Distribute all loops in the current function. */ 1205 /* Distribute all loops in the current function. */
1176 1206
1181 loop_iterator li; 1211 loop_iterator li;
1182 int nb_generated_loops = 0; 1212 int nb_generated_loops = 0;
1183 1213
1184 FOR_EACH_LOOP (li, loop, 0) 1214 FOR_EACH_LOOP (li, loop, 0)
1185 { 1215 {
1186 VEC (gimple, heap) *work_list = VEC_alloc (gimple, heap, 3); 1216 VEC (gimple, heap) *work_list = NULL;
1187 1217 int num = loop->num;
1188 /* With the following working list, we're asking distribute_loop 1218
1189 to separate the stores of the loop: when dependences allow, 1219 /* If the loop doesn't have a single exit we will fail anyway,
1190 it will end on having one store per loop. */ 1220 so do that early. */
1191 stores_from_loop (loop, &work_list); 1221 if (!single_exit (loop))
1192 1222 continue;
1193 /* A simple heuristic for cache locality is to not split stores 1223
1194 to the same array. Without this call, an unrolled loop would 1224 /* If both flag_tree_loop_distribute_patterns and
1195 be split into as many loops as unroll factor, each loop 1225 flag_tree_loop_distribution are set, then only
1196 storing in the same array. */ 1226 distribute_patterns is executed. */
1197 remove_similar_memory_refs (&work_list); 1227 if (flag_tree_loop_distribute_patterns)
1198 1228 {
1199 nb_generated_loops = distribute_loop (loop, work_list); 1229 /* With the following working list, we're asking
1230 distribute_loop to separate from the rest of the loop the
1231 stores of the form "A[i] = 0". */
1232 stores_zero_from_loop (loop, &work_list);
1233
1234 /* Do nothing if there are no patterns to be distributed. */
1235 if (VEC_length (gimple, work_list) > 0)
1236 nb_generated_loops = distribute_loop (loop, work_list);
1237 }
1238 else if (flag_tree_loop_distribution)
1239 {
1240 /* With the following working list, we're asking
1241 distribute_loop to separate the stores of the loop: when
1242 dependences allow, it will end on having one store per
1243 loop. */
1244 stores_from_loop (loop, &work_list);
1245
1246 /* A simple heuristic for cache locality is to not split
1247 stores to the same array. Without this call, an unrolled
1248 loop would be split into as many loops as unroll factor,
1249 each loop storing in the same array. */
1250 remove_similar_memory_refs (&work_list);
1251
1252 nb_generated_loops = distribute_loop (loop, work_list);
1253 }
1200 1254
1201 if (dump_file && (dump_flags & TDF_DETAILS)) 1255 if (dump_file && (dump_flags & TDF_DETAILS))
1202 { 1256 {
1203 if (nb_generated_loops > 1) 1257 if (nb_generated_loops > 1)
1204 fprintf (dump_file, "Loop %d distributed: split to %d loops.\n", 1258 fprintf (dump_file, "Loop %d distributed: split to %d loops.\n",
1205 loop->num, nb_generated_loops); 1259 num, nb_generated_loops);
1206 else 1260 else
1207 fprintf (dump_file, "Loop %d is the same.\n", loop->num); 1261 fprintf (dump_file, "Loop %d is the same.\n", num);
1208 } 1262 }
1209 1263
1210 verify_loop_structure (); 1264 verify_loop_structure ();
1211 1265
1212 VEC_free (gimple, heap, work_list); 1266 VEC_free (gimple, heap, work_list);
1216 } 1270 }
1217 1271
1218 static bool 1272 static bool
1219 gate_tree_loop_distribution (void) 1273 gate_tree_loop_distribution (void)
1220 { 1274 {
1221 return flag_tree_loop_distribution != 0; 1275 return flag_tree_loop_distribution
1276 || flag_tree_loop_distribute_patterns;
1222 } 1277 }
1223 1278
1224 struct gimple_opt_pass pass_loop_distribution = 1279 struct gimple_opt_pass pass_loop_distribution =
1225 { 1280 {
1226 { 1281 {