Mercurial > hg > CbC > CbC_gcc
comparison gcc/tree-loop-distribution.c @ 67:f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
author | nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 22 Mar 2011 17:18:12 +0900 |
parents | b7f97abdc517 |
children | 04ced10e8804 |
comparison
equal
deleted
inserted
replaced
65:65488c3d617d | 67:f6334be47118 |
---|---|
43 generates the new loops. */ | 43 generates the new loops. */ |
44 | 44 |
45 #include "config.h" | 45 #include "config.h" |
46 #include "system.h" | 46 #include "system.h" |
47 #include "coretypes.h" | 47 #include "coretypes.h" |
48 #include "tm.h" | |
49 #include "tree.h" | |
50 #include "basic-block.h" | |
51 #include "diagnostic.h" | |
52 #include "tree-flow.h" | 48 #include "tree-flow.h" |
53 #include "tree-dump.h" | |
54 #include "timevar.h" | |
55 #include "cfgloop.h" | 49 #include "cfgloop.h" |
56 #include "expr.h" | |
57 #include "optabs.h" | |
58 #include "tree-chrec.h" | 50 #include "tree-chrec.h" |
59 #include "tree-data-ref.h" | 51 #include "tree-data-ref.h" |
60 #include "tree-scalar-evolution.h" | 52 #include "tree-scalar-evolution.h" |
61 #include "tree-pass.h" | 53 #include "tree-pass.h" |
62 #include "lambda.h" | |
63 #include "langhooks.h" | |
64 #include "tree-vectorizer.h" | |
65 | 54 |
66 /* If bit I is not set, it means that this node represents an | 55 /* If bit I is not set, it means that this node represents an |
67 operation that has already been performed, and that should not be | 56 operation that has already been performed, and that should not be |
68 performed again. This is the subgraph of remaining important | 57 performed again. This is the subgraph of remaining important |
69 computations that is passed to the DFS algorithm for avoiding to | 58 computations that is passed to the DFS algorithm for avoiding to |
196 { | 185 { |
197 basic_block bb = bbs[i]; | 186 basic_block bb = bbs[i]; |
198 | 187 |
199 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);) | 188 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi);) |
200 if (!bitmap_bit_p (partition, x++)) | 189 if (!bitmap_bit_p (partition, x++)) |
201 remove_phi_node (&bsi, true); | 190 { |
191 gimple phi = gsi_stmt (bsi); | |
192 if (!is_gimple_reg (gimple_phi_result (phi))) | |
193 mark_virtual_phi_result_for_renaming (phi); | |
194 remove_phi_node (&bsi, true); | |
195 } | |
202 else | 196 else |
203 gsi_next (&bsi); | 197 gsi_next (&bsi); |
204 | 198 |
205 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);) | 199 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi);) |
206 if (gimple_code (gsi_stmt (bsi)) != GIMPLE_LABEL | 200 { |
207 && !bitmap_bit_p (partition, x++)) | 201 gimple stmt = gsi_stmt (bsi); |
208 gsi_remove (&bsi, false); | 202 if (gimple_code (gsi_stmt (bsi)) != GIMPLE_LABEL |
209 else | 203 && !bitmap_bit_p (partition, x++)) |
210 gsi_next (&bsi); | 204 { |
211 | 205 unlink_stmt_vdef (stmt); |
212 mark_virtual_ops_in_bb (bb); | 206 gsi_remove (&bsi, true); |
207 release_defs (stmt); | |
208 } | |
209 else | |
210 gsi_next (&bsi); | |
211 } | |
213 } | 212 } |
214 | 213 |
215 free (bbs); | 214 free (bbs); |
216 return true; | 215 return true; |
217 } | 216 } |
232 return x; | 231 return x; |
233 } | 232 } |
234 | 233 |
235 /* Generate a call to memset. Return true when the operation succeeded. */ | 234 /* Generate a call to memset. Return true when the operation succeeded. */ |
236 | 235 |
237 static bool | 236 static void |
238 generate_memset_zero (gimple stmt, tree op0, tree nb_iter, | 237 generate_memset_zero (gimple stmt, tree op0, tree nb_iter, |
239 gimple_stmt_iterator bsi) | 238 gimple_stmt_iterator bsi) |
240 { | 239 { |
241 tree addr_base, nb_bytes; | 240 tree addr_base, nb_bytes; |
242 bool res = false; | 241 bool res = false; |
243 gimple_seq stmt_list = NULL, stmts; | 242 gimple_seq stmt_list = NULL, stmts; |
244 gimple fn_call; | 243 gimple fn_call; |
245 tree mem, fn; | 244 tree mem, fn; |
246 gimple_stmt_iterator i; | |
247 struct data_reference *dr = XCNEW (struct data_reference); | 245 struct data_reference *dr = XCNEW (struct data_reference); |
248 location_t loc = gimple_location (stmt); | 246 location_t loc = gimple_location (stmt); |
249 | 247 |
250 DR_STMT (dr) = stmt; | 248 DR_STMT (dr) = stmt; |
251 DR_REF (dr) = op0; | 249 DR_REF (dr) = op0; |
252 if (!dr_analyze_innermost (dr)) | 250 res = dr_analyze_innermost (dr); |
253 goto end; | 251 gcc_assert (res && stride_of_unit_type_p (DR_STEP (dr), TREE_TYPE (op0))); |
254 | 252 |
255 /* Test for a positive stride, iterating over every element. */ | 253 nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); |
256 if (integer_zerop (size_binop (MINUS_EXPR, | 254 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); |
257 fold_convert (sizetype, DR_STEP (dr)), | 255 addr_base = fold_convert_loc (loc, sizetype, addr_base); |
258 TYPE_SIZE_UNIT (TREE_TYPE (op0))))) | |
259 { | |
260 addr_base = fold_convert_loc (loc, sizetype, | |
261 size_binop_loc (loc, PLUS_EXPR, | |
262 DR_OFFSET (dr), | |
263 DR_INIT (dr))); | |
264 addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR, | |
265 TREE_TYPE (DR_BASE_ADDRESS (dr)), | |
266 DR_BASE_ADDRESS (dr), addr_base); | |
267 | |
268 nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); | |
269 } | |
270 | 256 |
271 /* Test for a negative stride, iterating over every element. */ | 257 /* Test for a negative stride, iterating over every element. */ |
272 else if (integer_zerop (size_binop (PLUS_EXPR, | 258 if (integer_zerop (size_binop (PLUS_EXPR, |
273 TYPE_SIZE_UNIT (TREE_TYPE (op0)), | 259 TYPE_SIZE_UNIT (TREE_TYPE (op0)), |
274 fold_convert (sizetype, DR_STEP (dr))))) | 260 fold_convert (sizetype, DR_STEP (dr))))) |
275 { | 261 { |
276 nb_bytes = build_size_arg_loc (loc, nb_iter, op0, &stmt_list); | |
277 | |
278 addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr)); | |
279 addr_base = fold_convert_loc (loc, sizetype, addr_base); | |
280 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base, | 262 addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base, |
281 fold_convert_loc (loc, sizetype, nb_bytes)); | 263 fold_convert_loc (loc, sizetype, nb_bytes)); |
282 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base, | 264 addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base, |
283 TYPE_SIZE_UNIT (TREE_TYPE (op0))); | 265 TYPE_SIZE_UNIT (TREE_TYPE (op0))); |
284 addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR, | 266 } |
285 TREE_TYPE (DR_BASE_ADDRESS (dr)), | 267 |
286 DR_BASE_ADDRESS (dr), addr_base); | 268 addr_base = fold_build2_loc (loc, POINTER_PLUS_EXPR, |
287 } | 269 TREE_TYPE (DR_BASE_ADDRESS (dr)), |
288 else | 270 DR_BASE_ADDRESS (dr), addr_base); |
289 goto end; | |
290 | |
291 mem = force_gimple_operand (addr_base, &stmts, true, NULL); | 271 mem = force_gimple_operand (addr_base, &stmts, true, NULL); |
292 gimple_seq_add_seq (&stmt_list, stmts); | 272 gimple_seq_add_seq (&stmt_list, stmts); |
293 | 273 |
294 fn = build_fold_addr_expr (implicit_built_in_decls [BUILT_IN_MEMSET]); | 274 fn = build_fold_addr_expr (implicit_built_in_decls [BUILT_IN_MEMSET]); |
295 fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes); | 275 fn_call = gimple_build_call (fn, 3, mem, integer_zero_node, nb_bytes); |
296 gimple_seq_add_stmt (&stmt_list, fn_call); | 276 gimple_seq_add_stmt (&stmt_list, fn_call); |
297 | |
298 for (i = gsi_start (stmt_list); !gsi_end_p (i); gsi_next (&i)) | |
299 { | |
300 gimple s = gsi_stmt (i); | |
301 update_stmt_if_modified (s); | |
302 } | |
303 | |
304 gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING); | 277 gsi_insert_seq_after (&bsi, stmt_list, GSI_CONTINUE_LINKING); |
305 res = true; | |
306 | 278 |
307 if (dump_file && (dump_flags & TDF_DETAILS)) | 279 if (dump_file && (dump_flags & TDF_DETAILS)) |
308 fprintf (dump_file, "generated memset zero\n"); | 280 fprintf (dump_file, "generated memset zero\n"); |
309 | 281 |
310 end: | |
311 free_data_ref (dr); | 282 free_data_ref (dr); |
312 return res; | |
313 } | |
314 | |
315 /* Propagate phis in BB b to their uses and remove them. */ | |
316 | |
317 static void | |
318 prop_phis (basic_block b) | |
319 { | |
320 gimple_stmt_iterator psi; | |
321 gimple_seq phis = phi_nodes (b); | |
322 | |
323 for (psi = gsi_start (phis); !gsi_end_p (psi); ) | |
324 { | |
325 gimple phi = gsi_stmt (psi); | |
326 tree def = gimple_phi_result (phi), use = gimple_phi_arg_def (phi, 0); | |
327 | |
328 gcc_assert (gimple_phi_num_args (phi) == 1); | |
329 | |
330 if (!is_gimple_reg (def)) | |
331 { | |
332 imm_use_iterator iter; | |
333 use_operand_p use_p; | |
334 gimple stmt; | |
335 | |
336 FOR_EACH_IMM_USE_STMT (stmt, iter, def) | |
337 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) | |
338 SET_USE (use_p, use); | |
339 } | |
340 else | |
341 replace_uses_by (def, use); | |
342 | |
343 remove_phi_node (&psi, true); | |
344 } | |
345 } | 283 } |
346 | 284 |
347 /* Tries to generate a builtin function for the instructions of LOOP | 285 /* Tries to generate a builtin function for the instructions of LOOP |
348 pointed to by the bits set in PARTITION. Returns true when the | 286 pointed to by the bits set in PARTITION. Returns true when the |
349 operation succeeded. */ | 287 operation succeeded. */ |
353 { | 291 { |
354 bool res = false; | 292 bool res = false; |
355 unsigned i, x = 0; | 293 unsigned i, x = 0; |
356 basic_block *bbs; | 294 basic_block *bbs; |
357 gimple write = NULL; | 295 gimple write = NULL; |
358 tree op0, op1; | |
359 gimple_stmt_iterator bsi; | 296 gimple_stmt_iterator bsi; |
360 tree nb_iter = number_of_exit_cond_executions (loop); | 297 tree nb_iter = number_of_exit_cond_executions (loop); |
361 | 298 |
362 if (!nb_iter || nb_iter == chrec_dont_know) | 299 if (!nb_iter || nb_iter == chrec_dont_know) |
363 return false; | 300 return false; |
389 nb_iter = number_of_latch_executions (loop); | 326 nb_iter = number_of_latch_executions (loop); |
390 } | 327 } |
391 } | 328 } |
392 } | 329 } |
393 | 330 |
394 if (!write) | 331 if (!stmt_with_adjacent_zero_store_dr_p (write)) |
395 goto end; | |
396 | |
397 op0 = gimple_assign_lhs (write); | |
398 op1 = gimple_assign_rhs1 (write); | |
399 | |
400 if (!(TREE_CODE (op0) == ARRAY_REF | |
401 || TREE_CODE (op0) == INDIRECT_REF)) | |
402 goto end; | 332 goto end; |
403 | 333 |
404 /* The new statements will be placed before LOOP. */ | 334 /* The new statements will be placed before LOOP. */ |
405 bsi = gsi_last_bb (loop_preheader_edge (loop)->src); | 335 bsi = gsi_last_bb (loop_preheader_edge (loop)->src); |
406 | 336 generate_memset_zero (write, gimple_assign_lhs (write), nb_iter, bsi); |
407 if (gimple_assign_rhs_code (write) == INTEGER_CST | 337 res = true; |
408 && (integer_zerop (op1) || real_zerop (op1))) | |
409 res = generate_memset_zero (write, op0, nb_iter, bsi); | |
410 | 338 |
411 /* If this is the last partition for which we generate code, we have | 339 /* If this is the last partition for which we generate code, we have |
412 to destroy the loop. */ | 340 to destroy the loop. */ |
413 if (res && !copy_p) | 341 if (!copy_p) |
414 { | 342 { |
415 unsigned nbbs = loop->num_nodes; | 343 unsigned nbbs = loop->num_nodes; |
416 basic_block src = loop_preheader_edge (loop)->src; | 344 edge exit = single_exit (loop); |
417 basic_block dest = single_exit (loop)->dest; | 345 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest; |
418 prop_phis (dest); | 346 redirect_edge_pred (exit, src); |
419 make_edge (src, dest, EDGE_FALLTHRU); | 347 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE); |
348 exit->flags |= EDGE_FALLTHRU; | |
420 cancel_loop_tree (loop); | 349 cancel_loop_tree (loop); |
350 rescan_loop_exit (exit, false, true); | |
421 | 351 |
422 for (i = 0; i < nbbs; i++) | 352 for (i = 0; i < nbbs; i++) |
423 delete_basic_block (bbs[i]); | 353 delete_basic_block (bbs[i]); |
424 | 354 |
425 set_immediate_dominator (CDI_DOMINATORS, dest, | 355 set_immediate_dominator (CDI_DOMINATORS, dest, |
516 unsigned i; | 446 unsigned i; |
517 VEC (int, heap) *nodes = VEC_alloc (int, heap, 3); | 447 VEC (int, heap) *nodes = VEC_alloc (int, heap, 3); |
518 | 448 |
519 graphds_dfs (rdg, &v, 1, &nodes, false, NULL); | 449 graphds_dfs (rdg, &v, 1, &nodes, false, NULL); |
520 | 450 |
521 for (i = 0; VEC_iterate (int, nodes, i, x); i++) | 451 FOR_EACH_VEC_ELT (int, nodes, i, x) |
522 { | 452 { |
523 if (bitmap_bit_p (seen, x)) | 453 if (!bitmap_set_bit (seen, x)) |
524 continue; | 454 continue; |
525 | |
526 bitmap_set_bit (seen, x); | |
527 | 455 |
528 if (RDG_MEM_WRITE_STMT (rdg, x) | 456 if (RDG_MEM_WRITE_STMT (rdg, x) |
529 || predecessor_has_mem_write (rdg, &(rdg->vertices[x])) | 457 || predecessor_has_mem_write (rdg, &(rdg->vertices[x])) |
530 /* In anti dependences the read should occur before | 458 /* In anti dependences the read should occur before |
531 the write, this is why both the read and the write | 459 the write, this is why both the read and the write |
550 } | 478 } |
551 | 479 |
552 static void rdg_flag_vertex_and_dependent (struct graph *, int, bitmap, bitmap, | 480 static void rdg_flag_vertex_and_dependent (struct graph *, int, bitmap, bitmap, |
553 bitmap, bool *); | 481 bitmap, bool *); |
554 | 482 |
555 /* Flag all the uses of U. */ | |
556 | |
557 static void | |
558 rdg_flag_all_uses (struct graph *rdg, int u, bitmap partition, bitmap loops, | |
559 bitmap processed, bool *part_has_writes) | |
560 { | |
561 struct graph_edge *e; | |
562 | |
563 for (e = rdg->vertices[u].succ; e; e = e->succ_next) | |
564 if (!bitmap_bit_p (processed, e->dest)) | |
565 { | |
566 rdg_flag_vertex_and_dependent (rdg, e->dest, partition, loops, | |
567 processed, part_has_writes); | |
568 rdg_flag_all_uses (rdg, e->dest, partition, loops, processed, | |
569 part_has_writes); | |
570 } | |
571 } | |
572 | |
573 /* Flag the uses of U stopping following the information from | 483 /* Flag the uses of U stopping following the information from |
574 upstream_mem_writes. */ | 484 upstream_mem_writes. */ |
575 | 485 |
576 static void | 486 static void |
577 rdg_flag_uses (struct graph *rdg, int u, bitmap partition, bitmap loops, | 487 rdg_flag_uses (struct graph *rdg, int u, bitmap partition, bitmap loops, |
643 rdg_flag_vertex (struct graph *rdg, int v, bitmap partition, bitmap loops, | 553 rdg_flag_vertex (struct graph *rdg, int v, bitmap partition, bitmap loops, |
644 bool *part_has_writes) | 554 bool *part_has_writes) |
645 { | 555 { |
646 struct loop *loop; | 556 struct loop *loop; |
647 | 557 |
648 if (bitmap_bit_p (partition, v)) | 558 if (!bitmap_set_bit (partition, v)) |
649 return; | 559 return; |
650 | 560 |
651 loop = loop_containing_stmt (RDG_STMT (rdg, v)); | 561 loop = loop_containing_stmt (RDG_STMT (rdg, v)); |
652 bitmap_set_bit (loops, loop->num); | 562 bitmap_set_bit (loops, loop->num); |
653 bitmap_set_bit (partition, v); | |
654 | 563 |
655 if (rdg_cannot_recompute_vertex_p (rdg, v)) | 564 if (rdg_cannot_recompute_vertex_p (rdg, v)) |
656 { | 565 { |
657 *part_has_writes = true; | 566 *part_has_writes = true; |
658 bitmap_clear_bit (remaining_stmts, v); | 567 bitmap_clear_bit (remaining_stmts, v); |
674 bitmap_set_bit (processed, v); | 583 bitmap_set_bit (processed, v); |
675 rdg_flag_uses (rdg, v, partition, loops, processed, part_has_writes); | 584 rdg_flag_uses (rdg, v, partition, loops, processed, part_has_writes); |
676 graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts); | 585 graphds_dfs (rdg, &v, 1, &nodes, false, remaining_stmts); |
677 rdg_flag_vertex (rdg, v, partition, loops, part_has_writes); | 586 rdg_flag_vertex (rdg, v, partition, loops, part_has_writes); |
678 | 587 |
679 for (i = 0; VEC_iterate (int, nodes, i, x); i++) | 588 FOR_EACH_VEC_ELT (int, nodes, i, x) |
680 if (!already_processed_vertex_p (processed, x)) | 589 if (!already_processed_vertex_p (processed, x)) |
681 rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed, | 590 rdg_flag_vertex_and_dependent (rdg, x, partition, loops, processed, |
682 part_has_writes); | 591 part_has_writes); |
683 | 592 |
684 VEC_free (int, heap, nodes); | 593 VEC_free (int, heap, nodes); |
692 { | 601 { |
693 unsigned i; | 602 unsigned i; |
694 edge e; | 603 edge e; |
695 VEC (edge, heap) *exits = get_loop_exit_edges (loop); | 604 VEC (edge, heap) *exits = get_loop_exit_edges (loop); |
696 | 605 |
697 for (i = 0; VEC_iterate (edge, exits, i, e); i++) | 606 FOR_EACH_VEC_ELT (edge, exits, i, e) |
698 { | 607 { |
699 gimple cond = last_stmt (e->src); | 608 gimple cond = last_stmt (e->src); |
700 | 609 |
701 if (cond) | 610 if (cond) |
702 VEC_safe_push (gimple, heap, *conds, cond); | 611 VEC_safe_push (gimple, heap, *conds, cond); |
729 if (!already_processed_vertex_p (processed, v)) | 638 if (!already_processed_vertex_p (processed, v)) |
730 rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed, | 639 rdg_flag_vertex_and_dependent (rdg, v, partition, new_loops, processed, |
731 part_has_writes); | 640 part_has_writes); |
732 | 641 |
733 EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi) | 642 EXECUTE_IF_SET_IN_BITMAP (new_loops, 0, i, bi) |
734 if (!bitmap_bit_p (loops, i)) | 643 if (bitmap_set_bit (loops, i)) |
735 { | 644 collect_condition_stmts (get_loop (i), &conds); |
736 bitmap_set_bit (loops, i); | |
737 collect_condition_stmts (get_loop (i), &conds); | |
738 } | |
739 | 645 |
740 BITMAP_FREE (new_loops); | 646 BITMAP_FREE (new_loops); |
741 } | 647 } |
742 } | 648 |
743 | 649 VEC_free (gimple, heap, conds); |
744 /* Flag all the nodes of RDG containing memory accesses that could | |
745 potentially belong to arrays already accessed in the current | |
746 PARTITION. */ | |
747 | |
748 static void | |
749 rdg_flag_similar_memory_accesses (struct graph *rdg, bitmap partition, | |
750 bitmap loops, bitmap processed, | |
751 VEC (int, heap) **other_stores) | |
752 { | |
753 bool foo; | |
754 unsigned i, n; | |
755 int j, k, kk; | |
756 bitmap_iterator ii; | |
757 struct graph_edge *e; | |
758 | |
759 EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, ii) | |
760 if (RDG_MEM_WRITE_STMT (rdg, i) | |
761 || RDG_MEM_READS_STMT (rdg, i)) | |
762 { | |
763 for (j = 0; j < rdg->n_vertices; j++) | |
764 if (!bitmap_bit_p (processed, j) | |
765 && (RDG_MEM_WRITE_STMT (rdg, j) | |
766 || RDG_MEM_READS_STMT (rdg, j)) | |
767 && rdg_has_similar_memory_accesses (rdg, i, j)) | |
768 { | |
769 /* Flag first the node J itself, and all the nodes that | |
770 are needed to compute J. */ | |
771 rdg_flag_vertex_and_dependent (rdg, j, partition, loops, | |
772 processed, &foo); | |
773 | |
774 /* When J is a read, we want to coalesce in the same | |
775 PARTITION all the nodes that are using J: this is | |
776 needed for better cache locality. */ | |
777 rdg_flag_all_uses (rdg, j, partition, loops, processed, &foo); | |
778 | |
779 /* Remove from OTHER_STORES the vertex that we flagged. */ | |
780 if (RDG_MEM_WRITE_STMT (rdg, j)) | |
781 for (k = 0; VEC_iterate (int, *other_stores, k, kk); k++) | |
782 if (kk == j) | |
783 { | |
784 VEC_unordered_remove (int, *other_stores, k); | |
785 break; | |
786 } | |
787 } | |
788 | |
789 /* If the node I has two uses, then keep these together in the | |
790 same PARTITION. */ | |
791 for (n = 0, e = rdg->vertices[i].succ; e; e = e->succ_next, n++); | |
792 | |
793 if (n > 1) | |
794 rdg_flag_all_uses (rdg, i, partition, loops, processed, &foo); | |
795 } | |
796 } | 650 } |
797 | 651 |
798 /* Returns a bitmap in which all the statements needed for computing | 652 /* Returns a bitmap in which all the statements needed for computing |
799 the strongly connected component C of the RDG are flagged, also | 653 the strongly connected component C of the RDG are flagged, also |
800 including the loop exit conditions. */ | 654 including the loop exit conditions. */ |
801 | 655 |
802 static bitmap | 656 static bitmap |
803 build_rdg_partition_for_component (struct graph *rdg, rdgc c, | 657 build_rdg_partition_for_component (struct graph *rdg, rdgc c, |
804 bool *part_has_writes, | 658 bool *part_has_writes) |
805 VEC (int, heap) **other_stores) | |
806 { | 659 { |
807 int i, v; | 660 int i, v; |
808 bitmap partition = BITMAP_ALLOC (NULL); | 661 bitmap partition = BITMAP_ALLOC (NULL); |
809 bitmap loops = BITMAP_ALLOC (NULL); | 662 bitmap loops = BITMAP_ALLOC (NULL); |
810 bitmap processed = BITMAP_ALLOC (NULL); | 663 bitmap processed = BITMAP_ALLOC (NULL); |
811 | 664 |
812 for (i = 0; VEC_iterate (int, c->vertices, i, v); i++) | 665 FOR_EACH_VEC_ELT (int, c->vertices, i, v) |
813 if (!already_processed_vertex_p (processed, v)) | 666 if (!already_processed_vertex_p (processed, v)) |
814 rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed, | 667 rdg_flag_vertex_and_dependent (rdg, v, partition, loops, processed, |
815 part_has_writes); | 668 part_has_writes); |
816 | 669 |
817 /* Also iterate on the array of stores not in the starting vertices, | |
818 and determine those vertices that have some memory affinity with | |
819 the current nodes in the component: these are stores to the same | |
820 arrays, i.e. we're taking care of cache locality. */ | |
821 rdg_flag_similar_memory_accesses (rdg, partition, loops, processed, | |
822 other_stores); | |
823 | |
824 rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes); | 670 rdg_flag_loop_exits (rdg, loops, partition, processed, part_has_writes); |
825 | 671 |
826 BITMAP_FREE (processed); | 672 BITMAP_FREE (processed); |
827 BITMAP_FREE (loops); | 673 BITMAP_FREE (loops); |
828 return partition; | 674 return partition; |
834 free_rdg_components (VEC (rdgc, heap) *components) | 680 free_rdg_components (VEC (rdgc, heap) *components) |
835 { | 681 { |
836 int i; | 682 int i; |
837 rdgc x; | 683 rdgc x; |
838 | 684 |
839 for (i = 0; VEC_iterate (rdgc, components, i, x); i++) | 685 FOR_EACH_VEC_ELT (rdgc, components, i, x) |
840 { | 686 { |
841 VEC_free (int, heap, x->vertices); | 687 VEC_free (int, heap, x->vertices); |
842 free (x); | 688 free (x); |
843 } | 689 } |
690 | |
691 VEC_free (rdgc, heap, components); | |
844 } | 692 } |
845 | 693 |
846 /* Build the COMPONENTS vector with the strongly connected components | 694 /* Build the COMPONENTS vector with the strongly connected components |
847 of RDG in which the STARTING_VERTICES occur. */ | 695 of RDG in which the STARTING_VERTICES occur. */ |
848 | 696 |
859 all_components[i] = VEC_alloc (int, heap, 3); | 707 all_components[i] = VEC_alloc (int, heap, 3); |
860 | 708 |
861 for (i = 0; i < rdg->n_vertices; i++) | 709 for (i = 0; i < rdg->n_vertices; i++) |
862 VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i); | 710 VEC_safe_push (int, heap, all_components[rdg->vertices[i].component], i); |
863 | 711 |
864 for (i = 0; VEC_iterate (int, starting_vertices, i, v); i++) | 712 FOR_EACH_VEC_ELT (int, starting_vertices, i, v) |
865 { | 713 { |
866 int c = rdg->vertices[v].component; | 714 int c = rdg->vertices[v].component; |
867 | 715 |
868 if (!bitmap_bit_p (saved_components, c)) | 716 if (bitmap_set_bit (saved_components, c)) |
869 { | 717 { |
870 rdgc x = XCNEW (struct rdg_component); | 718 rdgc x = XCNEW (struct rdg_component); |
871 x->num = c; | 719 x->num = c; |
872 x->vertices = all_components[c]; | 720 x->vertices = all_components[c]; |
873 | 721 |
874 VEC_safe_push (rdgc, heap, *components, x); | 722 VEC_safe_push (rdgc, heap, *components, x); |
875 bitmap_set_bit (saved_components, c); | |
876 } | 723 } |
877 } | 724 } |
878 | 725 |
879 for (i = 0; i < n_components; i++) | 726 for (i = 0; i < n_components; i++) |
880 if (!bitmap_bit_p (saved_components, i)) | 727 if (!bitmap_bit_p (saved_components, i)) |
882 | 729 |
883 free (all_components); | 730 free (all_components); |
884 BITMAP_FREE (saved_components); | 731 BITMAP_FREE (saved_components); |
885 } | 732 } |
886 | 733 |
734 /* Returns true when it is possible to generate a builtin pattern for | |
735 the PARTITION of RDG. For the moment we detect only the memset | |
736 zero pattern. */ | |
737 | |
738 static bool | |
739 can_generate_builtin (struct graph *rdg, bitmap partition) | |
740 { | |
741 unsigned i; | |
742 bitmap_iterator bi; | |
743 int nb_reads = 0; | |
744 int nb_writes = 0; | |
745 int stores_zero = 0; | |
746 | |
747 EXECUTE_IF_SET_IN_BITMAP (partition, 0, i, bi) | |
748 if (RDG_MEM_READS_STMT (rdg, i)) | |
749 nb_reads++; | |
750 else if (RDG_MEM_WRITE_STMT (rdg, i)) | |
751 { | |
752 nb_writes++; | |
753 if (stmt_with_adjacent_zero_store_dr_p (RDG_STMT (rdg, i))) | |
754 stores_zero++; | |
755 } | |
756 | |
757 return stores_zero == 1 && nb_writes == 1 && nb_reads == 0; | |
758 } | |
759 | |
760 /* Returns true when PARTITION1 and PARTITION2 have similar memory | |
761 accesses in RDG. */ | |
762 | |
763 static bool | |
764 similar_memory_accesses (struct graph *rdg, bitmap partition1, | |
765 bitmap partition2) | |
766 { | |
767 unsigned i, j; | |
768 bitmap_iterator bi, bj; | |
769 | |
770 EXECUTE_IF_SET_IN_BITMAP (partition1, 0, i, bi) | |
771 if (RDG_MEM_WRITE_STMT (rdg, i) | |
772 || RDG_MEM_READS_STMT (rdg, i)) | |
773 EXECUTE_IF_SET_IN_BITMAP (partition2, 0, j, bj) | |
774 if (RDG_MEM_WRITE_STMT (rdg, j) | |
775 || RDG_MEM_READS_STMT (rdg, j)) | |
776 if (rdg_has_similar_memory_accesses (rdg, i, j)) | |
777 return true; | |
778 | |
779 return false; | |
780 } | |
781 | |
782 /* Fuse all the partitions from PARTITIONS that contain similar memory | |
783 references, i.e., we're taking care of cache locality. This | |
784 function does not fuse those partitions that contain patterns that | |
785 can be code generated with builtins. */ | |
786 | |
787 static void | |
788 fuse_partitions_with_similar_memory_accesses (struct graph *rdg, | |
789 VEC (bitmap, heap) **partitions) | |
790 { | |
791 int p1, p2; | |
792 bitmap partition1, partition2; | |
793 | |
794 FOR_EACH_VEC_ELT (bitmap, *partitions, p1, partition1) | |
795 if (!can_generate_builtin (rdg, partition1)) | |
796 FOR_EACH_VEC_ELT (bitmap, *partitions, p2, partition2) | |
797 if (p1 != p2 | |
798 && !can_generate_builtin (rdg, partition2) | |
799 && similar_memory_accesses (rdg, partition1, partition2)) | |
800 { | |
801 bitmap_ior_into (partition1, partition2); | |
802 VEC_ordered_remove (bitmap, *partitions, p2); | |
803 p2--; | |
804 } | |
805 } | |
806 | |
807 /* Returns true when DEF is an SSA_NAME defined in LOOP and used after | |
808 the LOOP. */ | |
809 | |
810 static bool | |
811 ssa_name_has_uses_outside_loop_p (tree def, loop_p loop) | |
812 { | |
813 imm_use_iterator imm_iter; | |
814 use_operand_p use_p; | |
815 | |
816 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def) | |
817 if (loop != loop_containing_stmt (USE_STMT (use_p))) | |
818 return true; | |
819 | |
820 return false; | |
821 } | |
822 | |
823 /* Returns true when STMT defines a scalar variable used after the | |
824 loop. */ | |
825 | |
826 static bool | |
827 stmt_has_scalar_dependences_outside_loop (gimple stmt) | |
828 { | |
829 tree name; | |
830 | |
831 switch (gimple_code (stmt)) | |
832 { | |
833 case GIMPLE_ASSIGN: | |
834 name = gimple_assign_lhs (stmt); | |
835 break; | |
836 | |
837 case GIMPLE_PHI: | |
838 name = gimple_phi_result (stmt); | |
839 break; | |
840 | |
841 default: | |
842 return false; | |
843 } | |
844 | |
845 return TREE_CODE (name) == SSA_NAME | |
846 && ssa_name_has_uses_outside_loop_p (name, loop_containing_stmt (stmt)); | |
847 } | |
848 | |
849 /* Returns true when STMT will be code generated in a partition of RDG | |
850 different than PART and that will not be code generated as a | |
851 builtin. */ | |
852 | |
853 static bool | |
854 stmt_generated_in_another_partition (struct graph *rdg, gimple stmt, int part, | |
855 VEC (bitmap, heap) *partitions) | |
856 { | |
857 int p; | |
858 bitmap pp; | |
859 unsigned i; | |
860 bitmap_iterator bi; | |
861 | |
862 FOR_EACH_VEC_ELT (bitmap, partitions, p, pp) | |
863 if (p != part | |
864 && !can_generate_builtin (rdg, pp)) | |
865 EXECUTE_IF_SET_IN_BITMAP (pp, 0, i, bi) | |
866 if (stmt == RDG_STMT (rdg, i)) | |
867 return true; | |
868 | |
869 return false; | |
870 } | |
871 | |
872 /* For each partition in PARTITIONS that will be code generated using | |
873 a builtin, add its scalar computations used after the loop to | |
874 PARTITION. */ | |
875 | |
876 static void | |
877 add_scalar_computations_to_partition (struct graph *rdg, | |
878 VEC (bitmap, heap) *partitions, | |
879 bitmap partition) | |
880 { | |
881 int p; | |
882 bitmap pp; | |
883 unsigned i; | |
884 bitmap_iterator bi; | |
885 bitmap l = BITMAP_ALLOC (NULL); | |
886 bitmap pr = BITMAP_ALLOC (NULL); | |
887 bool f = false; | |
888 | |
889 FOR_EACH_VEC_ELT (bitmap, partitions, p, pp) | |
890 if (can_generate_builtin (rdg, pp)) | |
891 EXECUTE_IF_SET_IN_BITMAP (pp, 0, i, bi) | |
892 if (stmt_has_scalar_dependences_outside_loop (RDG_STMT (rdg, i)) | |
893 && !stmt_generated_in_another_partition (rdg, RDG_STMT (rdg, i), p, | |
894 partitions)) | |
895 rdg_flag_vertex_and_dependent (rdg, i, partition, l, pr, &f); | |
896 | |
897 rdg_flag_loop_exits (rdg, l, partition, pr, &f); | |
898 | |
899 BITMAP_FREE (pr); | |
900 BITMAP_FREE (l); | |
901 } | |
902 | |
887 /* Aggregate several components into a useful partition that is | 903 /* Aggregate several components into a useful partition that is |
888 registered in the PARTITIONS vector. Partitions will be | 904 registered in the PARTITIONS vector. Partitions will be |
889 distributed in different loops. */ | 905 distributed in different loops. */ |
890 | 906 |
891 static void | 907 static void |
895 { | 911 { |
896 int i; | 912 int i; |
897 rdgc x; | 913 rdgc x; |
898 bitmap partition = BITMAP_ALLOC (NULL); | 914 bitmap partition = BITMAP_ALLOC (NULL); |
899 | 915 |
900 for (i = 0; VEC_iterate (rdgc, components, i, x); i++) | 916 FOR_EACH_VEC_ELT (rdgc, components, i, x) |
901 { | 917 { |
902 bitmap np; | 918 bitmap np; |
903 bool part_has_writes = false; | 919 bool part_has_writes = false; |
904 int v = VEC_index (int, x->vertices, 0); | 920 int v = VEC_index (int, x->vertices, 0); |
905 | 921 |
906 if (bitmap_bit_p (processed, v)) | 922 if (bitmap_bit_p (processed, v)) |
907 continue; | 923 continue; |
908 | 924 |
909 np = build_rdg_partition_for_component (rdg, x, &part_has_writes, | 925 np = build_rdg_partition_for_component (rdg, x, &part_has_writes); |
910 other_stores); | |
911 bitmap_ior_into (partition, np); | 926 bitmap_ior_into (partition, np); |
912 bitmap_ior_into (processed, np); | 927 bitmap_ior_into (processed, np); |
913 BITMAP_FREE (np); | 928 BITMAP_FREE (np); |
914 | 929 |
915 if (part_has_writes) | 930 if (part_has_writes) |
946 | 961 |
947 VEC_free (int, heap, foo); | 962 VEC_free (int, heap, foo); |
948 free_rdg_components (comps); | 963 free_rdg_components (comps); |
949 } | 964 } |
950 | 965 |
966 add_scalar_computations_to_partition (rdg, *partitions, partition); | |
967 | |
951 /* If there is something left in the last partition, save it. */ | 968 /* If there is something left in the last partition, save it. */ |
952 if (bitmap_count_bits (partition) > 0) | 969 if (bitmap_count_bits (partition) > 0) |
953 VEC_safe_push (bitmap, heap, *partitions, partition); | 970 VEC_safe_push (bitmap, heap, *partitions, partition); |
954 else | 971 else |
955 BITMAP_FREE (partition); | 972 BITMAP_FREE (partition); |
973 | |
974 fuse_partitions_with_similar_memory_accesses (rdg, partitions); | |
956 } | 975 } |
957 | 976 |
958 /* Dump to FILE the PARTITIONS. */ | 977 /* Dump to FILE the PARTITIONS. */ |
959 | 978 |
960 static void | 979 static void |
961 dump_rdg_partitions (FILE *file, VEC (bitmap, heap) *partitions) | 980 dump_rdg_partitions (FILE *file, VEC (bitmap, heap) *partitions) |
962 { | 981 { |
963 int i; | 982 int i; |
964 bitmap partition; | 983 bitmap partition; |
965 | 984 |
966 for (i = 0; VEC_iterate (bitmap, partitions, i, partition); i++) | 985 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition) |
967 debug_bitmap_file (file, partition); | 986 debug_bitmap_file (file, partition); |
968 } | 987 } |
969 | 988 |
970 /* Debug PARTITIONS. */ | 989 /* Debug PARTITIONS. */ |
971 extern void debug_rdg_partitions (VEC (bitmap, heap) *); | 990 extern void debug_rdg_partitions (VEC (bitmap, heap) *); |
972 | 991 |
973 void | 992 DEBUG_FUNCTION void |
974 debug_rdg_partitions (VEC (bitmap, heap) *partitions) | 993 debug_rdg_partitions (VEC (bitmap, heap) *partitions) |
975 { | 994 { |
976 dump_rdg_partitions (stderr, partitions); | 995 dump_rdg_partitions (stderr, partitions); |
977 } | 996 } |
978 | 997 |
1025 { | 1044 { |
1026 int i; | 1045 int i; |
1027 bitmap partition; | 1046 bitmap partition; |
1028 int nrw = number_of_rw_in_rdg (rdg); | 1047 int nrw = number_of_rw_in_rdg (rdg); |
1029 | 1048 |
1030 for (i = 0; VEC_iterate (bitmap, partitions, i, partition); i++) | 1049 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition) |
1031 if (nrw == number_of_rw_in_partition (rdg, partition)) | 1050 if (nrw == number_of_rw_in_partition (rdg, partition)) |
1032 return true; | 1051 return true; |
1033 | 1052 |
1034 return false; | 1053 return false; |
1035 } | 1054 } |
1060 { | 1079 { |
1061 int v; | 1080 int v; |
1062 unsigned j; | 1081 unsigned j; |
1063 bool found = false; | 1082 bool found = false; |
1064 | 1083 |
1065 for (j = 0; VEC_iterate (int, starting_vertices, j, v); j++) | 1084 FOR_EACH_VEC_ELT (int, starting_vertices, j, v) |
1066 if (i == v) | 1085 if (i == v) |
1067 { | 1086 { |
1068 found = true; | 1087 found = true; |
1069 break; | 1088 break; |
1070 } | 1089 } |
1086 goto ldist_done; | 1105 goto ldist_done; |
1087 | 1106 |
1088 if (dump_file && (dump_flags & TDF_DETAILS)) | 1107 if (dump_file && (dump_flags & TDF_DETAILS)) |
1089 dump_rdg_partitions (dump_file, partitions); | 1108 dump_rdg_partitions (dump_file, partitions); |
1090 | 1109 |
1091 for (i = 0; VEC_iterate (bitmap, partitions, i, partition); i++) | 1110 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition) |
1092 if (!generate_code_for_partition (loop, partition, i < nbp - 1)) | 1111 if (!generate_code_for_partition (loop, partition, i < nbp - 1)) |
1093 goto ldist_done; | 1112 goto ldist_done; |
1094 | 1113 |
1095 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); | 1114 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); |
1096 update_ssa (TODO_update_ssa_only_virtuals | TODO_update_ssa); | 1115 update_ssa (TODO_update_ssa_only_virtuals | TODO_update_ssa); |
1098 ldist_done: | 1117 ldist_done: |
1099 | 1118 |
1100 BITMAP_FREE (remaining_stmts); | 1119 BITMAP_FREE (remaining_stmts); |
1101 BITMAP_FREE (upstream_mem_writes); | 1120 BITMAP_FREE (upstream_mem_writes); |
1102 | 1121 |
1103 for (i = 0; VEC_iterate (bitmap, partitions, i, partition); i++) | 1122 FOR_EACH_VEC_ELT (bitmap, partitions, i, partition) |
1104 BITMAP_FREE (partition); | 1123 BITMAP_FREE (partition); |
1105 | 1124 |
1106 VEC_free (int, heap, other_stores); | 1125 VEC_free (int, heap, other_stores); |
1107 VEC_free (bitmap, heap, partitions); | 1126 VEC_free (bitmap, heap, partitions); |
1108 free_rdg_components (components); | 1127 free_rdg_components (components); |
1121 int res = 0; | 1140 int res = 0; |
1122 struct graph *rdg; | 1141 struct graph *rdg; |
1123 gimple s; | 1142 gimple s; |
1124 unsigned i; | 1143 unsigned i; |
1125 VEC (int, heap) *vertices; | 1144 VEC (int, heap) *vertices; |
1145 VEC (ddr_p, heap) *dependence_relations; | |
1146 VEC (data_reference_p, heap) *datarefs; | |
1147 VEC (loop_p, heap) *loop_nest; | |
1126 | 1148 |
1127 if (loop->num_nodes > 2) | 1149 if (loop->num_nodes > 2) |
1128 { | 1150 { |
1129 if (dump_file && (dump_flags & TDF_DETAILS)) | 1151 if (dump_file && (dump_flags & TDF_DETAILS)) |
1130 fprintf (dump_file, | 1152 fprintf (dump_file, |
1132 loop->num); | 1154 loop->num); |
1133 | 1155 |
1134 return res; | 1156 return res; |
1135 } | 1157 } |
1136 | 1158 |
1137 rdg = build_rdg (loop); | 1159 datarefs = VEC_alloc (data_reference_p, heap, 10); |
1160 dependence_relations = VEC_alloc (ddr_p, heap, 100); | |
1161 loop_nest = VEC_alloc (loop_p, heap, 3); | |
1162 rdg = build_rdg (loop, &loop_nest, &dependence_relations, &datarefs); | |
1138 | 1163 |
1139 if (!rdg) | 1164 if (!rdg) |
1140 { | 1165 { |
1141 if (dump_file && (dump_flags & TDF_DETAILS)) | 1166 if (dump_file && (dump_flags & TDF_DETAILS)) |
1142 fprintf (dump_file, | 1167 fprintf (dump_file, |
1143 "FIXME: Loop %d not distributed: failed to build the RDG.\n", | 1168 "FIXME: Loop %d not distributed: failed to build the RDG.\n", |
1144 loop->num); | 1169 loop->num); |
1145 | 1170 |
1171 free_dependence_relations (dependence_relations); | |
1172 free_data_refs (datarefs); | |
1173 VEC_free (loop_p, heap, loop_nest); | |
1146 return res; | 1174 return res; |
1147 } | 1175 } |
1148 | 1176 |
1149 vertices = VEC_alloc (int, heap, 3); | 1177 vertices = VEC_alloc (int, heap, 3); |
1150 | 1178 |
1151 if (dump_file && (dump_flags & TDF_DETAILS)) | 1179 if (dump_file && (dump_flags & TDF_DETAILS)) |
1152 dump_rdg (dump_file, rdg); | 1180 dump_rdg (dump_file, rdg); |
1153 | 1181 |
1154 for (i = 0; VEC_iterate (gimple, stmts, i, s); i++) | 1182 FOR_EACH_VEC_ELT (gimple, stmts, i, s) |
1155 { | 1183 { |
1156 int v = rdg_vertex_for_stmt (rdg, s); | 1184 int v = rdg_vertex_for_stmt (rdg, s); |
1157 | 1185 |
1158 if (v >= 0) | 1186 if (v >= 0) |
1159 { | 1187 { |
1166 } | 1194 } |
1167 | 1195 |
1168 res = ldist_gen (loop, rdg, vertices); | 1196 res = ldist_gen (loop, rdg, vertices); |
1169 VEC_free (int, heap, vertices); | 1197 VEC_free (int, heap, vertices); |
1170 free_rdg (rdg); | 1198 free_rdg (rdg); |
1171 | 1199 free_dependence_relations (dependence_relations); |
1200 free_data_refs (datarefs); | |
1201 VEC_free (loop_p, heap, loop_nest); | |
1172 return res; | 1202 return res; |
1173 } | 1203 } |
1174 | 1204 |
1175 /* Distribute all loops in the current function. */ | 1205 /* Distribute all loops in the current function. */ |
1176 | 1206 |
1181 loop_iterator li; | 1211 loop_iterator li; |
1182 int nb_generated_loops = 0; | 1212 int nb_generated_loops = 0; |
1183 | 1213 |
1184 FOR_EACH_LOOP (li, loop, 0) | 1214 FOR_EACH_LOOP (li, loop, 0) |
1185 { | 1215 { |
1186 VEC (gimple, heap) *work_list = VEC_alloc (gimple, heap, 3); | 1216 VEC (gimple, heap) *work_list = NULL; |
1187 | 1217 int num = loop->num; |
1188 /* With the following working list, we're asking distribute_loop | 1218 |
1189 to separate the stores of the loop: when dependences allow, | 1219 /* If the loop doesn't have a single exit we will fail anyway, |
1190 it will end on having one store per loop. */ | 1220 so do that early. */ |
1191 stores_from_loop (loop, &work_list); | 1221 if (!single_exit (loop)) |
1192 | 1222 continue; |
1193 /* A simple heuristic for cache locality is to not split stores | 1223 |
1194 to the same array. Without this call, an unrolled loop would | 1224 /* If both flag_tree_loop_distribute_patterns and |
1195 be split into as many loops as unroll factor, each loop | 1225 flag_tree_loop_distribution are set, then only |
1196 storing in the same array. */ | 1226 distribute_patterns is executed. */ |
1197 remove_similar_memory_refs (&work_list); | 1227 if (flag_tree_loop_distribute_patterns) |
1198 | 1228 { |
1199 nb_generated_loops = distribute_loop (loop, work_list); | 1229 /* With the following working list, we're asking |
1230 distribute_loop to separate from the rest of the loop the | |
1231 stores of the form "A[i] = 0". */ | |
1232 stores_zero_from_loop (loop, &work_list); | |
1233 | |
1234 /* Do nothing if there are no patterns to be distributed. */ | |
1235 if (VEC_length (gimple, work_list) > 0) | |
1236 nb_generated_loops = distribute_loop (loop, work_list); | |
1237 } | |
1238 else if (flag_tree_loop_distribution) | |
1239 { | |
1240 /* With the following working list, we're asking | |
1241 distribute_loop to separate the stores of the loop: when | |
1242 dependences allow, it will end on having one store per | |
1243 loop. */ | |
1244 stores_from_loop (loop, &work_list); | |
1245 | |
1246 /* A simple heuristic for cache locality is to not split | |
1247 stores to the same array. Without this call, an unrolled | |
1248 loop would be split into as many loops as unroll factor, | |
1249 each loop storing in the same array. */ | |
1250 remove_similar_memory_refs (&work_list); | |
1251 | |
1252 nb_generated_loops = distribute_loop (loop, work_list); | |
1253 } | |
1200 | 1254 |
1201 if (dump_file && (dump_flags & TDF_DETAILS)) | 1255 if (dump_file && (dump_flags & TDF_DETAILS)) |
1202 { | 1256 { |
1203 if (nb_generated_loops > 1) | 1257 if (nb_generated_loops > 1) |
1204 fprintf (dump_file, "Loop %d distributed: split to %d loops.\n", | 1258 fprintf (dump_file, "Loop %d distributed: split to %d loops.\n", |
1205 loop->num, nb_generated_loops); | 1259 num, nb_generated_loops); |
1206 else | 1260 else |
1207 fprintf (dump_file, "Loop %d is the same.\n", loop->num); | 1261 fprintf (dump_file, "Loop %d is the same.\n", num); |
1208 } | 1262 } |
1209 | 1263 |
1210 verify_loop_structure (); | 1264 verify_loop_structure (); |
1211 | 1265 |
1212 VEC_free (gimple, heap, work_list); | 1266 VEC_free (gimple, heap, work_list); |
1216 } | 1270 } |
1217 | 1271 |
1218 static bool | 1272 static bool |
1219 gate_tree_loop_distribution (void) | 1273 gate_tree_loop_distribution (void) |
1220 { | 1274 { |
1221 return flag_tree_loop_distribution != 0; | 1275 return flag_tree_loop_distribution |
1276 || flag_tree_loop_distribute_patterns; | |
1222 } | 1277 } |
1223 | 1278 |
1224 struct gimple_opt_pass pass_loop_distribution = | 1279 struct gimple_opt_pass pass_loop_distribution = |
1225 { | 1280 { |
1226 { | 1281 { |