comparison gcc/tree-loop-distribution.c @ 145:1830386684a0

gcc-9.2.0
author anatofuz
date Thu, 13 Feb 2020 11:34:05 +0900
parents 84e7813d76e9
comparing 131:84e7813d76e9 with 145:1830386684a0
1 /* Loop distribution. 1 /* Loop distribution.
2 Copyright (C) 2006-2018 Free Software Foundation, Inc. 2 Copyright (C) 2006-2020 Free Software Foundation, Inc.
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr> 3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr>
4 and Sebastian Pop <sebastian.pop@amd.com>. 4 and Sebastian Pop <sebastian.pop@amd.com>.
5 5
6 This file is part of GCC. 6 This file is part of GCC.
7 7
110 #include "tree-ssa-loop.h" 110 #include "tree-ssa-loop.h"
111 #include "tree-into-ssa.h" 111 #include "tree-into-ssa.h"
112 #include "tree-ssa.h" 112 #include "tree-ssa.h"
113 #include "cfgloop.h" 113 #include "cfgloop.h"
114 #include "tree-scalar-evolution.h" 114 #include "tree-scalar-evolution.h"
115 #include "params.h"
116 #include "tree-vectorizer.h" 115 #include "tree-vectorizer.h"
116 #include "tree-eh.h"
117 #include "gimple-fold.h"
117 118
118 119
119 #define MAX_DATAREFS_NUM \ 120 #define MAX_DATAREFS_NUM \
120 ((unsigned) PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS)) 121 ((unsigned) param_loop_max_datarefs_for_datadeps)
121 122
122 /* Threshold controlling number of distributed partitions. Given it may 123 /* Threshold controlling number of distributed partitions. Given it may
123 be unnecessary if a memory stream cost model is invented in the future, 124 be unnecessary if a memory stream cost model is invented in the future,
124 we define it as a temporary macro, rather than a parameter. */ 125 we define it as a temporary macro, rather than a parameter. */
125 #define NUM_PARTITION_THRESHOLD (4) 126 #define NUM_PARTITION_THRESHOLD (4)
151 const data_dependence_relation *ddr2) 152 const data_dependence_relation *ddr2)
152 { 153 {
153 return (DDR_A (ddr1) == DDR_A (ddr2) && DDR_B (ddr1) == DDR_B (ddr2)); 154 return (DDR_A (ddr1) == DDR_A (ddr2) && DDR_B (ddr1) == DDR_B (ddr2));
154 } 155 }
155 156
156 /* The loop (nest) to be distributed. */ 157
157 static vec<loop_p> loop_nest; 158
158
159 /* Vector of data references in the loop to be distributed. */
160 static vec<data_reference_p> datarefs_vec;
161
162 /* Store index of data reference in aux field. */
163 #define DR_INDEX(dr) ((uintptr_t) (dr)->aux) 159 #define DR_INDEX(dr) ((uintptr_t) (dr)->aux)
164
165 /* Hash table for data dependence relation in the loop to be distributed. */
166 static hash_table<ddr_hasher> *ddrs_table;
167 160
168 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */ 161 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */
169 struct rdg_vertex 162 struct rdg_vertex
170 { 163 {
171 /* The statement represented by this vertex. */ 164 /* The statement represented by this vertex. */
208 /* Type of the dependence. */ 201 /* Type of the dependence. */
209 enum rdg_dep_type type; 202 enum rdg_dep_type type;
210 }; 203 };
211 204
212 #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type 205 #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type
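For orientation, an RDG is simply a directed graph whose vertices are statements and whose edges carry a dependence kind. A minimal stand-alone sketch of that shape (illustrative C++ only, not GCC's graph API):

#include <vector>

enum rdg_dep_type { flow_dd, control_dd };

struct mini_rdg
{
  struct edge { int dest; rdg_dep_type type; };
  std::vector<std::vector<edge> > succs;  /* successor list per vertex */

  explicit mini_rdg (int n_vertices) : succs (n_vertices) {}

  void add_edge (int src, int dest, rdg_dep_type t)
  { succs[src].push_back ({dest, t}); }  /* typed dependence edge */
};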
213
214 /* Dump vertex I in RDG to FILE. */
215
216 static void
217 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
218 {
219 struct vertex *v = &(rdg->vertices[i]);
220 struct graph_edge *e;
221
222 fprintf (file, "(vertex %d: (%s%s) (in:", i,
223 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
224 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
225
226 if (v->pred)
227 for (e = v->pred; e; e = e->pred_next)
228 fprintf (file, " %d", e->src);
229
230 fprintf (file, ") (out:");
231
232 if (v->succ)
233 for (e = v->succ; e; e = e->succ_next)
234 fprintf (file, " %d", e->dest);
235
236 fprintf (file, ")\n");
237 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
238 fprintf (file, ")\n");
239 }
240
241 /* Call dump_rdg_vertex on stderr. */
242
243 DEBUG_FUNCTION void
244 debug_rdg_vertex (struct graph *rdg, int i)
245 {
246 dump_rdg_vertex (stderr, rdg, i);
247 }
248
249 /* Dump the reduced dependence graph RDG to FILE. */
250
251 static void
252 dump_rdg (FILE *file, struct graph *rdg)
253 {
254 fprintf (file, "(rdg\n");
255 for (int i = 0; i < rdg->n_vertices; i++)
256 dump_rdg_vertex (file, rdg, i);
257 fprintf (file, ")\n");
258 }
259
260 /* Call dump_rdg on stderr. */
261
262 DEBUG_FUNCTION void
263 debug_rdg (struct graph *rdg)
264 {
265 dump_rdg (stderr, rdg);
266 }
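Given the format strings above, dumping a two-statement RDG where a load feeds a store would print along these lines (hypothetical statements):

(rdg
(vertex 0: (r) (in:) (out: 1)
_1 = a[i_2];
)
(vertex 1: (w) (in: 0) (out:)
b[i_2] = _1;
)
)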
267
268 static void
269 dot_rdg_1 (FILE *file, struct graph *rdg)
270 {
271 int i;
272 pretty_printer buffer;
273 pp_needs_newline (&buffer) = false;
274 buffer.buffer->stream = file;
275
276 fprintf (file, "digraph RDG {\n");
277
278 for (i = 0; i < rdg->n_vertices; i++)
279 {
280 struct vertex *v = &(rdg->vertices[i]);
281 struct graph_edge *e;
282
283 fprintf (file, "%d [label=\"[%d] ", i, i);
284 pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
285 pp_flush (&buffer);
286 fprintf (file, "\"]\n");
287
288 /* Highlight reads from memory. */
289 if (RDG_MEM_READS_STMT (rdg, i))
290 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
291
292 /* Highlight stores to memory. */
293 if (RDG_MEM_WRITE_STMT (rdg, i))
294 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
295
296 if (v->succ)
297 for (e = v->succ; e; e = e->succ_next)
298 switch (RDGE_TYPE (e))
299 {
300 case flow_dd:
301 /* These are the most common dependences: don't print these. */
302 fprintf (file, "%d -> %d \n", i, e->dest);
303 break;
304
305 case control_dd:
306 fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
307 break;
308
309 default:
310 gcc_unreachable ();
311 }
312 }
313
314 fprintf (file, "}\n\n");
315 }
316
317 /* Display the Reduced Dependence Graph using dotty. */
318
319 DEBUG_FUNCTION void
320 dot_rdg (struct graph *rdg)
321 {
322 /* When debugging, you may want to enable the following code. */
323 #ifdef HAVE_POPEN
324 FILE *file = popen ("dot -Tx11", "w");
325 if (!file)
326 return;
327 dot_rdg_1 (file, rdg);
328 fflush (file);
329 close (fileno (file));
330 pclose (file);
331 #else
332 dot_rdg_1 (stderr, rdg);
333 #endif
334 }
335
336 /* Returns the index of STMT in RDG. */
337
338 static int
339 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
340 {
341 int index = gimple_uid (stmt);
342 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
343 return index;
344 }
345
346 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
347 the index of DEF in RDG. */
348
349 static void
350 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
351 {
352 use_operand_p imm_use_p;
353 imm_use_iterator iterator;
354
355 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
356 {
357 struct graph_edge *e;
358 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
359
360 if (use < 0)
361 continue;
362
363 e = add_edge (rdg, idef, use);
364 e->data = XNEW (struct rdg_edge);
365 RDGE_TYPE (e) = flow_dd;
366 }
367 }
368
369 /* Creates an edge for the control dependences of BB to the vertex V. */
370
371 static void
372 create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
373 int v, control_dependences *cd)
374 {
375 bitmap_iterator bi;
376 unsigned edge_n;
377 EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
378 0, edge_n, bi)
379 {
380 basic_block cond_bb = cd->get_edge_src (edge_n);
381 gimple *stmt = last_stmt (cond_bb);
382 if (stmt && is_ctrl_stmt (stmt))
383 {
384 struct graph_edge *e;
385 int c = rdg_vertex_for_stmt (rdg, stmt);
386 if (c < 0)
387 continue;
388
389 e = add_edge (rdg, c, v);
390 e->data = XNEW (struct rdg_edge);
391 RDGE_TYPE (e) = control_dd;
392 }
393 }
394 }
395
396 /* Creates the edges of the reduced dependence graph RDG. */
397
398 static void
399 create_rdg_flow_edges (struct graph *rdg)
400 {
401 int i;
402 def_operand_p def_p;
403 ssa_op_iter iter;
404
405 for (i = 0; i < rdg->n_vertices; i++)
406 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
407 iter, SSA_OP_DEF)
408 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
409 }
410
411 /* Creates the edges of the reduced dependence graph RDG. */
412
413 static void
414 create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
415 {
416 int i;
417
418 for (i = 0; i < rdg->n_vertices; i++)
419 {
420 gimple *stmt = RDG_STMT (rdg, i);
421 if (gimple_code (stmt) == GIMPLE_PHI)
422 {
423 edge_iterator ei;
424 edge e;
425 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
426 if (flow_bb_inside_loop_p (loop, e->src))
427 create_edge_for_control_dependence (rdg, e->src, i, cd);
428 }
429 else
430 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
431 }
432 }
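To make the two edge kinds concrete, consider a hypothetical input loop; the store is flow-dependent on the load through t and control-dependent on the branch:

for (int i = 0; i < n; i++)
  {
    int t = a[i];    /* S1 */
    if (t > 0)       /* S2: control statement */
      b[i] = t;      /* S3: flow_dd edge S1->S3, control_dd edge S2->S3 */
  }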
433
434 /* Build the vertices of the reduced dependence graph RDG. Return false
435 if that failed. */
436
437 static bool
438 create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop)
439 {
440 int i;
441 gimple *stmt;
442
443 FOR_EACH_VEC_ELT (stmts, i, stmt)
444 {
445 struct vertex *v = &(rdg->vertices[i]);
446
447 /* Record statement to vertex mapping. */
448 gimple_set_uid (stmt, i);
449
450 v->data = XNEW (struct rdg_vertex);
451 RDGV_STMT (v) = stmt;
452 RDGV_DATAREFS (v).create (0);
453 RDGV_HAS_MEM_WRITE (v) = false;
454 RDGV_HAS_MEM_READS (v) = false;
455 if (gimple_code (stmt) == GIMPLE_PHI)
456 continue;
457
458 unsigned drp = datarefs_vec.length ();
459 if (!find_data_references_in_stmt (loop, stmt, &datarefs_vec))
460 return false;
461 for (unsigned j = drp; j < datarefs_vec.length (); ++j)
462 {
463 data_reference_p dr = datarefs_vec[j];
464 if (DR_IS_READ (dr))
465 RDGV_HAS_MEM_READS (v) = true;
466 else
467 RDGV_HAS_MEM_WRITE (v) = true;
468 RDGV_DATAREFS (v).safe_push (dr);
469 }
470 }
471 return true;
472 }
473
474 /* Array mapping basic block's index to its topological order. */
475 static int *bb_top_order_index;
476 /* And size of the array. */
477 static int bb_top_order_index_size;
478
479 /* If X has a smaller topological sort number than Y, returns -1;
480 if greater, returns 1. */
481
482 static int
483 bb_top_order_cmp (const void *x, const void *y)
484 {
485 basic_block bb1 = *(const basic_block *) x;
486 basic_block bb2 = *(const basic_block *) y;
487
488 gcc_assert (bb1->index < bb_top_order_index_size
489 && bb2->index < bb_top_order_index_size);
490 gcc_assert (bb1 == bb2
491 || bb_top_order_index[bb1->index]
492 != bb_top_order_index[bb2->index]);
493
494 return (bb_top_order_index[bb1->index] - bb_top_order_index[bb2->index]);
495 }
496
497 /* Initialize STMTS with all the statements of LOOP. We use topological
498 order to discover all statements. The order is important because
499 generate_loops_for_partition uses the same traversal for identifying
500 statements in loop copies. */
501
502 static void
503 stmts_from_loop (struct loop *loop, vec<gimple *> *stmts)
504 {
505 unsigned int i;
506 basic_block *bbs = get_loop_body_in_custom_order (loop, bb_top_order_cmp);
507
508 for (i = 0; i < loop->num_nodes; i++)
509 {
510 basic_block bb = bbs[i];
511
512 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
513 gsi_next (&bsi))
514 if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
515 stmts->safe_push (bsi.phi ());
516
517 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
518 gsi_next (&bsi))
519 {
520 gimple *stmt = gsi_stmt (bsi);
521 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
522 stmts->safe_push (stmt);
523 }
524 }
525
526 free (bbs);
527 }
528
529 /* Free the reduced dependence graph RDG. */
530
531 static void
532 free_rdg (struct graph *rdg)
533 {
534 int i;
535
536 for (i = 0; i < rdg->n_vertices; i++)
537 {
538 struct vertex *v = &(rdg->vertices[i]);
539 struct graph_edge *e;
540
541 for (e = v->succ; e; e = e->succ_next)
542 free (e->data);
543
544 if (v->data)
545 {
546 gimple_set_uid (RDGV_STMT (v), -1);
547 (RDGV_DATAREFS (v)).release ();
548 free (v->data);
549 }
550 }
551
552 free_graph (rdg);
553 }
554
555 /* Build the Reduced Dependence Graph (RDG) with one vertex per statement of
556 LOOP, and one edge per flow dependence or control dependence from control
557 dependence CD. While visiting each statement, data references are also
558 collected and recorded in global data DATAREFS_VEC. */
559
560 static struct graph *
561 build_rdg (struct loop *loop, control_dependences *cd)
562 {
563 struct graph *rdg;
564
565 /* Create the RDG vertices from the stmts of the loop nest. */
566 auto_vec<gimple *, 10> stmts;
567 stmts_from_loop (loop, &stmts);
568 rdg = new_graph (stmts.length ());
569 if (!create_rdg_vertices (rdg, stmts, loop))
570 {
571 free_rdg (rdg);
572 return NULL;
573 }
574 stmts.release ();
575
576 create_rdg_flow_edges (rdg);
577 if (cd)
578 create_rdg_cd_edges (rdg, cd, loop);
579
580 return rdg;
581 }
582
583 206
584 /* Kind of distributed loop. */ 207 /* Kind of distributed loop. */
585 enum partition_kind { 208 enum partition_kind {
586 PKIND_NORMAL, 209 PKIND_NORMAL,
587 /* Partial memset stands for a partition that can be distributed into a loop 210 /* Partial memset stands for a partition that can be distributed into a loop
628 { 251 {
629 /* Statements of the partition. */ 252 /* Statements of the partition. */
630 bitmap stmts; 253 bitmap stmts;
631 /* True if the partition defines a variable which is used outside of the loop. */ 254 /* True if the partition defines a variable which is used outside of the loop. */
632 bool reduction_p; 255 bool reduction_p;
256 location_t loc;
633 enum partition_kind kind; 257 enum partition_kind kind;
634 enum partition_type type; 258 enum partition_type type;
635 /* Data references in the partition. */ 259 /* Data references in the partition. */
636 bitmap datarefs; 260 bitmap datarefs;
637 /* Information of builtin partition. */ 261 /* Information of builtin partition. */
638 struct builtin_info *builtin; 262 struct builtin_info *builtin;
639 }; 263 };
640 264
641
642 /* Allocate and initialize a partition from BITMAP. */
643
644 static partition *
645 partition_alloc (void)
646 {
647 partition *partition = XCNEW (struct partition);
648 partition->stmts = BITMAP_ALLOC (NULL);
649 partition->reduction_p = false;
650 partition->kind = PKIND_NORMAL;
651 partition->datarefs = BITMAP_ALLOC (NULL);
652 return partition;
653 }
654
655 /* Free PARTITION. */
656
657 static void
658 partition_free (partition *partition)
659 {
660 BITMAP_FREE (partition->stmts);
661 BITMAP_FREE (partition->datarefs);
662 if (partition->builtin)
663 free (partition->builtin);
664
665 free (partition);
666 }
667
668 /* Returns true if the partition can be generated as a builtin. */
669
670 static bool
671 partition_builtin_p (partition *partition)
672 {
673 return partition->kind > PKIND_PARTIAL_MEMSET;
674 }
675
676 /* Returns true if the partition contains a reduction. */
677
678 static bool
679 partition_reduction_p (partition *partition)
680 {
681 return partition->reduction_p;
682 }
683
684 /* Partitions are fused for different reasons. */ 265 /* Partitions are fused for different reasons. */
685 enum fuse_type 266 enum fuse_type
686 { 267 {
687 FUSE_NON_BUILTIN = 0, 268 FUSE_NON_BUILTIN = 0,
688 FUSE_REDUCTION = 1, 269 FUSE_REDUCTION = 1,
697 "they have reductions", 278 "they have reductions",
698 "they have shared memory refs", 279 "they have shared memory refs",
699 "they are in the same dependence scc", 280 "they are in the same dependence scc",
700 "there is no point to distribute loop"}; 281 "there is no point to distribute loop"};
701 282
283
284 /* Dump vertex I in RDG to FILE. */
285
702 static void 286 static void
703 update_type_for_merge (struct graph *, partition *, partition *); 287 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
288 {
289 struct vertex *v = &(rdg->vertices[i]);
290 struct graph_edge *e;
291
292 fprintf (file, "(vertex %d: (%s%s) (in:", i,
293 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
294 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
295
296 if (v->pred)
297 for (e = v->pred; e; e = e->pred_next)
298 fprintf (file, " %d", e->src);
299
300 fprintf (file, ") (out:");
301
302 if (v->succ)
303 for (e = v->succ; e; e = e->succ_next)
304 fprintf (file, " %d", e->dest);
305
306 fprintf (file, ")\n");
307 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
308 fprintf (file, ")\n");
309 }
310
311 /* Call dump_rdg_vertex on stderr. */
312
313 DEBUG_FUNCTION void
314 debug_rdg_vertex (struct graph *rdg, int i)
315 {
316 dump_rdg_vertex (stderr, rdg, i);
317 }
318
319 /* Dump the reduced dependence graph RDG to FILE. */
320
321 static void
322 dump_rdg (FILE *file, struct graph *rdg)
323 {
324 fprintf (file, "(rdg\n");
325 for (int i = 0; i < rdg->n_vertices; i++)
326 dump_rdg_vertex (file, rdg, i);
327 fprintf (file, ")\n");
328 }
329
330 /* Call dump_rdg on stderr. */
331
332 DEBUG_FUNCTION void
333 debug_rdg (struct graph *rdg)
334 {
335 dump_rdg (stderr, rdg);
336 }
337
338 static void
339 dot_rdg_1 (FILE *file, struct graph *rdg)
340 {
341 int i;
342 pretty_printer buffer;
343 pp_needs_newline (&buffer) = false;
344 buffer.buffer->stream = file;
345
346 fprintf (file, "digraph RDG {\n");
347
348 for (i = 0; i < rdg->n_vertices; i++)
349 {
350 struct vertex *v = &(rdg->vertices[i]);
351 struct graph_edge *e;
352
353 fprintf (file, "%d [label=\"[%d] ", i, i);
354 pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM);
355 pp_flush (&buffer);
356 fprintf (file, "\"]\n");
357
358 /* Highlight reads from memory. */
359 if (RDG_MEM_READS_STMT (rdg, i))
360 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
361
362 /* Highlight stores to memory. */
363 if (RDG_MEM_WRITE_STMT (rdg, i))
364 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
365
366 if (v->succ)
367 for (e = v->succ; e; e = e->succ_next)
368 switch (RDGE_TYPE (e))
369 {
370 case flow_dd:
371 /* These are the most common dependences: don't print these. */
372 fprintf (file, "%d -> %d \n", i, e->dest);
373 break;
374
375 case control_dd:
376 fprintf (file, "%d -> %d [label=control] \n", i, e->dest);
377 break;
378
379 default:
380 gcc_unreachable ();
381 }
382 }
383
384 fprintf (file, "}\n\n");
385 }
386
387 /* Display the Reduced Dependence Graph using dotty. */
388
389 DEBUG_FUNCTION void
390 dot_rdg (struct graph *rdg)
391 {
392 /* When debugging, you may want to enable the following code. */
393 #ifdef HAVE_POPEN
394 FILE *file = popen ("dot -Tx11", "w");
395 if (!file)
396 return;
397 dot_rdg_1 (file, rdg);
398 fflush (file);
399 close (fileno (file));
400 pclose (file);
401 #else
402 dot_rdg_1 (stderr, rdg);
403 #endif
404 }
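Following the fprintf formats above, the same two-statement RDG would render to dot roughly as (hypothetical labels):

digraph RDG {
0 [label="[0] _1 = a[i_2];"]
0 [style=filled, fillcolor=green]
1 [label="[1] b[i_2] = _1;"]
1 [style=filled, fillcolor=red]
0 -> 1 
}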
405
406 /* Returns the index of STMT in RDG. */
407
408 static int
409 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt)
410 {
411 int index = gimple_uid (stmt);
412 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt);
413 return index;
414 }
415
416 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
417 the index of DEF in RDG. */
418
419 static void
420 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
421 {
422 use_operand_p imm_use_p;
423 imm_use_iterator iterator;
424
425 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
426 {
427 struct graph_edge *e;
428 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
429
430 if (use < 0)
431 continue;
432
433 e = add_edge (rdg, idef, use);
434 e->data = XNEW (struct rdg_edge);
435 RDGE_TYPE (e) = flow_dd;
436 }
437 }
438
439 /* Creates an edge for the control dependences of BB to the vertex V. */
440
441 static void
442 create_edge_for_control_dependence (struct graph *rdg, basic_block bb,
443 int v, control_dependences *cd)
444 {
445 bitmap_iterator bi;
446 unsigned edge_n;
447 EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index),
448 0, edge_n, bi)
449 {
450 basic_block cond_bb = cd->get_edge_src (edge_n);
451 gimple *stmt = last_stmt (cond_bb);
452 if (stmt && is_ctrl_stmt (stmt))
453 {
454 struct graph_edge *e;
455 int c = rdg_vertex_for_stmt (rdg, stmt);
456 if (c < 0)
457 continue;
458
459 e = add_edge (rdg, c, v);
460 e->data = XNEW (struct rdg_edge);
461 RDGE_TYPE (e) = control_dd;
462 }
463 }
464 }
465
466 /* Creates the edges of the reduced dependence graph RDG. */
467
468 static void
469 create_rdg_flow_edges (struct graph *rdg)
470 {
471 int i;
472 def_operand_p def_p;
473 ssa_op_iter iter;
474
475 for (i = 0; i < rdg->n_vertices; i++)
476 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
477 iter, SSA_OP_DEF)
478 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
479 }
480
481 /* Creates the edges of the reduced dependence graph RDG. */
482
483 static void
484 create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop)
485 {
486 int i;
487
488 for (i = 0; i < rdg->n_vertices; i++)
489 {
490 gimple *stmt = RDG_STMT (rdg, i);
491 if (gimple_code (stmt) == GIMPLE_PHI)
492 {
493 edge_iterator ei;
494 edge e;
495 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds)
496 if (flow_bb_inside_loop_p (loop, e->src))
497 create_edge_for_control_dependence (rdg, e->src, i, cd);
498 }
499 else
500 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd);
501 }
502 }
503
504
505 class loop_distribution
506 {
507 private:
508 /* The loop (nest) to be distributed. */
509 vec<loop_p> loop_nest;
510
511 /* Vector of data references in the loop to be distributed. */
512 vec<data_reference_p> datarefs_vec;
513
514 /* True if there is a nonaddressable data reference in the above vector. */
515 bool has_nonaddressable_dataref_p;
516
517 /* Store index of data reference in aux field. */
518
519 /* Hash table for data dependence relation in the loop to be distributed. */
520 hash_table<ddr_hasher> *ddrs_table;
521
522 /* Array mapping basic block's index to its topological order. */
523 int *bb_top_order_index;
524 /* And size of the array. */
525 int bb_top_order_index_size;
526
527 /* Build the vertices of the reduced dependence graph RDG. Return false
528 if that failed. */
529 bool create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop);
530
531 /* Initialize STMTS with all the statements of LOOP. We use topological
532 order to discover all statements. The order is important because
533 generate_loops_for_partition uses the same traversal for identifying
534 statements in loop copies. */
535 void stmts_from_loop (class loop *loop, vec<gimple *> *stmts);
536
537
538 /* Build the Reduced Dependence Graph (RDG) with one vertex per statement of
539 LOOP, and one edge per flow dependence or control dependence from control
540 dependence CD. While visiting each statement, data references are also
541 collected and recorded in global data DATAREFS_VEC. */
542 struct graph * build_rdg (class loop *loop, control_dependences *cd);
704 543
705 /* Merge PARTITION into the partition DEST. RDG is the reduced dependence 544 /* Merge PARTITION into the partition DEST. RDG is the reduced dependence
706 graph and we update type for result partition if it is non-NULL. */ 545 graph and we update type for result partition if it is non-NULL. */
546 void partition_merge_into (struct graph *rdg,
547 partition *dest, partition *partition,
548 enum fuse_type ft);
549
550
551 /* Return data dependence relation for data references A and B. The two
552 data references must be in lexicographic order w.r.t. the reduced
553 dependence graph RDG. We first try to find the ddr in the global ddr
554 hash table. If it doesn't exist, compute the ddr and cache it. */
555 data_dependence_relation * get_data_dependence (struct graph *rdg,
556 data_reference_p a,
557 data_reference_p b);
558
559
560 /* In reduced dependence graph RDG for loop distribution, return true if
561 dependence between references DR1 and DR2 leads to a dependence cycle
562 and such dependence cycle can't be resolved by runtime alias check. */
563 bool data_dep_in_cycle_p (struct graph *rdg, data_reference_p dr1,
564 data_reference_p dr2);
565
566
567 /* Given reduced dependence graph RDG, PARTITION1 and PARTITION2, update
568 PARTITION1's type after merging PARTITION2 into PARTITION1. */
569 void update_type_for_merge (struct graph *rdg,
570 partition *partition1, partition *partition2);
571
572
573 /* Returns a partition with all the statements needed for computing
574 the vertex V of the RDG, also including the loop exit conditions. */
575 partition *build_rdg_partition_for_vertex (struct graph *rdg, int v);
576
577 /* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG, classify
578 if it forms builtin memcpy or memmove call. */
579 void classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition,
580 data_reference_p dst_dr, data_reference_p src_dr);
581
582 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
583 For the moment we detect memset, memcpy and memmove patterns. Bitmap
584 STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions.
585 Returns true if there is a reduction in all partitions and we
586 possibly did not mark PARTITION as having one for this reason. */
587
588 bool
589 classify_partition (loop_p loop,
590 struct graph *rdg, partition *partition,
591 bitmap stmt_in_all_partitions);
592
593
594 /* Returns true when PARTITION1 and PARTITION2 access the same memory
595 object in RDG. */
596 bool share_memory_accesses (struct graph *rdg,
597 partition *partition1, partition *partition2);
598
599 /* For each seed statement in STARTING_STMTS, this function builds
600 a partition for it by adding dependent statements according to RDG.
601 All partitions are recorded in PARTITIONS. */
602 void rdg_build_partitions (struct graph *rdg,
603 vec<gimple *> starting_stmts,
604 vec<partition *> *partitions);
605
606 /* Compute partition dependence created by the data references in DRS1
607 and DRS2, modify and return DIR according to that. If ALIAS_DDRS is
608 not NULL, we record dependence introduced by possible alias between
609 two data references in ALIAS_DDRS; otherwise, we simply ignore such
610 dependence as if it doesn't exist at all. */
611 int pg_add_dependence_edges (struct graph *rdg, int dir, bitmap drs1,
612 bitmap drs2, vec<ddr_p> *alias_ddrs);
613
614
615 /* Build and return partition dependence graph for PARTITIONS. RDG is
616 reduced dependence graph for the loop to be distributed. If IGNORE_ALIAS_P
617 is true, data dependence caused by possible alias between references
618 is ignored, as if it doesn't exist at all; otherwise all dependences
619 are considered. */
620 struct graph *build_partition_graph (struct graph *rdg,
621 vec<struct partition *> *partitions,
622 bool ignore_alias_p);
623
624 /* Given reduced dependence graph RDG, merge strongly connected components
625 of PARTITIONS. If IGNORE_ALIAS_P is true, data dependence caused by
626 possible alias between references is ignored, as if it doesn't exist
627 at all; otherwise all dependences are considered. */
628 void merge_dep_scc_partitions (struct graph *rdg, vec<struct partition *>
629 *partitions, bool ignore_alias_p);
630
631 /* This is the main function breaking strongly connected components in
632 PARTITIONS given reduced dependence graph RDG. Store data dependence
633 relations for runtime alias check in ALIAS_DDRS. */
634 void break_alias_scc_partitions (struct graph *rdg, vec<struct partition *>
635 *partitions, vec<ddr_p> *alias_ddrs);
636
637
638 /* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
639 ALIAS_DDRS contains ddrs which need runtime alias check. */
640 void finalize_partitions (class loop *loop, vec<struct partition *>
641 *partitions, vec<ddr_p> *alias_ddrs);
642
643 /* Distributes the code from LOOP in such a way that producer statements
644 are placed before consumer statements. Tries to separate only the
645 statements from STMTS into separate loops. Returns the number of
646 distributed loops. Set NB_CALLS to number of generated builtin calls.
647 Set *DESTROY_P to whether LOOP needs to be destroyed. */
648 int distribute_loop (class loop *loop, vec<gimple *> stmts,
649 control_dependences *cd, int *nb_calls, bool *destroy_p,
650 bool only_patterns_p);
651
652 /* Compute topological order for basic blocks. Topological order is
653 needed because data dependence is computed for data references in
654 lexicographical order. */
655 void bb_top_order_init (void);
656
657 void bb_top_order_destroy (void);
658
659 public:
660
661 /* Getter for bb_top_order. */
662
663 inline int get_bb_top_order_index_size (void)
664 {
665 return bb_top_order_index_size;
666 }
667
668 inline int get_bb_top_order_index (int i)
669 {
670 return bb_top_order_index[i];
671 }
672
673 unsigned int execute (function *fun);
674 };
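The heart of this revision is mechanical: the file-scope statics (loop_nest, datarefs_vec, ddrs_table, bb_top_order_index) and the free functions that used them move into a single pass class, so the pass carries no global state. A minimal stand-alone sketch of the pattern (illustrative names, not GCC's):

#include <vector>

class mini_pass
{
  /* Was a file-scope static array in the old code.  */
  std::vector<int> top_order;

  /* Was a free function reading the static.  */
  int order_of (int bb_index) const { return top_order[bb_index]; }

public:
  unsigned execute ()
  {
    top_order.assign (8, 0);   /* per-run state lives in members now */
    return order_of (0);
  }
};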
675
676
677 /* If X has a smaller topological sort number than Y, returns -1;
678 if greater, returns 1. */
679 static int
680 bb_top_order_cmp_r (const void *x, const void *y, void *loop)
681 {
682 loop_distribution *_loop =
683 (loop_distribution *) loop;
684
685 basic_block bb1 = *(const basic_block *) x;
686 basic_block bb2 = *(const basic_block *) y;
687
688 int bb_top_order_index_size = _loop->get_bb_top_order_index_size ();
689
690 gcc_assert (bb1->index < bb_top_order_index_size
691 && bb2->index < bb_top_order_index_size);
692 gcc_assert (bb1 == bb2
693 || _loop->get_bb_top_order_index(bb1->index)
694 != _loop->get_bb_top_order_index(bb2->index));
695
696 return (_loop->get_bb_top_order_index(bb1->index) -
697 _loop->get_bb_top_order_index(bb2->index));
698 }
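Unlike the earlier bb_top_order_cmp, which read a file-scope array, this comparator threads its state through the void *loop context in qsort_r style. A stand-alone sketch of the same pattern (assumed names):

#include <vector>

static int
top_order_cmp_r (const void *x, const void *y, void *ctx)
{
  /* The ordering array arrives via the context pointer, not a global.  */
  const std::vector<int> &order = *static_cast<std::vector<int> *> (ctx);
  int a = *(const int *) x, b = *(const int *) y;
  return order[a] - order[b];
}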
699
700 bool
701 loop_distribution::create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts,
702 loop_p loop)
703 {
704 int i;
705 gimple *stmt;
706
707 FOR_EACH_VEC_ELT (stmts, i, stmt)
708 {
709 struct vertex *v = &(rdg->vertices[i]);
710
711 /* Record statement to vertex mapping. */
712 gimple_set_uid (stmt, i);
713
714 v->data = XNEW (struct rdg_vertex);
715 RDGV_STMT (v) = stmt;
716 RDGV_DATAREFS (v).create (0);
717 RDGV_HAS_MEM_WRITE (v) = false;
718 RDGV_HAS_MEM_READS (v) = false;
719 if (gimple_code (stmt) == GIMPLE_PHI)
720 continue;
721
722 unsigned drp = datarefs_vec.length ();
723 if (!find_data_references_in_stmt (loop, stmt, &datarefs_vec))
724 return false;
725 for (unsigned j = drp; j < datarefs_vec.length (); ++j)
726 {
727 data_reference_p dr = datarefs_vec[j];
728 if (DR_IS_READ (dr))
729 RDGV_HAS_MEM_READS (v) = true;
730 else
731 RDGV_HAS_MEM_WRITE (v) = true;
732 RDGV_DATAREFS (v).safe_push (dr);
733 has_nonaddressable_dataref_p |= may_be_nonaddressable_p (dr->ref);
734 }
735 }
736 return true;
737 }
738
739 void
740 loop_distribution::stmts_from_loop (class loop *loop, vec<gimple *> *stmts)
741 {
742 unsigned int i;
743 basic_block *bbs = get_loop_body_in_custom_order (loop, this, bb_top_order_cmp_r);
744
745 for (i = 0; i < loop->num_nodes; i++)
746 {
747 basic_block bb = bbs[i];
748
749 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
750 gsi_next (&bsi))
751 if (!virtual_operand_p (gimple_phi_result (bsi.phi ())))
752 stmts->safe_push (bsi.phi ());
753
754 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
755 gsi_next (&bsi))
756 {
757 gimple *stmt = gsi_stmt (bsi);
758 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt))
759 stmts->safe_push (stmt);
760 }
761 }
762
763 free (bbs);
764 }
765
766 /* Free the reduced dependence graph RDG. */
707 767
708 static void 768 static void
709 partition_merge_into (struct graph *rdg, partition *dest, 769 free_rdg (struct graph *rdg)
710 partition *partition, enum fuse_type ft) 770 {
771 int i;
772
773 for (i = 0; i < rdg->n_vertices; i++)
774 {
775 struct vertex *v = &(rdg->vertices[i]);
776 struct graph_edge *e;
777
778 for (e = v->succ; e; e = e->succ_next)
779 free (e->data);
780
781 if (v->data)
782 {
783 gimple_set_uid (RDGV_STMT (v), -1);
784 (RDGV_DATAREFS (v)).release ();
785 free (v->data);
786 }
787 }
788
789 free_graph (rdg);
790 }
791
792 struct graph *
793 loop_distribution::build_rdg (class loop *loop, control_dependences *cd)
794 {
795 struct graph *rdg;
796
797 /* Create the RDG vertices from the stmts of the loop nest. */
798 auto_vec<gimple *, 10> stmts;
799 stmts_from_loop (loop, &stmts);
800 rdg = new_graph (stmts.length ());
801 if (!create_rdg_vertices (rdg, stmts, loop))
802 {
803 free_rdg (rdg);
804 return NULL;
805 }
806 stmts.release ();
807
808 create_rdg_flow_edges (rdg);
809 if (cd)
810 create_rdg_cd_edges (rdg, cd, loop);
811
812 return rdg;
813 }
814
815
816 /* Allocate and initialize a partition from BITMAP. */
817
818 static partition *
819 partition_alloc (void)
820 {
821 partition *partition = XCNEW (struct partition);
822 partition->stmts = BITMAP_ALLOC (NULL);
823 partition->reduction_p = false;
824 partition->loc = UNKNOWN_LOCATION;
825 partition->kind = PKIND_NORMAL;
826 partition->type = PTYPE_PARALLEL;
827 partition->datarefs = BITMAP_ALLOC (NULL);
828 return partition;
829 }
830
831 /* Free PARTITION. */
832
833 static void
834 partition_free (partition *partition)
835 {
836 BITMAP_FREE (partition->stmts);
837 BITMAP_FREE (partition->datarefs);
838 if (partition->builtin)
839 free (partition->builtin);
840
841 free (partition);
842 }
843
844 /* Returns true if the partition can be generated as a builtin. */
845
846 static bool
847 partition_builtin_p (partition *partition)
848 {
849 return partition->kind > PKIND_PARTIAL_MEMSET;
850 }
851
852 /* Returns true if the partition contains a reduction. */
853
854 static bool
855 partition_reduction_p (partition *partition)
856 {
857 return partition->reduction_p;
858 }
859
860 void
861 loop_distribution::partition_merge_into (struct graph *rdg,
862 partition *dest, partition *partition, enum fuse_type ft)
711 { 863 {
712 if (dump_file && (dump_flags & TDF_DETAILS)) 864 if (dump_file && (dump_flags & TDF_DETAILS))
713 { 865 {
714 fprintf (dump_file, "Fuse partitions because %s:\n", fuse_message[ft]); 866 fprintf (dump_file, "Fuse partitions because %s:\n", fuse_message[ft]);
715 fprintf (dump_file, " Part 1: "); 867 fprintf (dump_file, " Part 1: ");
776 return false; 928 return false;
777 } 929 }
778 930
779 /* Return a copy of LOOP placed before LOOP. */ 931 /* Return a copy of LOOP placed before LOOP. */
780 932
781 static struct loop * 933 static class loop *
782 copy_loop_before (struct loop *loop) 934 copy_loop_before (class loop *loop)
783 { 935 {
784 struct loop *res; 936 class loop *res;
785 edge preheader = loop_preheader_edge (loop); 937 edge preheader = loop_preheader_edge (loop);
786 938
787 initialize_original_copy_tables (); 939 initialize_original_copy_tables ();
788 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader); 940 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader);
789 gcc_assert (res != NULL); 941 gcc_assert (res != NULL);
794 } 946 }
795 947
796 /* Creates an empty basic block after LOOP. */ 948 /* Creates an empty basic block after LOOP. */
797 949
798 static void 950 static void
799 create_bb_after_loop (struct loop *loop) 951 create_bb_after_loop (class loop *loop)
800 { 952 {
801 edge exit = single_exit (loop); 953 edge exit = single_exit (loop);
802 954
803 if (!exit) 955 if (!exit)
804 return; 956 return;
811 PARTITION bitmap are removed from the loop or from its copy. The 963 PARTITION bitmap are removed from the loop or from its copy. The
812 statements are indexed in sequence inside a basic block, and the 964 statements are indexed in sequence inside a basic block, and the
813 basic blocks of a loop are taken in dom order. */ 965 basic blocks of a loop are taken in dom order. */
814 966
815 static void 967 static void
816 generate_loops_for_partition (struct loop *loop, partition *partition, 968 generate_loops_for_partition (class loop *loop, partition *partition,
817 bool copy_p) 969 bool copy_p)
818 { 970 {
819 unsigned i; 971 unsigned i;
820 basic_block *bbs; 972 basic_block *bbs;
821 973
983 } 1135 }
984 1136
985 /* Generate a call to memset for PARTITION in LOOP. */ 1137 /* Generate a call to memset for PARTITION in LOOP. */
986 1138
987 static void 1139 static void
988 generate_memset_builtin (struct loop *loop, partition *partition) 1140 generate_memset_builtin (class loop *loop, partition *partition)
989 { 1141 {
990 gimple_stmt_iterator gsi; 1142 gimple_stmt_iterator gsi;
991 tree mem, fn, nb_bytes; 1143 tree mem, fn, nb_bytes;
992 tree val; 1144 tree val;
993 struct builtin_info *builtin = partition->builtin; 1145 struct builtin_info *builtin = partition->builtin;
994 gimple *fn_call; 1146 gimple *fn_call;
995 1147
996 /* The new statements will be placed before LOOP. */ 1148 /* The new statements will be placed before LOOP. */
997 gsi = gsi_last_bb (loop_preheader_edge (loop)->src); 1149 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
998 1150
999 nb_bytes = builtin->size; 1151 nb_bytes = rewrite_to_non_trapping_overflow (builtin->size);
1000 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE, 1152 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
1001 false, GSI_CONTINUE_LINKING); 1153 false, GSI_CONTINUE_LINKING);
1002 mem = builtin->dst_base; 1154 mem = builtin->dst_base;
1003 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE, 1155 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
1004 false, GSI_CONTINUE_LINKING); 1156 false, GSI_CONTINUE_LINKING);
1020 val = tem; 1172 val = tem;
1021 } 1173 }
1022 1174
1023 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); 1175 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET));
1024 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes); 1176 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes);
1177 gimple_set_location (fn_call, partition->loc);
1025 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); 1178 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
1179 fold_stmt (&gsi);
1026 1180
1027 if (dump_file && (dump_flags & TDF_DETAILS)) 1181 if (dump_file && (dump_flags & TDF_DETAILS))
1028 { 1182 {
1029 fprintf (dump_file, "generated memset"); 1183 fprintf (dump_file, "generated memset");
1030 if (bytev == 0) 1184 if (bytev == 0)
1035 } 1189 }
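At the source level, the transformation implemented here corresponds to (hypothetical loop):

for (i = 0; i < n; i++)
  a[i] = 0;

/* becomes, with the call emitted in the preheader before the loop:  */
__builtin_memset (a, 0, n * sizeof (*a));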
1036 1190
1037 /* Generate a call to memcpy for PARTITION in LOOP. */ 1191 /* Generate a call to memcpy for PARTITION in LOOP. */
1038 1192
1039 static void 1193 static void
1040 generate_memcpy_builtin (struct loop *loop, partition *partition) 1194 generate_memcpy_builtin (class loop *loop, partition *partition)
1041 { 1195 {
1042 gimple_stmt_iterator gsi; 1196 gimple_stmt_iterator gsi;
1043 gimple *fn_call; 1197 gimple *fn_call;
1044 tree dest, src, fn, nb_bytes; 1198 tree dest, src, fn, nb_bytes;
1045 enum built_in_function kind; 1199 enum built_in_function kind;
1046 struct builtin_info *builtin = partition->builtin; 1200 struct builtin_info *builtin = partition->builtin;
1047 1201
1048 /* The new statements will be placed before LOOP. */ 1202 /* The new statements will be placed before LOOP. */
1049 gsi = gsi_last_bb (loop_preheader_edge (loop)->src); 1203 gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
1050 1204
1051 nb_bytes = builtin->size; 1205 nb_bytes = rewrite_to_non_trapping_overflow (builtin->size);
1052 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE, 1206 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
1053 false, GSI_CONTINUE_LINKING); 1207 false, GSI_CONTINUE_LINKING);
1054 dest = builtin->dst_base; 1208 dest = builtin->dst_base;
1055 src = builtin->src_base; 1209 src = builtin->src_base;
1056 if (partition->kind == PKIND_MEMCPY 1210 if (partition->kind == PKIND_MEMCPY
1063 false, GSI_CONTINUE_LINKING); 1217 false, GSI_CONTINUE_LINKING);
1064 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE, 1218 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE,
1065 false, GSI_CONTINUE_LINKING); 1219 false, GSI_CONTINUE_LINKING);
1066 fn = build_fold_addr_expr (builtin_decl_implicit (kind)); 1220 fn = build_fold_addr_expr (builtin_decl_implicit (kind));
1067 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes); 1221 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes);
1222 gimple_set_location (fn_call, partition->loc);
1068 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); 1223 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING);
1224 fold_stmt (&gsi);
1069 1225
1070 if (dump_file && (dump_flags & TDF_DETAILS)) 1226 if (dump_file && (dump_flags & TDF_DETAILS))
1071 { 1227 {
1072 if (kind == BUILT_IN_MEMCPY) 1228 if (kind == BUILT_IN_MEMCPY)
1073 fprintf (dump_file, "generated memcpy\n"); 1229 fprintf (dump_file, "generated memcpy\n");
1077 } 1233 }
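Correspondingly for the copy case: independent arrays allow memcpy, while a possibly overlapping self-copy must use memmove (hypothetical loops):

for (i = 0; i < n; i++)
  d[i] = s[i];        /* -> __builtin_memcpy (d, s, n * sizeof (*d)); */

for (i = 0; i < n; i++)
  p[i] = p[i + 4];    /* overlap possible -> __builtin_memmove */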
1078 1234
1079 /* Remove and destroy the loop LOOP. */ 1235 /* Remove and destroy the loop LOOP. */
1080 1236
1081 static void 1237 static void
1082 destroy_loop (struct loop *loop) 1238 destroy_loop (class loop *loop)
1083 { 1239 {
1084 unsigned nbbs = loop->num_nodes; 1240 unsigned nbbs = loop->num_nodes;
1085 edge exit = single_exit (loop); 1241 edge exit = single_exit (loop);
1086 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest; 1242 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest;
1087 basic_block *bbs; 1243 basic_block *bbs;
1088 unsigned i; 1244 unsigned i;
1089 1245
1090 bbs = get_loop_body_in_dom_order (loop); 1246 bbs = get_loop_body_in_dom_order (loop);
1091 1247
1248 gimple_stmt_iterator dst_gsi = gsi_after_labels (exit->dest);
1249 bool safe_p = single_pred_p (exit->dest);
1250 for (unsigned i = 0; i < nbbs; ++i)
1251 {
1252 /* We have made sure to not leave any dangling uses of SSA
1253 names defined in the loop. With the exception of virtuals.
1254 Make sure we replace all uses of virtual defs that will remain
1255 outside of the loop with the bare symbol as delete_basic_block
1256 will release them. */
1257 for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
1258 gsi_next (&gsi))
1259 {
1260 gphi *phi = gsi.phi ();
1261 if (virtual_operand_p (gimple_phi_result (phi)))
1262 mark_virtual_phi_result_for_renaming (phi);
1263 }
1264 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);)
1265 {
1266 gimple *stmt = gsi_stmt (gsi);
1267 tree vdef = gimple_vdef (stmt);
1268 if (vdef && TREE_CODE (vdef) == SSA_NAME)
1269 mark_virtual_operand_for_renaming (vdef);
1270 /* Also move and eventually reset debug stmts. We can leave
1271 constant values in place in case the stmt dominates the exit.
1272 ??? Non-constant values from the last iteration can be
1273 replaced with final values if we can compute them. */
1274 if (gimple_debug_bind_p (stmt))
1275 {
1276 tree val = gimple_debug_bind_get_value (stmt);
1277 gsi_move_before (&gsi, &dst_gsi);
1278 if (val
1279 && (!safe_p
1280 || !is_gimple_min_invariant (val)
1281 || !dominated_by_p (CDI_DOMINATORS, exit->src, bbs[i])))
1282 {
1283 gimple_debug_bind_reset_value (stmt);
1284 update_stmt (stmt);
1285 }
1286 }
1287 else
1288 gsi_next (&gsi);
1289 }
1290 }
1291
1092 redirect_edge_pred (exit, src); 1292 redirect_edge_pred (exit, src);
1093 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE); 1293 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE);
1094 exit->flags |= EDGE_FALLTHRU; 1294 exit->flags |= EDGE_FALLTHRU;
1095 cancel_loop_tree (loop); 1295 cancel_loop_tree (loop);
1096 rescan_loop_exit (exit, false, true); 1296 rescan_loop_exit (exit, false, true);
1097 1297
1098 i = nbbs; 1298 i = nbbs;
1099 do 1299 do
1100 { 1300 {
1101 /* We have made sure to not leave any dangling uses of SSA
1102 names defined in the loop. With the exception of virtuals.
1103 Make sure we replace all uses of virtual defs that will remain
1104 outside of the loop with the bare symbol as delete_basic_block
1105 will release them. */
1106 --i; 1301 --i;
1107 for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi);
1108 gsi_next (&gsi))
1109 {
1110 gphi *phi = gsi.phi ();
1111 if (virtual_operand_p (gimple_phi_result (phi)))
1112 mark_virtual_phi_result_for_renaming (phi);
1113 }
1114 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);
1115 gsi_next (&gsi))
1116 {
1117 gimple *stmt = gsi_stmt (gsi);
1118 tree vdef = gimple_vdef (stmt);
1119 if (vdef && TREE_CODE (vdef) == SSA_NAME)
1120 mark_virtual_operand_for_renaming (vdef);
1121 }
1122 delete_basic_block (bbs[i]); 1302 delete_basic_block (bbs[i]);
1123 } 1303 }
1124 while (i != 0); 1304 while (i != 0);
1125 1305
1126 free (bbs); 1306 free (bbs);
1130 } 1310 }
1131 1311
1132 /* Generates code for PARTITION. Return whether LOOP needs to be destroyed. */ 1312 /* Generates code for PARTITION. Return whether LOOP needs to be destroyed. */
1133 1313
1134 static bool 1314 static bool
1135 generate_code_for_partition (struct loop *loop, 1315 generate_code_for_partition (class loop *loop,
1136 partition *partition, bool copy_p) 1316 partition *partition, bool copy_p)
1137 { 1317 {
1138 switch (partition->kind) 1318 switch (partition->kind)
1139 { 1319 {
1140 case PKIND_NORMAL: 1320 case PKIND_NORMAL:
1163 if (!copy_p) 1343 if (!copy_p)
1164 return true; 1344 return true;
1165 return false; 1345 return false;
1166 } 1346 }
1167 1347
1168 /* Return data dependence relation for data references A and B. The two 1348 data_dependence_relation *
1169 data references must be in lexicographic order wrto reduced dependence 1349 loop_distribution::get_data_dependence (struct graph *rdg, data_reference_p a,
1170 graph RDG. We firstly try to find ddr from global ddr hash table. If 1350 data_reference_p b)
1171 it doesn't exist, compute the ddr and cache it. */
1172
1173 static data_dependence_relation *
1174 get_data_dependence (struct graph *rdg, data_reference_p a, data_reference_p b)
1175 { 1351 {
1176 struct data_dependence_relation ent, **slot; 1352 struct data_dependence_relation ent, **slot;
1177 struct data_dependence_relation *ddr; 1353 struct data_dependence_relation *ddr;
1178 1354
1179 gcc_assert (DR_IS_WRITE (a) || DR_IS_WRITE (b)); 1355 gcc_assert (DR_IS_WRITE (a) || DR_IS_WRITE (b));
1190 } 1366 }
1191 1367
1192 return *slot; 1368 return *slot;
1193 } 1369 }
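The caching scheme above is plain memoization keyed on the (A, B) pair. A stand-alone sketch of the same idea (illustrative types, not GCC's hash_table):

#include <map>
#include <utility>

struct mini_ddr { int dist; };   /* stand-in for the dependence info */

static std::map<std::pair<void *, void *>, mini_ddr> ddr_cache;

static mini_ddr &
get_ddr (void *a, void *b)
{
  std::pair<void *, void *> key (a, b);
  auto it = ddr_cache.find (key);
  if (it == ddr_cache.end ())
    /* Miss: compute the relation once and cache it.  */
    it = ddr_cache.emplace (key, mini_ddr {0}).first;
  return it->second;
}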
1194 1370
1195 /* In reduced dependence graph RDG for loop distribution, return true if 1371 bool
1196 dependence between references DR1 and DR2 leads to a dependence cycle 1372 loop_distribution::data_dep_in_cycle_p (struct graph *rdg,
1197 and such dependence cycle can't be resolved by runtime alias check. */ 1373 data_reference_p dr1,
1198 1374 data_reference_p dr2)
1199 static bool
1200 data_dep_in_cycle_p (struct graph *rdg,
1201 data_reference_p dr1, data_reference_p dr2)
1202 { 1375 {
1203 struct data_dependence_relation *ddr; 1376 struct data_dependence_relation *ddr;
1204 1377
1205 /* Re-shuffle data-refs to be in topological order. */ 1378 /* Re-shuffle data-refs to be in topological order. */
1206 if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1)) 1379 if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1))
1226 return false; 1399 return false;
1227 1400
1228 return true; 1401 return true;
1229 } 1402 }
1230 1403
1231 /* Given reduced dependence graph RDG, PARTITION1 and PARTITION2, update 1404 void
1232 PARTITION1's type after merging PARTITION2 into PARTITION1. */ 1405 loop_distribution::update_type_for_merge (struct graph *rdg,
1233 1406 partition *partition1,
1234 static void 1407 partition *partition2)
1235 update_type_for_merge (struct graph *rdg,
1236 partition *partition1, partition *partition2)
1237 { 1408 {
1238 unsigned i, j; 1409 unsigned i, j;
1239 bitmap_iterator bi, bj; 1410 bitmap_iterator bi, bj;
1240 data_reference_p dr1, dr2; 1411 data_reference_p dr1, dr2;
1241 1412
1259 } 1430 }
1260 } 1431 }
1261 } 1432 }
1262 } 1433 }
1263 1434
1264 /* Returns a partition with all the statements needed for computing 1435 partition *
1265 the vertex V of the RDG, also including the loop exit conditions. */ 1436 loop_distribution::build_rdg_partition_for_vertex (struct graph *rdg, int v)
1266
1267 static partition *
1268 build_rdg_partition_for_vertex (struct graph *rdg, int v)
1269 { 1437 {
1270 partition *partition = partition_alloc (); 1438 partition *partition = partition_alloc ();
1271 auto_vec<int, 3> nodes; 1439 auto_vec<int, 3> nodes;
1272 unsigned i, j; 1440 unsigned i, j;
1273 int x; 1441 int x;
1307 /* Given PARTITION of LOOP and RDG, record single load/store data references 1475 /* Given PARTITION of LOOP and RDG, record single load/store data references
1308 for builtin partition in SRC_DR/DST_DR, return false if there is no such 1476 for builtin partition in SRC_DR/DST_DR, return false if there is no such
1309 data references. */ 1477 data references. */
1310 1478
1311 static bool 1479 static bool
1312 find_single_drs (struct loop *loop, struct graph *rdg, partition *partition, 1480 find_single_drs (class loop *loop, struct graph *rdg, partition *partition,
1313 data_reference_p *dst_dr, data_reference_p *src_dr) 1481 data_reference_p *dst_dr, data_reference_p *src_dr)
1314 { 1482 {
1315 unsigned i; 1483 unsigned i;
1316 data_reference_p single_ld = NULL, single_st = NULL; 1484 data_reference_p single_ld = NULL, single_st = NULL;
1317 bitmap_iterator bi; 1485 bitmap_iterator bi;
1430 compute_access_range (loop_p loop_nest, data_reference_p dr, tree *base, 1598 compute_access_range (loop_p loop_nest, data_reference_p dr, tree *base,
1431 tree *size, vec<tree> *steps = NULL) 1599 tree *size, vec<tree> *steps = NULL)
1432 { 1600 {
1433 location_t loc = gimple_location (DR_STMT (dr)); 1601 location_t loc = gimple_location (DR_STMT (dr));
1434 basic_block bb = gimple_bb (DR_STMT (dr)); 1602 basic_block bb = gimple_bb (DR_STMT (dr));
1435 struct loop *loop = bb->loop_father; 1603 class loop *loop = bb->loop_father;
1436 tree ref = DR_REF (dr); 1604 tree ref = DR_REF (dr);
1437 tree access_base = build_fold_addr_expr (ref); 1605 tree access_base = build_fold_addr_expr (ref);
1438 tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (ref)); 1606 tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (ref));
1439 int res = 0; 1607 int res = 0;
1440 1608
1557 } 1725 }
1558 1726
1559 /* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG, classify 1727 /* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG, classify
1560 if it forms builtin memcpy or memmove call. */ 1728 if it forms builtin memcpy or memmove call. */
1561 1729
1562 static void 1730 void
1563 classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition, 1731 loop_distribution::classify_builtin_ldst (loop_p loop, struct graph *rdg,
1564 data_reference_p dst_dr, data_reference_p src_dr) 1732 partition *partition,
1733 data_reference_p dst_dr,
1734 data_reference_p src_dr)
1565 { 1735 {
1566 tree base, size, src_base, src_size; 1736 tree base, size, src_base, src_size;
1567 auto_vec<tree> dst_steps, src_steps; 1737 auto_vec<tree> dst_steps, src_steps;
1568 1738
1569 /* Compute access range of both load and store. */ 1739 /* Compute access range of both load and store. */
1617 partition->builtin = alloc_builtin (dst_dr, src_dr, base, src_base, size); 1787 partition->builtin = alloc_builtin (dst_dr, src_dr, base, src_base, size);
1618 partition->kind = PKIND_MEMMOVE; 1788 partition->kind = PKIND_MEMMOVE;
1619 return; 1789 return;
1620 } 1790 }
1621 1791
1622 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP. 1792 bool
1623 For the moment we detect memset, memcpy and memmove patterns. Bitmap 1793 loop_distribution::classify_partition (loop_p loop,
1624 STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions. */ 1794 struct graph *rdg, partition *partition,
1625 1795 bitmap stmt_in_all_partitions)
1626 static void
1627 classify_partition (loop_p loop, struct graph *rdg, partition *partition,
1628 bitmap stmt_in_all_partitions)
1629 { 1796 {
1630 bitmap_iterator bi; 1797 bitmap_iterator bi;
1631 unsigned i; 1798 unsigned i;
1632 data_reference_p single_ld = NULL, single_st = NULL; 1799 data_reference_p single_ld = NULL, single_st = NULL;
1633 bool volatiles_p = false, has_reduction = false; 1800 bool volatiles_p = false, has_reduction = false;
1649 is used outside of the loop. To work around this issue, we skip 1816 is used outside of the loop. To work around this issue, we skip
1650 marking the partition as reduction if the reduction stmt belongs 1817 marking the partition as reduction if the reduction stmt belongs
1651 to all partitions. In such case, reduction will be computed 1818 to all partitions. In such case, reduction will be computed
1652 correctly no matter how partitions are fused/distributed. */ 1819 correctly no matter how partitions are fused/distributed. */
1653 if (!bitmap_bit_p (stmt_in_all_partitions, i)) 1820 if (!bitmap_bit_p (stmt_in_all_partitions, i))
1654 { 1821 partition->reduction_p = true;
1655 partition->reduction_p = true; 1822 else
1656 return; 1823 has_reduction = true;
1657 } 1824 }
1658 has_reduction = true; 1825 }
1659 } 1826
1660 } 1827 /* Simple workaround to prevent classifying the partition as builtin
1828 if it contains any use outside of the loop. For the case where all
1829 partitions have the reduction, this simple workaround is delayed
1830 to affect only the last partition. */
1831 if (partition->reduction_p)
1832 return has_reduction;
1661 1833
1662 /* Perform general partition disqualification for builtins. */ 1834 /* Perform general partition disqualification for builtins. */
1663 if (volatiles_p 1835 if (volatiles_p
1664 /* Simple workaround to prevent classifying the partition as builtin
1665 if it contains any use outside of loop. */
1666 || has_reduction
1667 || !flag_tree_loop_distribute_patterns) 1836 || !flag_tree_loop_distribute_patterns)
1668 return; 1837 return has_reduction;
1669 1838
1670 /* Find single load/store data references for builtin partition. */ 1839 /* Find single load/store data references for builtin partition. */
1671 if (!find_single_drs (loop, rdg, partition, &single_st, &single_ld)) 1840 if (!find_single_drs (loop, rdg, partition, &single_st, &single_ld))
1672 return; 1841 return has_reduction;
1842
1843 partition->loc = gimple_location (DR_STMT (single_st));
1673 1844
1674 /* Classify the builtin kind. */ 1845 /* Classify the builtin kind. */
1675 if (single_ld == NULL) 1846 if (single_ld == NULL)
1676 classify_builtin_st (loop, partition, single_st); 1847 classify_builtin_st (loop, partition, single_st);
1677 else 1848 else
1678 classify_builtin_ldst (loop, rdg, partition, single_st, single_ld); 1849 classify_builtin_ldst (loop, rdg, partition, single_st, single_ld);
1679 } 1850 return has_reduction;
1680 1851 }
1681 /* Returns true when PARTITION1 and PARTITION2 access the same memory 1852
1682 object in RDG. */ 1853 bool
1683 1854 loop_distribution::share_memory_accesses (struct graph *rdg,
1684 static bool
1685 share_memory_accesses (struct graph *rdg,
1686 partition *partition1, partition *partition2) 1855 partition *partition1, partition *partition2)
1687 { 1856 {
1688 unsigned i, j; 1857 unsigned i, j;
1689 bitmap_iterator bi, bj; 1858 bitmap_iterator bi, bj;
1690 data_reference_p dr1, dr2; 1859 data_reference_p dr1, dr2;
1727 1896
1728 /* For each seed statement in STARTING_STMTS, this function builds 1897 /* For each seed statement in STARTING_STMTS, this function builds
1729 a partition for it by adding dependent statements according to RDG. 1898 a partition for it by adding dependent statements according to RDG.
1730 All partitions are recorded in PARTITIONS. */ 1899 All partitions are recorded in PARTITIONS. */
1731 1900
1732 static void 1901 void
1733 rdg_build_partitions (struct graph *rdg, 1902 loop_distribution::rdg_build_partitions (struct graph *rdg,
1734 vec<gimple *> starting_stmts, 1903 vec<gimple *> starting_stmts,
1735 vec<partition *> *partitions) 1904 vec<partition *> *partitions)
1736 { 1905 {
1737 auto_bitmap processed; 1906 auto_bitmap processed;
1738 int i; 1907 int i;
1739 gimple *stmt; 1908 gimple *stmt;
1740 1909
1846 return true; 2015 return true;
1847 2016
1848 return false; 2017 return false;
1849 } 2018 }
1850 2019
1851 /* Compute partition dependence created by the data references in DRS1 2020 int
1852 and DRS2, modify and return DIR according to that. IF ALIAS_DDR is 2021 loop_distribution::pg_add_dependence_edges (struct graph *rdg, int dir,
1853 not NULL, we record dependence introduced by possible alias between
1854 two data references in ALIAS_DDRS; otherwise, we simply ignore such
1855 dependence as if it doesn't exist at all. */
1856
1857 static int
1858 pg_add_dependence_edges (struct graph *rdg, int dir,
1859 bitmap drs1, bitmap drs2, vec<ddr_p> *alias_ddrs) 2022 bitmap drs1, bitmap drs2, vec<ddr_p> *alias_ddrs)
1860 { 2023 {
1861 unsigned i, j; 2024 unsigned i, j;
1862 bitmap_iterator bi, bj; 2025 bitmap_iterator bi, bj;
1863 data_reference_p dr1, dr2, saved_dr1; 2026 data_reference_p dr1, dr2, saved_dr1;
1894 res = data_ref_compare_tree (DR_BASE_ADDRESS (dr1), 2057 res = data_ref_compare_tree (DR_BASE_ADDRESS (dr1),
1895 DR_BASE_ADDRESS (dr2)); 2058 DR_BASE_ADDRESS (dr2));
1896 /* Be conservative. If data references are not well analyzed, 2059 /* Be conservative. If data references are not well analyzed,
1897 or the two data references have the same base address and 2060 or the two data references have the same base address and
1898 offset, add the dependence and consider them aliases of each other. 2061 offset, add the dependence and consider them aliases of each other.
1899 In other words, the dependence can not be resolved by 2062 In other words, the dependence cannot be resolved by
1900 runtime alias check. */ 2063 runtime alias check. */
1901 if (!DR_BASE_ADDRESS (dr1) || !DR_BASE_ADDRESS (dr2) 2064 if (!DR_BASE_ADDRESS (dr1) || !DR_BASE_ADDRESS (dr2)
1902 || !DR_OFFSET (dr1) || !DR_OFFSET (dr2) 2065 || !DR_OFFSET (dr1) || !DR_OFFSET (dr2)
1903 || !DR_INIT (dr1) || !DR_INIT (dr2) 2066 || !DR_INIT (dr1) || !DR_INIT (dr2)
1904 || !DR_STEP (dr1) || !tree_fits_uhwi_p (DR_STEP (dr1)) 2067 || !DR_STEP (dr1) || !tree_fits_uhwi_p (DR_STEP (dr1))
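Illustrative standalone loops for the two outcomes this function distinguishes (names made up, not output of the pass):

    /* Illustrative sketch only.  */
    void compile_time_dep (int *a, int n)
    {
      for (int i = 1; i < n; i++)
        a[i] = a[i - 1] + 1;    /* dependence known at compile time: no
                                   runtime check can make it go away  */
    }

    void maybe_alias (int *a, int *b, int n)
    {
      for (int i = 0; i < n; i++)
        a[i] = b[i] + 1;        /* a and b may alias: the dependence can be
                                   guarded by a runtime alias check  */
    }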
2069 reduced dependence graph for the loop to be distributed. If IGNORE_ALIAS_P 2232 reduced dependence graph for the loop to be distributed. If IGNORE_ALIAS_P
2070 is true, data dependence caused by possible alias between references 2233 is true, data dependence caused by possible alias between references
2071 is ignored, as if it doesn't exist at all; otherwise all dependences 2234 is ignored, as if it doesn't exist at all; otherwise all dependences
2072 are considered. */ 2235 are considered. */
2073 2236
2074 static struct graph * 2237 struct graph *
2075 build_partition_graph (struct graph *rdg, 2238 loop_distribution::build_partition_graph (struct graph *rdg,
2076 vec<struct partition *> *partitions, 2239 vec<struct partition *> *partitions,
2077 bool ignore_alias_p) 2240 bool ignore_alias_p)
2078 { 2241 {
2079 int i, j; 2242 int i, j;
2080 struct partition *partition1, *partition2; 2243 struct partition *partition1, *partition2;
2081 graph *pg = new_graph (partitions->length ()); 2244 graph *pg = new_graph (partitions->length ());
2082 auto_vec<ddr_p> alias_ddrs, *alias_ddrs_p; 2245 auto_vec<ddr_p> alias_ddrs, *alias_ddrs_p;
2107 dir = pg_add_dependence_edges (rdg, dir, partition1->datarefs, 2270 dir = pg_add_dependence_edges (rdg, dir, partition1->datarefs,
2108 partition2->datarefs, alias_ddrs_p); 2271 partition2->datarefs, alias_ddrs_p);
2109 2272
2110 /* Add edge to partition graph if there exists dependence. There 2273 /* Add edge to partition graph if there exists dependence. There
2111 are two types of edges. One type of edge comes from a dependence 2274 are two types of edges. One type of edge comes from a dependence
2112 known at compile time; this type can not be resolved by runtime 2275 known at compile time; this type cannot be resolved by runtime
2113 alias check. The other type can be resolved by runtime alias 2276 alias check. The other type can be resolved by runtime alias
2114 check. */ 2277 check. */
2115 if (dir == 1 || dir == 2 2278 if (dir == 1 || dir == 2
2116 || alias_ddrs.length () > 0) 2279 || alias_ddrs.length () > 0)
2117 { 2280 {
2154 if (data->partition) 2317 if (data->partition)
2155 partitions->safe_push (data->partition); 2318 partitions->safe_push (data->partition);
2156 } 2319 }
2157 } 2320 }
2158 2321
2159 /* Given reduced dependence graph RDG merge strongly connected components 2322 void
2160 of PARTITIONS. If IGNORE_ALIAS_P is true, data dependence caused by 2323 loop_distribution::merge_dep_scc_partitions (struct graph *rdg,
2161 possible alias between references is ignored, as if it doesn't exist 2324 vec<struct partition *> *partitions,
2162 at all; otherwise all dependences are considered. */ 2325 bool ignore_alias_p)
2163
2164 static void
2165 merge_dep_scc_partitions (struct graph *rdg,
2166 vec<struct partition *> *partitions,
2167 bool ignore_alias_p)
2168 { 2326 {
2169 struct partition *partition1, *partition2; 2327 struct partition *partition1, *partition2;
2170 struct pg_vdata *data; 2328 struct pg_vdata *data;
2171 graph *pg = build_partition_graph (rdg, partitions, ignore_alias_p); 2329 graph *pg = build_partition_graph (rdg, partitions, ignore_alias_p);
2172 int i, j, num_sccs = graphds_scc (pg, NULL); 2330 int i, j, num_sccs = graphds_scc (pg, NULL);
2235 } 2393 }
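Why SCCs must be fused, in a standalone example: the two statements below depend on each other across iterations, so they form one strongly connected component of the partition graph and no legal distribution can put them in separate loops.

    /* Illustrative sketch only.  */
    void cyclic (int *a, int *b, int n)
    {
      for (int i = 1; i < n; i++)
        {
          a[i] = b[i - 1] + 1;   /* reads what the other statement wrote  */
          b[i] = a[i - 1] + 1;   /* ... and vice versa  */
        }
    }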
2236 2394
2237 /* This is the main function breaking strongly connected components in 2395 /* This is the main function breaking strongly connected components in
2238 PARTITIONS, given reduced dependence graph RDG. Store data dependence 2396 PARTITIONS, given reduced dependence graph RDG. Store data dependence
2239 relations for runtime alias check in ALIAS_DDRS. */ 2397 relations for runtime alias check in ALIAS_DDRS. */
2240 2398 void
2241 static void 2399 loop_distribution::break_alias_scc_partitions (struct graph *rdg,
2242 break_alias_scc_partitions (struct graph *rdg, 2400 vec<struct partition *> *partitions,
2243 vec<struct partition *> *partitions, 2401 vec<ddr_p> *alias_ddrs)
2244 vec<ddr_p> *alias_ddrs)
2245 { 2402 {
2246 int i, j, k, num_sccs, num_sccs_no_alias; 2403 int i, j, k, num_sccs, num_sccs_no_alias;
2247 /* Build partition dependence graph. */ 2404 /* Build partition dependence graph. */
2248 graph *pg = build_partition_graph (rdg, partitions, false); 2405 graph *pg = build_partition_graph (rdg, partitions, false);
2249 2406
2380 2537
2381 /* Return true if LOOP's latch is dominated by statement for data reference 2538 /* Return true if LOOP's latch is dominated by statement for data reference
2382 DR. */ 2539 DR. */
2383 2540
2384 static inline bool 2541 static inline bool
2385 latch_dominated_by_data_ref (struct loop *loop, data_reference *dr) 2542 latch_dominated_by_data_ref (class loop *loop, data_reference *dr)
2386 { 2543 {
2387 return dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src, 2544 return dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
2388 gimple_bb (DR_STMT (dr))); 2545 gimple_bb (DR_STMT (dr)));
2389 } 2546 }
2390 2547
2391 /* Compute alias check pairs and store them in COMP_ALIAS_PAIRS for LOOP's 2548 /* Compute alias check pairs and store them in COMP_ALIAS_PAIRS for LOOP's
2392 data dependence relations ALIAS_DDRS. */ 2549 data dependence relations ALIAS_DDRS. */
2393 2550
2394 static void 2551 static void
2395 compute_alias_check_pairs (struct loop *loop, vec<ddr_p> *alias_ddrs, 2552 compute_alias_check_pairs (class loop *loop, vec<ddr_p> *alias_ddrs,
2396 vec<dr_with_seg_len_pair_t> *comp_alias_pairs) 2553 vec<dr_with_seg_len_pair_t> *comp_alias_pairs)
2397 { 2554 {
2398 unsigned int i; 2555 unsigned int i;
2399 unsigned HOST_WIDE_INT factor = 1; 2556 unsigned HOST_WIDE_INT factor = 1;
2400 tree niters_plus_one, niters = number_of_latch_executions (loop); 2557 tree niters_plus_one, niters = number_of_latch_executions (loop);
2411 { 2568 {
2412 ddr_p ddr = (*alias_ddrs)[i]; 2569 ddr_p ddr = (*alias_ddrs)[i];
2413 struct data_reference *dr_a = DDR_A (ddr); 2570 struct data_reference *dr_a = DDR_A (ddr);
2414 struct data_reference *dr_b = DDR_B (ddr); 2571 struct data_reference *dr_b = DDR_B (ddr);
2415 tree seg_length_a, seg_length_b; 2572 tree seg_length_a, seg_length_b;
2416 int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a),
2417 DR_BASE_ADDRESS (dr_b));
2418
2419 if (comp_res == 0)
2420 comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b));
2421 gcc_assert (comp_res != 0);
2422 2573
2423 if (latch_dominated_by_data_ref (loop, dr_a)) 2574 if (latch_dominated_by_data_ref (loop, dr_a))
2424 seg_length_a = data_ref_segment_size (dr_a, niters_plus_one); 2575 seg_length_a = data_ref_segment_size (dr_a, niters_plus_one);
2425 else 2576 else
2426 seg_length_a = data_ref_segment_size (dr_a, niters); 2577 seg_length_a = data_ref_segment_size (dr_a, niters);
2437 unsigned int align_a = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_a))); 2588 unsigned int align_a = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_a)));
2438 unsigned int align_b = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_b))); 2589 unsigned int align_b = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_b)));
2439 2590
2440 dr_with_seg_len_pair_t dr_with_seg_len_pair 2591 dr_with_seg_len_pair_t dr_with_seg_len_pair
2441 (dr_with_seg_len (dr_a, seg_length_a, access_size_a, align_a), 2592 (dr_with_seg_len (dr_a, seg_length_a, access_size_a, align_a),
2442 dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b)); 2593 dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b),
2443 2594 /* ??? Would WELL_ORDERED be safe? */
2444 /* Canonicalize pairs by sorting the two DR members. */ 2595 dr_with_seg_len_pair_t::REORDERED);
2445 if (comp_res > 0)
2446 std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second);
2447 2596
2448 comp_alias_pairs->safe_push (dr_with_seg_len_pair); 2597 comp_alias_pairs->safe_push (dr_with_seg_len_pair);
2449 } 2598 }
2450 2599
2451 if (tree_fits_uhwi_p (niters)) 2600 if (tree_fits_uhwi_p (niters))
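The pairs collected here describe segment-overlap tests. A sketch of the shape of such a test, where seg_a and seg_b stand for the segment sizes computed above; the real condition is emitted later by the runtime-alias machinery, this helper only computes the operands:

    /* No overlap iff one segment ends before the other begins.  */
    #include <stdint.h>

    int segments_disjoint (uintptr_t addr_a, uintptr_t seg_a,
                           uintptr_t addr_b, uintptr_t seg_b)
    {
      return addr_a + seg_a <= addr_b || addr_b + seg_b <= addr_a;
    }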
2462 /* Given data dependence relations in ALIAS_DDRS, generate runtime alias 2611 /* Given data dependence relations in ALIAS_DDRS, generate runtime alias
2463 checks and version LOOP under condition of these runtime alias checks. */ 2612 checks and version LOOP under condition of these runtime alias checks. */
2464 2613
2465 static void 2614 static void
2466 version_loop_by_alias_check (vec<struct partition *> *partitions, 2615 version_loop_by_alias_check (vec<struct partition *> *partitions,
2467 struct loop *loop, vec<ddr_p> *alias_ddrs) 2616 class loop *loop, vec<ddr_p> *alias_ddrs)
2468 { 2617 {
2469 profile_probability prob; 2618 profile_probability prob;
2470 basic_block cond_bb; 2619 basic_block cond_bb;
2471 struct loop *nloop; 2620 class loop *nloop;
2472 tree lhs, arg0, cond_expr = NULL_TREE; 2621 tree lhs, arg0, cond_expr = NULL_TREE;
2473 gimple_seq cond_stmts = NULL; 2622 gimple_seq cond_stmts = NULL;
2474 gimple *call_stmt = NULL; 2623 gimple *call_stmt = NULL;
2475 auto_vec<dr_with_seg_len_pair_t> comp_alias_pairs; 2624 auto_vec<dr_with_seg_len_pair_t> comp_alias_pairs;
2476 2625
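The resulting control flow looks roughly like the hand-written version below. This is illustrative only; the compiler emits the check internally rather than as source-level pointer compares.

    /* Illustrative sketch only.  */
    #include <stdint.h>

    void versioned (int *a, int *b, int n)
    {
      uintptr_t pa = (uintptr_t) a, pb = (uintptr_t) b;
      uintptr_t len = (uintptr_t) n * sizeof (int);

      if (pa + len <= pb || pb + len <= pa)   /* runtime alias check  */
        {
          for (int i = 0; i < n; i++)         /* distributed partition 1  */
            a[i] = 0;
          for (int i = 0; i < n; i++)         /* distributed partition 2  */
            b[i] += 1;
        }
      else
        for (int i = 0; i < n; i++)           /* original loop as fallback  */
          {
            a[i] = 0;
            b[i] += 1;
          }
    }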
2673 partition_free (part2); 2822 partition_free (part2);
2674 partitions->ordered_remove (i + 1); 2823 partitions->ordered_remove (i + 1);
2675 } 2824 }
2676 } 2825 }
2677 2826
2678 /* Fuse PARTITIONS of LOOP if necessary before finalizing distribution. 2827 void
2679 ALIAS_DDRS contains ddrs which need runtime alias check. */ 2828 loop_distribution::finalize_partitions (class loop *loop,
2680 2829 vec<struct partition *> *partitions,
2681 static void 2830 vec<ddr_p> *alias_ddrs)
2682 finalize_partitions (struct loop *loop, vec<struct partition *> *partitions,
2683 vec<ddr_p> *alias_ddrs)
2684 { 2831 {
2685 unsigned i; 2832 unsigned i;
2686 struct partition *partition, *a; 2833 struct partition *partition, *a;
2687 2834
2688 if (partitions->length () == 1 2835 if (partitions->length () == 1
2733 are placed before consumer statements. Tries to separate only the 2880 are placed before consumer statements. Tries to separate only the
2734 statements from STMTS into separate loops. Returns the number of 2881 statements from STMTS into separate loops. Returns the number of
2735 distributed loops. Set NB_CALLS to number of generated builtin calls. 2882 distributed loops. Set NB_CALLS to number of generated builtin calls.
2736 Set *DESTROY_P to whether LOOP needs to be destroyed. */ 2883 Set *DESTROY_P to whether LOOP needs to be destroyed. */
2737 2884
2738 static int 2885 int
2739 distribute_loop (struct loop *loop, vec<gimple *> stmts, 2886 loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts,
2740 control_dependences *cd, int *nb_calls, bool *destroy_p) 2887 control_dependences *cd, int *nb_calls, bool *destroy_p,
2888 bool only_patterns_p)
2741 { 2889 {
2742 ddrs_table = new hash_table<ddr_hasher> (389); 2890 ddrs_table = new hash_table<ddr_hasher> (389);
2743 struct graph *rdg; 2891 struct graph *rdg;
2744 partition *partition; 2892 partition *partition;
2745 bool any_builtin;
2746 int i, nbp; 2893 int i, nbp;
2747 2894
2748 *destroy_p = false; 2895 *destroy_p = false;
2749 *nb_calls = 0; 2896 *nb_calls = 0;
2750 loop_nest.create (0); 2897 loop_nest.create (0);
2754 delete ddrs_table; 2901 delete ddrs_table;
2755 return 0; 2902 return 0;
2756 } 2903 }
2757 2904
2758 datarefs_vec.create (20); 2905 datarefs_vec.create (20);
2906 has_nonaddressable_dataref_p = false;
2759 rdg = build_rdg (loop, cd); 2907 rdg = build_rdg (loop, cd);
2760 if (!rdg) 2908 if (!rdg)
2761 { 2909 {
2762 if (dump_file && (dump_flags & TDF_DETAILS)) 2910 if (dump_file && (dump_flags & TDF_DETAILS))
2763 fprintf (dump_file, 2911 fprintf (dump_file,
2799 auto_bitmap stmt_in_all_partitions; 2947 auto_bitmap stmt_in_all_partitions;
2800 bitmap_copy (stmt_in_all_partitions, partitions[0]->stmts); 2948 bitmap_copy (stmt_in_all_partitions, partitions[0]->stmts);
2801 for (i = 1; partitions.iterate (i, &partition); ++i) 2949 for (i = 1; partitions.iterate (i, &partition); ++i)
2802 bitmap_and_into (stmt_in_all_partitions, partitions[i]->stmts); 2950 bitmap_and_into (stmt_in_all_partitions, partitions[i]->stmts);
2803 2951
2804 any_builtin = false; 2952 bool any_builtin = false;
2953 bool reduction_in_all = false;
2805 FOR_EACH_VEC_ELT (partitions, i, partition) 2954 FOR_EACH_VEC_ELT (partitions, i, partition)
2806 { 2955 {
2807 classify_partition (loop, rdg, partition, stmt_in_all_partitions); 2956 reduction_in_all
2957 |= classify_partition (loop, rdg, partition, stmt_in_all_partitions);
2808 any_builtin |= partition_builtin_p (partition); 2958 any_builtin |= partition_builtin_p (partition);
2809 } 2959 }
2810 2960
2811 /* If we are only distributing patterns but did not detect any, 2961 /* If we are only distributing patterns but did not detect any,
2812 simply bail out. */ 2962 simply bail out. */
2813 if (!flag_tree_loop_distribution 2963 if (only_patterns_p
2814 && !any_builtin) 2964 && !any_builtin)
2815 { 2965 {
2816 nbp = 0; 2966 nbp = 0;
2817 goto ldist_done; 2967 goto ldist_done;
2818 } 2968 }
2820 /* If we are only distributing patterns fuse all partitions that 2970 /* If we are only distributing patterns fuse all partitions that
2821 were not classified as builtins. This also avoids chopping 2971 were not classified as builtins. This also avoids chopping
2822 a loop into pieces, separated by builtin calls. That is, we 2972 a loop into pieces, separated by builtin calls. That is, we
2823 only want no or a single loop body remaining. */ 2973 only want no or a single loop body remaining. */
2824 struct partition *into; 2974 struct partition *into;
2825 if (!flag_tree_loop_distribution) 2975 if (only_patterns_p)
2826 { 2976 {
2827 for (i = 0; partitions.iterate (i, &into); ++i) 2977 for (i = 0; partitions.iterate (i, &into); ++i)
2828 if (!partition_builtin_p (into)) 2978 if (!partition_builtin_p (into))
2829 break; 2979 break;
2830 for (++i; partitions.iterate (i, &partition); ++i) 2980 for (++i; partitions.iterate (i, &partition); ++i)
2877 1 into 0,2. So try again if we did any merging into 0. */ 3027 1 into 0,2. So try again if we did any merging into 0. */
2878 if (changed) 3028 if (changed)
2879 i--; 3029 i--;
2880 } 3030 }
2881 3031
3032 /* Put a non-builtin partition last if we need to preserve a reduction.
3033 ??? This is a workaround that makes sort_partitions_by_post_order do
3034 the correct thing while in reality it should sort each component
3035 separately and then put the component with a reduction or a non-builtin
3036 last. */
3037 if (reduction_in_all
3038 && partition_builtin_p (partitions.last()))
3039 FOR_EACH_VEC_ELT (partitions, i, partition)
3040 if (!partition_builtin_p (partition))
3041 {
3042 partitions.unordered_remove (i);
3043 partitions.quick_push (partition);
3044 break;
3045 }
3046
2882 /* Build the partition dependency graph and fuse partitions in strongly 3047 /* Build the partition dependency graph and fuse partitions in strongly
2883 connected components. */ 3048 connected components. */
2884 if (partitions.length () > 1) 3049 if (partitions.length () > 1)
2885 { 3050 {
2886 /* Don't support loop nest distribution under runtime alias check 3051 /* Don't support loop nest distribution under runtime alias check
2887 since it's not likely to enable many vectorization opportunities. */ 3052 since it's not likely to enable many vectorization opportunities.
2888 if (loop->inner) 3053 Also avoid it if the loop has any data reference which may not be
3054 addressable, since the runtime alias check needs to take and compare the address of the object. */
3055 if (loop->inner || has_nonaddressable_dataref_p)
2889 merge_dep_scc_partitions (rdg, &partitions, false); 3056 merge_dep_scc_partitions (rdg, &partitions, false);
2890 else 3057 else
2891 { 3058 {
2892 merge_dep_scc_partitions (rdg, &partitions, true); 3059 merge_dep_scc_partitions (rdg, &partitions, true);
2893 if (partitions.length () > 1) 3060 if (partitions.length () > 1)
2894 break_alias_scc_partitions (rdg, &partitions, &alias_ddrs); 3061 break_alias_scc_partitions (rdg, &partitions, &alias_ddrs);
2895 } 3062 }
2896 } 3063 }
2897 3064
2898 finalize_partitions (loop, &partitions, &alias_ddrs); 3065 finalize_partitions (loop, &partitions, &alias_ddrs);
3066
3067 /* If there is a reduction in all partitions make sure the last one
3068 is not classified for builtin code generation. */
3069 if (reduction_in_all)
3070 {
3071 partition = partitions.last ();
3072 if (only_patterns_p
3073 && partition_builtin_p (partition)
3074 && !partition_builtin_p (partitions[0]))
3075 {
3076 nbp = 0;
3077 goto ldist_done;
3078 }
3079 partition->kind = PKIND_NORMAL;
3080 }
2899 3081
2900 nbp = partitions.length (); 3082 nbp = partitions.length ();
2901 if (nbp == 0 3083 if (nbp == 0
2902 || (nbp == 1 && !partition_builtin_p (partitions[0])) 3084 || (nbp == 1 && !partition_builtin_p (partitions[0]))
2903 || (nbp > 1 && partition_contains_all_rw (rdg, partitions))) 3085 || (nbp > 1 && partition_contains_all_rw (rdg, partitions)))
2938 partition_free (partition); 3120 partition_free (partition);
2939 3121
2940 free_rdg (rdg); 3122 free_rdg (rdg);
2941 return nbp - *nb_calls; 3123 return nbp - *nb_calls;
2942 } 3124 }
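Putting the pieces together, a successful run of this driver performs a source-level transformation of roughly this shape. This is a sketch assuming -ftree-loop-distribute-patterns and non-overlapping arrays, not actual output of the pass:

    /* Illustrative sketch only.  */
    #include <string.h>
    #include <stddef.h>

    void before (int *restrict a, int *restrict b, int n)
    {
      for (int i = 0; i < n; i++)
        {
          a[i] = 0;          /* builtin partition  */
          b[i] = b[i] + 1;   /* normal partition   */
        }
    }

    void after (int *restrict a, int *restrict b, int n)
    {
      if (n > 0)
        memset (a, 0, (size_t) n * sizeof (int));   /* one library call  */
      for (int i = 0; i < n; i++)                   /* one distributed loop  */
        b[i] = b[i] + 1;
    }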
3125
3126
3127 void loop_distribution::bb_top_order_init (void)
3128 {
3129 int rpo_num;
3130 int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));
3131
3132 bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun));
3133 bb_top_order_index_size = last_basic_block_for_fn (cfun);
3134 rpo_num = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, true);
3135 for (int i = 0; i < rpo_num; i++)
3136 bb_top_order_index[rpo[i]] = i;
3137
3138 free (rpo);
3139 }
3140
3141 void loop_distribution::bb_top_order_destroy ()
3142 {
3143 free (bb_top_order_index);
3144 bb_top_order_index = NULL;
3145 bb_top_order_index_size = 0;
3146 }
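The topological order used here comes from a reverse post-order walk. A generic standalone sketch of that numbering follows; it uses recursive DFS over a made-up acyclic graph, whereas GCC's pre_and_rev_post_order_compute_fn is iterative and CFG-specific.

    /* Reverse post-order = topological order on an acyclic graph.  */
    #include <stdio.h>

    #define N 4
    static const int edge[N][N] = {   /* edge[u][v]: u -> v  */
      {0,1,1,0}, {0,0,0,1}, {0,0,0,1}, {0,0,0,0},
    };

    static int order[N], pos;

    static void dfs (int u, int *seen)
    {
      seen[u] = 1;
      for (int v = 0; v < N; v++)
        if (edge[u][v] && !seen[v])
          dfs (v, seen);
      order[pos++] = u;               /* record in post-order  */
    }

    int main (void)
    {
      int seen[N] = {0}, topo[N];
      dfs (0, seen);
      for (int i = 0; i < N; i++)     /* reverse the post-order  */
        topo[order[i]] = N - 1 - i;
      for (int u = 0; u < N; u++)
        printf ("node %d has topological index %d\n", u, topo[u]);
      return 0;
    }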
3147
3148
3149 /* Given LOOP, this function records seed statements for distribution in
3150 WORK_LIST. Return false if there is nothing for distribution. */
3151
3152 static bool
3153 find_seed_stmts_for_distribution (class loop *loop, vec<gimple *> *work_list)
3154 {
3155 basic_block *bbs = get_loop_body_in_dom_order (loop);
3156
3157 /* Initialize the worklist with stmts we seed the partitions with. */
3158 for (unsigned i = 0; i < loop->num_nodes; ++i)
3159 {
3160 for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
3161 !gsi_end_p (gsi); gsi_next (&gsi))
3162 {
3163 gphi *phi = gsi.phi ();
3164 if (virtual_operand_p (gimple_phi_result (phi)))
3165 continue;
3166 /* Distribute stmts which have defs that are used outside of
3167 the loop. */
3168 if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
3169 continue;
3170 work_list->safe_push (phi);
3171 }
3172 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
3173 !gsi_end_p (gsi); gsi_next (&gsi))
3174 {
3175 gimple *stmt = gsi_stmt (gsi);
3176
3177 /* Ignore clobbers; they do not have true side effects. */
3178 if (gimple_clobber_p (stmt))
3179 continue;
3180
3181 /* If there is a stmt with side-effects bail out - we
3182 cannot and should not distribute this loop. */
3183 if (gimple_has_side_effects (stmt))
3184 {
3185 free (bbs);
3186 return false;
3187 }
3188
3189 /* Distribute stmts which have defs that are used outside of
3190 the loop. */
3191 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
3192 ;
3193 /* Otherwise only distribute stores for now. */
3194 else if (!gimple_vdef (stmt))
3195 continue;
3196
3197 work_list->safe_push (stmt);
3198 }
3199 }
3200 free (bbs);
3201 return work_list->length () > 0;
3202 }
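In source terms, the seeds picked here are stores and definitions that live past the loop, for example (illustrative, names made up):

    /* Illustrative sketch only.  */
    int pick_seeds (int *a, int *b, int n)
    {
      int last = 0;
      for (int i = 0; i < n; i++)
        {
          int t = a[i] + 1;   /* no store, not live after the loop: not a
                                 seed itself, pulled in as a dependence  */
          b[i] = t;           /* store: seed  */
          last = t;           /* def used outside the loop: seed  */
        }
      return last;
    }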
3203
3204 /* Given innermost LOOP, return the outermost enclosing loop that forms a
3205 perfect loop nest. */
3206
3207 static class loop *
3208 prepare_perfect_loop_nest (class loop *loop)
3209 {
3210 class loop *outer = loop_outer (loop);
3211 tree niters = number_of_latch_executions (loop);
3212
3213 /* TODO: We only support the innermost 3-level loop nest distribution
3214 because of compilation-time issues for now. This should be relaxed
3215 in the future. Note we only allow 3-level loop nest distribution
3216 when parallelizing loops. */
3217 while ((loop->inner == NULL
3218 || (loop->inner->inner == NULL && flag_tree_parallelize_loops > 1))
3219 && loop_outer (outer)
3220 && outer->inner == loop && loop->next == NULL
3221 && single_exit (outer)
3222 && !chrec_contains_symbols_defined_in_loop (niters, outer->num)
3223 && (niters = number_of_latch_executions (outer)) != NULL_TREE
3224 && niters != chrec_dont_know)
3225 {
3226 loop = outer;
3227 outer = loop_outer (loop);
3228 }
3229
3230 return loop;
3231 }
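The distinction in source terms (illustrative): the outer loop of a perfect nest contains nothing but the inner loop, so the whole nest can be handed to distribution; with an imperfect nest only the innermost level is considered.

    /* Illustrative sketch only.  */
    void perfect (int a[100][100])
    {
      for (int i = 0; i < 100; i++)
        for (int j = 0; j < 100; j++)   /* only statement of the i-loop  */
          a[i][j] = 0;
    }

    void imperfect (int a[100][100], int *r)
    {
      for (int i = 0; i < 100; i++)
        {
          r[i] = i;                     /* extra statement in the i-loop  */
          for (int j = 0; j < 100; j++)
            a[i][j] = 0;
        }
    }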
3232
3233
3234 unsigned int
3235 loop_distribution::execute (function *fun)
3236 {
3237 class loop *loop;
3238 bool changed = false;
3239 basic_block bb;
3240 control_dependences *cd = NULL;
3241 auto_vec<loop_p> loops_to_be_destroyed;
3242
3243 if (number_of_loops (fun) <= 1)
3244 return 0;
3245
3246 bb_top_order_init ();
3247
3248 FOR_ALL_BB_FN (bb, fun)
3249 {
3250 gimple_stmt_iterator gsi;
3251 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3252 gimple_set_uid (gsi_stmt (gsi), -1);
3253 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3254 gimple_set_uid (gsi_stmt (gsi), -1);
3255 }
3256
3257 /* We can at the moment only distribute non-nested loops, thus restrict
3258 walking to innermost loops. */
3259 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
3260 {
3261 /* Don't distribute loops with multiple exit edges, or cold loops when
3262 not doing pattern detection. */
3263 if (!single_exit (loop)
3264 || (!flag_tree_loop_distribute_patterns
3265 && !optimize_loop_for_speed_p (loop)))
3266 continue;
3267
3268 /* Don't distribute loop if niters is unknown. */
3269 tree niters = number_of_latch_executions (loop);
3270 if (niters == NULL_TREE || niters == chrec_dont_know)
3271 continue;
3272
3273 /* Get the perfect loop nest for distribution. */
3274 loop = prepare_perfect_loop_nest (loop);
3275 for (; loop; loop = loop->inner)
3276 {
3277 auto_vec<gimple *> work_list;
3278 if (!find_seed_stmts_for_distribution (loop, &work_list))
3279 break;
3280
3281 const char *str = loop->inner ? " nest" : "";
3282 dump_user_location_t loc = find_loop_location (loop);
3283 if (!cd)
3284 {
3285 calculate_dominance_info (CDI_DOMINATORS);
3286 calculate_dominance_info (CDI_POST_DOMINATORS);
3287 cd = new control_dependences ();
3288 free_dominance_info (CDI_POST_DOMINATORS);
3289 }
3290
3291 bool destroy_p;
3292 int nb_generated_loops, nb_generated_calls;
3293 nb_generated_loops
3294 = distribute_loop (loop, work_list, cd, &nb_generated_calls,
3295 &destroy_p, (!optimize_loop_for_speed_p (loop)
3296 || !flag_tree_loop_distribution));
3297 if (destroy_p)
3298 loops_to_be_destroyed.safe_push (loop);
3299
3300 if (nb_generated_loops + nb_generated_calls > 0)
3301 {
3302 changed = true;
3303 if (dump_enabled_p ())
3304 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
3305 loc, "Loop%s %d distributed: split to %d loops "
3306 "and %d library calls.\n", str, loop->num,
3307 nb_generated_loops, nb_generated_calls);
3308
3309 break;
3310 }
3311
3312 if (dump_file && (dump_flags & TDF_DETAILS))
3313 fprintf (dump_file, "Loop%s %d not distributed.\n", str, loop->num);
3314 }
3315 }
3316
3317 if (cd)
3318 delete cd;
3319
3320 if (bb_top_order_index != NULL)
3321 bb_top_order_destroy ();
3322
3323 if (changed)
3324 {
3325 /* Destroy loop bodies that could not be reused. Do this late as we
3326 otherwise can end up referring to stale data in control dependences. */
3327 unsigned i;
3328 FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop)
3329 destroy_loop (loop);
3330
3331 /* Cached scalar evolutions now may refer to wrong or non-existing
3332 loops. */
3333 scev_reset_htab ();
3334 mark_virtual_operands_for_renaming (fun);
3335 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3336 }
3337
3338 checking_verify_loop_structure ();
3339
3340 return changed ? TODO_cleanup_cfg : 0;
3341 }
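To observe this driver on real input, a candidate like the one below can be compiled with the flags named in the code above; -fdump-tree-ldist-details should write this pass's dump (the pass is registered as "ldist"), though the exact dump file name and contents vary by GCC version.

    /* Candidate input; build with, e.g.
         gcc -O2 -ftree-loop-distribute-patterns -fdump-tree-ldist-details c.c
       and look for the "distributed: split to ... loops" note in the
       generated .ldist dump file.  */
    void candidate (int *restrict a, int *restrict b, int n)
    {
      for (int i = 0; i < n; i++)
        {
          a[i] = 0;
          b[i] += i;
        }
    }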
3342
2943 3343
2944 /* Distribute all loops in the current function. */ 3344 /* Distribute all loops in the current function. */
2945 3345
2946 namespace { 3346 namespace {
2947 3347
2974 3374
2975 virtual unsigned int execute (function *); 3375 virtual unsigned int execute (function *);
2976 3376
2977 }; // class pass_loop_distribution 3377 }; // class pass_loop_distribution
2978 3378
2979
2980 /* Given LOOP, this function records seed statements for distribution in
2981 WORK_LIST. Return false if there is nothing for distribution. */
2982
2983 static bool
2984 find_seed_stmts_for_distribution (struct loop *loop, vec<gimple *> *work_list)
2985 {
2986 basic_block *bbs = get_loop_body_in_dom_order (loop);
2987
2988 /* Initialize the worklist with stmts we seed the partitions with. */
2989 for (unsigned i = 0; i < loop->num_nodes; ++i)
2990 {
2991 for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
2992 !gsi_end_p (gsi); gsi_next (&gsi))
2993 {
2994 gphi *phi = gsi.phi ();
2995 if (virtual_operand_p (gimple_phi_result (phi)))
2996 continue;
2997 /* Distribute stmts which have defs that are used outside of
2998 the loop. */
2999 if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
3000 continue;
3001 work_list->safe_push (phi);
3002 }
3003 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
3004 !gsi_end_p (gsi); gsi_next (&gsi))
3005 {
3006 gimple *stmt = gsi_stmt (gsi);
3007
3008 /* If there is a stmt with side-effects bail out - we
3009 cannot and should not distribute this loop. */
3010 if (gimple_has_side_effects (stmt))
3011 {
3012 free (bbs);
3013 return false;
3014 }
3015
3016 /* Distribute stmts which have defs that are used outside of
3017 the loop. */
3018 if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
3019 ;
3020 /* Otherwise only distribute stores for now. */
3021 else if (!gimple_vdef (stmt))
3022 continue;
3023
3024 work_list->safe_push (stmt);
3025 }
3026 }
3027 free (bbs);
3028 return work_list->length () > 0;
3029 }
3030
3031 /* Given innermost LOOP, return the outermost enclosing loop that forms a
3032 perfect loop nest. */
3033
3034 static struct loop *
3035 prepare_perfect_loop_nest (struct loop *loop)
3036 {
3037 struct loop *outer = loop_outer (loop);
3038 tree niters = number_of_latch_executions (loop);
3039
3040 /* TODO: We only support the innermost 3-level loop nest distribution
3041 because of compilation-time issues for now. This should be relaxed
3042 in the future. Note we only allow 3-level loop nest distribution
3043 when parallelizing loops. */
3044 while ((loop->inner == NULL
3045 || (loop->inner->inner == NULL && flag_tree_parallelize_loops > 1))
3046 && loop_outer (outer)
3047 && outer->inner == loop && loop->next == NULL
3048 && single_exit (outer)
3049 && optimize_loop_for_speed_p (outer)
3050 && !chrec_contains_symbols_defined_in_loop (niters, outer->num)
3051 && (niters = number_of_latch_executions (outer)) != NULL_TREE
3052 && niters != chrec_dont_know)
3053 {
3054 loop = outer;
3055 outer = loop_outer (loop);
3056 }
3057
3058 return loop;
3059 }
3060
3061 unsigned int 3379 unsigned int
3062 pass_loop_distribution::execute (function *fun) 3380 pass_loop_distribution::execute (function *fun)
3063 { 3381 {
3064 struct loop *loop; 3382 return loop_distribution ().execute (fun);
3065 bool changed = false;
3066 basic_block bb;
3067 control_dependences *cd = NULL;
3068 auto_vec<loop_p> loops_to_be_destroyed;
3069
3070 if (number_of_loops (fun) <= 1)
3071 return 0;
3072
3073 /* Compute topological order for basic blocks. Topological order is
3074 needed because data dependence is computed for data references in
3075 lexicographical order. */
3076 if (bb_top_order_index == NULL)
3077 {
3078 int rpo_num;
3079 int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun));
3080
3081 bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun));
3082 bb_top_order_index_size = last_basic_block_for_fn (cfun);
3083 rpo_num = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, true);
3084 for (int i = 0; i < rpo_num; i++)
3085 bb_top_order_index[rpo[i]] = i;
3086
3087 free (rpo);
3088 }
3089
3090 FOR_ALL_BB_FN (bb, fun)
3091 {
3092 gimple_stmt_iterator gsi;
3093 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3094 gimple_set_uid (gsi_stmt (gsi), -1);
3095 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
3096 gimple_set_uid (gsi_stmt (gsi), -1);
3097 }
3098
3099 /* We can at the moment only distribute non-nested loops, thus restrict
3100 walking to innermost loops. */
3101 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
3102 {
3103 /* Don't distribute loops with multiple exit edges, or cold loops. */
3104 if (!single_exit (loop)
3105 || !optimize_loop_for_speed_p (loop))
3106 continue;
3107
3108 /* Don't distribute loop if niters is unknown. */
3109 tree niters = number_of_latch_executions (loop);
3110 if (niters == NULL_TREE || niters == chrec_dont_know)
3111 continue;
3112
3113 /* Get the perfect loop nest for distribution. */
3114 loop = prepare_perfect_loop_nest (loop);
3115 for (; loop; loop = loop->inner)
3116 {
3117 auto_vec<gimple *> work_list;
3118 if (!find_seed_stmts_for_distribution (loop, &work_list))
3119 break;
3120
3121 const char *str = loop->inner ? " nest" : "";
3122 dump_user_location_t loc = find_loop_location (loop);
3123 if (!cd)
3124 {
3125 calculate_dominance_info (CDI_DOMINATORS);
3126 calculate_dominance_info (CDI_POST_DOMINATORS);
3127 cd = new control_dependences ();
3128 free_dominance_info (CDI_POST_DOMINATORS);
3129 }
3130
3131 bool destroy_p;
3132 int nb_generated_loops, nb_generated_calls;
3133 nb_generated_loops = distribute_loop (loop, work_list, cd,
3134 &nb_generated_calls,
3135 &destroy_p);
3136 if (destroy_p)
3137 loops_to_be_destroyed.safe_push (loop);
3138
3139 if (nb_generated_loops + nb_generated_calls > 0)
3140 {
3141 changed = true;
3142 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
3143 loc, "Loop%s %d distributed: split to %d loops "
3144 "and %d library calls.\n", str, loop->num,
3145 nb_generated_loops, nb_generated_calls);
3146
3147 break;
3148 }
3149
3150 if (dump_file && (dump_flags & TDF_DETAILS))
3151 fprintf (dump_file, "Loop%s %d not distributed.\n", str, loop->num);
3152 }
3153 }
3154
3155 if (cd)
3156 delete cd;
3157
3158 if (bb_top_order_index != NULL)
3159 {
3160 free (bb_top_order_index);
3161 bb_top_order_index = NULL;
3162 bb_top_order_index_size = 0;
3163 }
3164
3165 if (changed)
3166 {
3167 /* Destroy loop bodies that could not be reused. Do this late as we
3168 otherwise can end up referring to stale data in control dependences. */
3169 unsigned i;
3170 FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop)
3171 destroy_loop (loop);
3172
3173 /* Cached scalar evolutions now may refer to wrong or non-existing
3174 loops. */
3175 scev_reset_htab ();
3176 mark_virtual_operands_for_renaming (fun);
3177 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3178 }
3179
3180 checking_verify_loop_structure ();
3181
3182 return changed ? TODO_cleanup_cfg : 0;
3183 } 3383 }
3184 3384
3185 } // anon namespace 3385 } // anon namespace
3186 3386
3187 gimple_opt_pass * 3387 gimple_opt_pass *