CbC_gcc: comparison of gcc/tree-loop-distribution.c @ 145:1830386684a0
gcc-9.2.0
author | anatofuz |
date | Thu, 13 Feb 2020 11:34:05 +0900 |
parents | 84e7813d76e9 |
children |
131:84e7813d76e9 | 145:1830386684a0 |
---|---|
1 /* Loop distribution. | 1 /* Loop distribution. |
2 Copyright (C) 2006-2018 Free Software Foundation, Inc. | 2 Copyright (C) 2006-2020 Free Software Foundation, Inc. |
3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr> | 3 Contributed by Georges-Andre Silber <Georges-Andre.Silber@ensmp.fr> |
4 and Sebastian Pop <sebastian.pop@amd.com>. | 4 and Sebastian Pop <sebastian.pop@amd.com>. |
5 | 5 |
6 This file is part of GCC. | 6 This file is part of GCC. |
7 | 7 |
110 #include "tree-ssa-loop.h" | 110 #include "tree-ssa-loop.h" |
111 #include "tree-into-ssa.h" | 111 #include "tree-into-ssa.h" |
112 #include "tree-ssa.h" | 112 #include "tree-ssa.h" |
113 #include "cfgloop.h" | 113 #include "cfgloop.h" |
114 #include "tree-scalar-evolution.h" | 114 #include "tree-scalar-evolution.h" |
115 #include "params.h" | |
116 #include "tree-vectorizer.h" | 115 #include "tree-vectorizer.h" |
116 #include "tree-eh.h" | |
117 #include "gimple-fold.h" | |
117 | 118 |
118 | 119 |
119 #define MAX_DATAREFS_NUM \ | 120 #define MAX_DATAREFS_NUM \ |
120 ((unsigned) PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS)) | 121 ((unsigned) param_loop_max_datarefs_for_datadeps) |
121 | 122 |
122 /* Threshold controlling number of distributed partitions. Given it may | 123 /* Threshold controlling number of distributed partitions. Given it may |
123 be unnecessary if a memory stream cost model is invented in the future, | 124 be unnecessary if a memory stream cost model is invented in the future, |
124 we define it as a temporary macro, rather than a parameter. */ | 125 we define it as a temporary macro, rather than a parameter. */ |
125 #define NUM_PARTITION_THRESHOLD (4) | 126 #define NUM_PARTITION_THRESHOLD (4) |
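To make the two macros above concrete: loop distribution splits one loop into several loops, one per partition, and NUM_PARTITION_THRESHOLD keeps the number of resulting loops (and thus memory streams) in check. A minimal sketch of the source-level effect, with made-up names, assuming the statements are independent enough to separate:

```c
/* Illustrative input only, not taken from this file.  With
   -O2 -ftree-loop-distribution -ftree-loop-distribute-patterns the loop
   below can be split into two partitions: the first is a candidate for
   PKIND_MEMSET and may become a memset call, the second stays a loop.  */
void
example (int *__restrict a, int *__restrict b, const int *__restrict c, int n)
{
  for (int i = 0; i < n; i++)
    {
      a[i] = 0;          /* partition 1: builtin candidate  */
      b[i] = c[i] + 1;   /* partition 2: PKIND_NORMAL       */
    }
}
```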
151 const data_dependence_relation *ddr2) | 152 const data_dependence_relation *ddr2) |
152 { | 153 { |
153 return (DDR_A (ddr1) == DDR_A (ddr2) && DDR_B (ddr1) == DDR_B (ddr2)); | 154 return (DDR_A (ddr1) == DDR_A (ddr2) && DDR_B (ddr1) == DDR_B (ddr2)); |
154 } | 155 } |
155 | 156 |
156 /* The loop (nest) to be distributed. */ | 157 |
157 static vec<loop_p> loop_nest; | 158 |
158 | |
159 /* Vector of data references in the loop to be distributed. */ | |
160 static vec<data_reference_p> datarefs_vec; | |
161 | |
162 /* Store index of data reference in aux field. */ | |
163 #define DR_INDEX(dr) ((uintptr_t) (dr)->aux) | 159 #define DR_INDEX(dr) ((uintptr_t) (dr)->aux) |
164 | |
165 /* Hash table for data dependence relation in the loop to be distributed. */ | |
166 static hash_table<ddr_hasher> *ddrs_table; | |
167 | 160 |
168 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */ | 161 /* A Reduced Dependence Graph (RDG) vertex representing a statement. */ |
169 struct rdg_vertex | 162 struct rdg_vertex |
170 { | 163 { |
171 /* The statement represented by this vertex. */ | 164 /* The statement represented by this vertex. */ |
208 /* Type of the dependence. */ | 201 /* Type of the dependence. */ |
209 enum rdg_dep_type type; | 202 enum rdg_dep_type type; |
210 }; | 203 }; |
211 | 204 |
212 #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type | 205 #define RDGE_TYPE(E) ((struct rdg_edge *) ((E)->data))->type |
213 | |
214 /* Dump vertex I in RDG to FILE. */ | |
215 | |
216 static void | |
217 dump_rdg_vertex (FILE *file, struct graph *rdg, int i) | |
218 { | |
219 struct vertex *v = &(rdg->vertices[i]); | |
220 struct graph_edge *e; | |
221 | |
222 fprintf (file, "(vertex %d: (%s%s) (in:", i, | |
223 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "", | |
224 RDG_MEM_READS_STMT (rdg, i) ? "r" : ""); | |
225 | |
226 if (v->pred) | |
227 for (e = v->pred; e; e = e->pred_next) | |
228 fprintf (file, " %d", e->src); | |
229 | |
230 fprintf (file, ") (out:"); | |
231 | |
232 if (v->succ) | |
233 for (e = v->succ; e; e = e->succ_next) | |
234 fprintf (file, " %d", e->dest); | |
235 | |
236 fprintf (file, ")\n"); | |
237 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS); | |
238 fprintf (file, ")\n"); | |
239 } | |
240 | |
241 /* Call dump_rdg_vertex on stderr. */ | |
242 | |
243 DEBUG_FUNCTION void | |
244 debug_rdg_vertex (struct graph *rdg, int i) | |
245 { | |
246 dump_rdg_vertex (stderr, rdg, i); | |
247 } | |
248 | |
249 /* Dump the reduced dependence graph RDG to FILE. */ | |
250 | |
251 static void | |
252 dump_rdg (FILE *file, struct graph *rdg) | |
253 { | |
254 fprintf (file, "(rdg\n"); | |
255 for (int i = 0; i < rdg->n_vertices; i++) | |
256 dump_rdg_vertex (file, rdg, i); | |
257 fprintf (file, ")\n"); | |
258 } | |
259 | |
260 /* Call dump_rdg on stderr. */ | |
261 | |
262 DEBUG_FUNCTION void | |
263 debug_rdg (struct graph *rdg) | |
264 { | |
265 dump_rdg (stderr, rdg); | |
266 } | |
267 | |
268 static void | |
269 dot_rdg_1 (FILE *file, struct graph *rdg) | |
270 { | |
271 int i; | |
272 pretty_printer buffer; | |
273 pp_needs_newline (&buffer) = false; | |
274 buffer.buffer->stream = file; | |
275 | |
276 fprintf (file, "digraph RDG {\n"); | |
277 | |
278 for (i = 0; i < rdg->n_vertices; i++) | |
279 { | |
280 struct vertex *v = &(rdg->vertices[i]); | |
281 struct graph_edge *e; | |
282 | |
283 fprintf (file, "%d [label=\"[%d] ", i, i); | |
284 pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM); | |
285 pp_flush (&buffer); | |
286 fprintf (file, "\"]\n"); | |
287 | |
288 /* Highlight reads from memory. */ | |
289 if (RDG_MEM_READS_STMT (rdg, i)) | |
290 fprintf (file, "%d [style=filled, fillcolor=green]\n", i); | |
291 | |
292 /* Highlight stores to memory. */ | |
293 if (RDG_MEM_WRITE_STMT (rdg, i)) | |
294 fprintf (file, "%d [style=filled, fillcolor=red]\n", i); | |
295 | |
296 if (v->succ) | |
297 for (e = v->succ; e; e = e->succ_next) | |
298 switch (RDGE_TYPE (e)) | |
299 { | |
300 case flow_dd: | |
301 /* These are the most common dependences: don't print these. */ | |
302 fprintf (file, "%d -> %d \n", i, e->dest); | |
303 break; | |
304 | |
305 case control_dd: | |
306 fprintf (file, "%d -> %d [label=control] \n", i, e->dest); | |
307 break; | |
308 | |
309 default: | |
310 gcc_unreachable (); | |
311 } | |
312 } | |
313 | |
314 fprintf (file, "}\n\n"); | |
315 } | |
316 | |
317 /* Display the Reduced Dependence Graph using dotty. */ | |
318 | |
319 DEBUG_FUNCTION void | |
320 dot_rdg (struct graph *rdg) | |
321 { | |
322 /* When debugging, you may want to enable the following code. */ | |
323 #ifdef HAVE_POPEN | |
324 FILE *file = popen ("dot -Tx11", "w"); | |
325 if (!file) | |
326 return; | |
327 dot_rdg_1 (file, rdg); | |
328 fflush (file); | |
329 close (fileno (file)); | |
330 pclose (file); | |
331 #else | |
332 dot_rdg_1 (stderr, rdg); | |
333 #endif | |
334 } | |
335 | |
336 /* Returns the index of STMT in RDG. */ | |
337 | |
338 static int | |
339 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt) | |
340 { | |
341 int index = gimple_uid (stmt); | |
342 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt); | |
343 return index; | |
344 } | |
345 | |
346 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is | |
347 the index of DEF in RDG. */ | |
348 | |
349 static void | |
350 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef) | |
351 { | |
352 use_operand_p imm_use_p; | |
353 imm_use_iterator iterator; | |
354 | |
355 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def) | |
356 { | |
357 struct graph_edge *e; | |
358 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p)); | |
359 | |
360 if (use < 0) | |
361 continue; | |
362 | |
363 e = add_edge (rdg, idef, use); | |
364 e->data = XNEW (struct rdg_edge); | |
365 RDGE_TYPE (e) = flow_dd; | |
366 } | |
367 } | |
368 | |
369 /* Creates an edge for the control dependences of BB to the vertex V. */ | |
370 | |
371 static void | |
372 create_edge_for_control_dependence (struct graph *rdg, basic_block bb, | |
373 int v, control_dependences *cd) | |
374 { | |
375 bitmap_iterator bi; | |
376 unsigned edge_n; | |
377 EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index), | |
378 0, edge_n, bi) | |
379 { | |
380 basic_block cond_bb = cd->get_edge_src (edge_n); | |
381 gimple *stmt = last_stmt (cond_bb); | |
382 if (stmt && is_ctrl_stmt (stmt)) | |
383 { | |
384 struct graph_edge *e; | |
385 int c = rdg_vertex_for_stmt (rdg, stmt); | |
386 if (c < 0) | |
387 continue; | |
388 | |
389 e = add_edge (rdg, c, v); | |
390 e->data = XNEW (struct rdg_edge); | |
391 RDGE_TYPE (e) = control_dd; | |
392 } | |
393 } | |
394 } | |
395 | |
396 /* Creates the edges of the reduced dependence graph RDG. */ | |
397 | |
398 static void | |
399 create_rdg_flow_edges (struct graph *rdg) | |
400 { | |
401 int i; | |
402 def_operand_p def_p; | |
403 ssa_op_iter iter; | |
404 | |
405 for (i = 0; i < rdg->n_vertices; i++) | |
406 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i), | |
407 iter, SSA_OP_DEF) | |
408 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i); | |
409 } | |
410 | |
411 /* Creates the edges of the reduced dependence graph RDG. */ | |
412 | |
413 static void | |
414 create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop) | |
415 { | |
416 int i; | |
417 | |
418 for (i = 0; i < rdg->n_vertices; i++) | |
419 { | |
420 gimple *stmt = RDG_STMT (rdg, i); | |
421 if (gimple_code (stmt) == GIMPLE_PHI) | |
422 { | |
423 edge_iterator ei; | |
424 edge e; | |
425 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds) | |
426 if (flow_bb_inside_loop_p (loop, e->src)) | |
427 create_edge_for_control_dependence (rdg, e->src, i, cd); | |
428 } | |
429 else | |
430 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd); | |
431 } | |
432 } | |
433 | |
434 /* Build the vertices of the reduced dependence graph RDG. Return false | |
435 if that failed. */ | |
436 | |
437 static bool | |
438 create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop) | |
439 { | |
440 int i; | |
441 gimple *stmt; | |
442 | |
443 FOR_EACH_VEC_ELT (stmts, i, stmt) | |
444 { | |
445 struct vertex *v = &(rdg->vertices[i]); | |
446 | |
447 /* Record statement to vertex mapping. */ | |
448 gimple_set_uid (stmt, i); | |
449 | |
450 v->data = XNEW (struct rdg_vertex); | |
451 RDGV_STMT (v) = stmt; | |
452 RDGV_DATAREFS (v).create (0); | |
453 RDGV_HAS_MEM_WRITE (v) = false; | |
454 RDGV_HAS_MEM_READS (v) = false; | |
455 if (gimple_code (stmt) == GIMPLE_PHI) | |
456 continue; | |
457 | |
458 unsigned drp = datarefs_vec.length (); | |
459 if (!find_data_references_in_stmt (loop, stmt, &datarefs_vec)) | |
460 return false; | |
461 for (unsigned j = drp; j < datarefs_vec.length (); ++j) | |
462 { | |
463 data_reference_p dr = datarefs_vec[j]; | |
464 if (DR_IS_READ (dr)) | |
465 RDGV_HAS_MEM_READS (v) = true; | |
466 else | |
467 RDGV_HAS_MEM_WRITE (v) = true; | |
468 RDGV_DATAREFS (v).safe_push (dr); | |
469 } | |
470 } | |
471 return true; | |
472 } | |
473 | |
474 /* Array mapping basic block's index to its topological order. */ | |
475 static int *bb_top_order_index; | |
476 /* And size of the array. */ | |
477 static int bb_top_order_index_size; | |
478 | |
479 /* If X has a smaller topological sort number than Y, returns -1; | |
480 if greater, returns 1. */ | |
481 | |
482 static int | |
483 bb_top_order_cmp (const void *x, const void *y) | |
484 { | |
485 basic_block bb1 = *(const basic_block *) x; | |
486 basic_block bb2 = *(const basic_block *) y; | |
487 | |
488 gcc_assert (bb1->index < bb_top_order_index_size | |
489 && bb2->index < bb_top_order_index_size); | |
490 gcc_assert (bb1 == bb2 | |
491 || bb_top_order_index[bb1->index] | |
492 != bb_top_order_index[bb2->index]); | |
493 | |
494 return (bb_top_order_index[bb1->index] - bb_top_order_index[bb2->index]); | |
495 } | |
496 | |
497 /* Initialize STMTS with all the statements of LOOP. We use topological | |
498 order to discover all statements. The order is important because | |
499 generate_loops_for_partition is using the same traversal for identifying | |
500 statements in loop copies. */ | |
501 | |
502 static void | |
503 stmts_from_loop (struct loop *loop, vec<gimple *> *stmts) | |
504 { | |
505 unsigned int i; | |
506 basic_block *bbs = get_loop_body_in_custom_order (loop, bb_top_order_cmp); | |
507 | |
508 for (i = 0; i < loop->num_nodes; i++) | |
509 { | |
510 basic_block bb = bbs[i]; | |
511 | |
512 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); | |
513 gsi_next (&bsi)) | |
514 if (!virtual_operand_p (gimple_phi_result (bsi.phi ()))) | |
515 stmts->safe_push (bsi.phi ()); | |
516 | |
517 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); | |
518 gsi_next (&bsi)) | |
519 { | |
520 gimple *stmt = gsi_stmt (bsi); | |
521 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt)) | |
522 stmts->safe_push (stmt); | |
523 } | |
524 } | |
525 | |
526 free (bbs); | |
527 } | |
528 | |
529 /* Free the reduced dependence graph RDG. */ | |
530 | |
531 static void | |
532 free_rdg (struct graph *rdg) | |
533 { | |
534 int i; | |
535 | |
536 for (i = 0; i < rdg->n_vertices; i++) | |
537 { | |
538 struct vertex *v = &(rdg->vertices[i]); | |
539 struct graph_edge *e; | |
540 | |
541 for (e = v->succ; e; e = e->succ_next) | |
542 free (e->data); | |
543 | |
544 if (v->data) | |
545 { | |
546 gimple_set_uid (RDGV_STMT (v), -1); | |
547 (RDGV_DATAREFS (v)).release (); | |
548 free (v->data); | |
549 } | |
550 } | |
551 | |
552 free_graph (rdg); | |
553 } | |
554 | |
555 /* Build the Reduced Dependence Graph (RDG) with one vertex per statement of | |
556 LOOP, and one edge per flow dependence or control dependence from control | |
557 dependence CD. During visiting each statement, data references are also | |
558 collected and recorded in global data DATAREFS_VEC. */ | |
559 | |
560 static struct graph * | |
561 build_rdg (struct loop *loop, control_dependences *cd) | |
562 { | |
563 struct graph *rdg; | |
564 | |
565 /* Create the RDG vertices from the stmts of the loop nest. */ | |
566 auto_vec<gimple *, 10> stmts; | |
567 stmts_from_loop (loop, &stmts); | |
568 rdg = new_graph (stmts.length ()); | |
569 if (!create_rdg_vertices (rdg, stmts, loop)) | |
570 { | |
571 free_rdg (rdg); | |
572 return NULL; | |
573 } | |
574 stmts.release (); | |
575 | |
576 create_rdg_flow_edges (rdg); | |
577 if (cd) | |
578 create_rdg_cd_edges (rdg, cd, loop); | |
579 | |
580 return rdg; | |
581 } | |
582 | |
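For orientation, a rough sketch (illustrative only, not from the sources) of what the RDG built by build_rdg above records for a tiny loop body:

```c
/* Hypothetical loop used only to illustrate the RDG structure.  */
void
rdg_example (int *a, const int *b, int n)
{
  for (int i = 0; i < n; i++)
    {
      int t = b[i];   /* vertex 0: RDGV_HAS_MEM_READS, datarefs = { b[i] }  */
      a[i] = t + 1;   /* vertex 1: RDGV_HAS_MEM_WRITE, datarefs = { a[i] };
                         create_rdg_flow_edges adds a flow_dd edge 0 -> 1
                         for the SSA use of t.  When control dependences are
                         supplied, create_rdg_cd_edges additionally adds
                         control_dd edges from the loop's exit test.  */
    }
}
```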
583 | 206 |
584 /* Kind of distributed loop. */ | 207 /* Kind of distributed loop. */ |
585 enum partition_kind { | 208 enum partition_kind { |
586 PKIND_NORMAL, | 209 PKIND_NORMAL, |
587 /* Partial memset stands for a partition that can be distributed into a loop | 210 /* Partial memset stands for a partition that can be distributed into a loop |
628 { | 251 { |
629 /* Statements of the partition. */ | 252 /* Statements of the partition. */ |
630 bitmap stmts; | 253 bitmap stmts; |
631 /* True if the partition defines variable which is used outside of loop. */ | 254 /* True if the partition defines variable which is used outside of loop. */ |
632 bool reduction_p; | 255 bool reduction_p; |
256 location_t loc; | |
633 enum partition_kind kind; | 257 enum partition_kind kind; |
634 enum partition_type type; | 258 enum partition_type type; |
635 /* Data references in the partition. */ | 259 /* Data references in the partition. */ |
636 bitmap datarefs; | 260 bitmap datarefs; |
637 /* Information of builtin partition. */ | 261 /* Information of builtin partition. */ |
638 struct builtin_info *builtin; | 262 struct builtin_info *builtin; |
639 }; | 263 }; |
640 | 264 |
641 | |
642 /* Allocate and initialize a partition from BITMAP. */ | |
643 | |
644 static partition * | |
645 partition_alloc (void) | |
646 { | |
647 partition *partition = XCNEW (struct partition); | |
648 partition->stmts = BITMAP_ALLOC (NULL); | |
649 partition->reduction_p = false; | |
650 partition->kind = PKIND_NORMAL; | |
651 partition->datarefs = BITMAP_ALLOC (NULL); | |
652 return partition; | |
653 } | |
654 | |
655 /* Free PARTITION. */ | |
656 | |
657 static void | |
658 partition_free (partition *partition) | |
659 { | |
660 BITMAP_FREE (partition->stmts); | |
661 BITMAP_FREE (partition->datarefs); | |
662 if (partition->builtin) | |
663 free (partition->builtin); | |
664 | |
665 free (partition); | |
666 } | |
667 | |
668 /* Returns true if the partition can be generated as a builtin. */ | |
669 | |
670 static bool | |
671 partition_builtin_p (partition *partition) | |
672 { | |
673 return partition->kind > PKIND_PARTIAL_MEMSET; | |
674 } | |
675 | |
676 /* Returns true if the partition contains a reduction. */ | |
677 | |
678 static bool | |
679 partition_reduction_p (partition *partition) | |
680 { | |
681 return partition->reduction_p; | |
682 } | |
683 | |
684 /* Partitions are fused for different reasons. */ | 265 /* Partitions are fused for different reasons. */ |
685 enum fuse_type | 266 enum fuse_type |
686 { | 267 { |
687 FUSE_NON_BUILTIN = 0, | 268 FUSE_NON_BUILTIN = 0, |
688 FUSE_REDUCTION = 1, | 269 FUSE_REDUCTION = 1, |
697 "they have reductions", | 278 "they have reductions", |
698 "they have shared memory refs", | 279 "they have shared memory refs", |
699 "they are in the same dependence scc", | 280 "they are in the same dependence scc", |
700 "there is no point to distribute loop"}; | 281 "there is no point to distribute loop"}; |
701 | 282 |
283 | |
284 /* Dump vertex I in RDG to FILE. */ | |
285 | |
702 static void | 286 static void |
703 update_type_for_merge (struct graph *, partition *, partition *); | 287 dump_rdg_vertex (FILE *file, struct graph *rdg, int i) |
288 { | |
289 struct vertex *v = &(rdg->vertices[i]); | |
290 struct graph_edge *e; | |
291 | |
292 fprintf (file, "(vertex %d: (%s%s) (in:", i, | |
293 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "", | |
294 RDG_MEM_READS_STMT (rdg, i) ? "r" : ""); | |
295 | |
296 if (v->pred) | |
297 for (e = v->pred; e; e = e->pred_next) | |
298 fprintf (file, " %d", e->src); | |
299 | |
300 fprintf (file, ") (out:"); | |
301 | |
302 if (v->succ) | |
303 for (e = v->succ; e; e = e->succ_next) | |
304 fprintf (file, " %d", e->dest); | |
305 | |
306 fprintf (file, ")\n"); | |
307 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS); | |
308 fprintf (file, ")\n"); | |
309 } | |
310 | |
311 /* Call dump_rdg_vertex on stderr. */ | |
312 | |
313 DEBUG_FUNCTION void | |
314 debug_rdg_vertex (struct graph *rdg, int i) | |
315 { | |
316 dump_rdg_vertex (stderr, rdg, i); | |
317 } | |
318 | |
319 /* Dump the reduced dependence graph RDG to FILE. */ | |
320 | |
321 static void | |
322 dump_rdg (FILE *file, struct graph *rdg) | |
323 { | |
324 fprintf (file, "(rdg\n"); | |
325 for (int i = 0; i < rdg->n_vertices; i++) | |
326 dump_rdg_vertex (file, rdg, i); | |
327 fprintf (file, ")\n"); | |
328 } | |
329 | |
330 /* Call dump_rdg on stderr. */ | |
331 | |
332 DEBUG_FUNCTION void | |
333 debug_rdg (struct graph *rdg) | |
334 { | |
335 dump_rdg (stderr, rdg); | |
336 } | |
337 | |
338 static void | |
339 dot_rdg_1 (FILE *file, struct graph *rdg) | |
340 { | |
341 int i; | |
342 pretty_printer buffer; | |
343 pp_needs_newline (&buffer) = false; | |
344 buffer.buffer->stream = file; | |
345 | |
346 fprintf (file, "digraph RDG {\n"); | |
347 | |
348 for (i = 0; i < rdg->n_vertices; i++) | |
349 { | |
350 struct vertex *v = &(rdg->vertices[i]); | |
351 struct graph_edge *e; | |
352 | |
353 fprintf (file, "%d [label=\"[%d] ", i, i); | |
354 pp_gimple_stmt_1 (&buffer, RDGV_STMT (v), 0, TDF_SLIM); | |
355 pp_flush (&buffer); | |
356 fprintf (file, "\"]\n"); | |
357 | |
358 /* Highlight reads from memory. */ | |
359 if (RDG_MEM_READS_STMT (rdg, i)) | |
360 fprintf (file, "%d [style=filled, fillcolor=green]\n", i); | |
361 | |
362 /* Highlight stores to memory. */ | |
363 if (RDG_MEM_WRITE_STMT (rdg, i)) | |
364 fprintf (file, "%d [style=filled, fillcolor=red]\n", i); | |
365 | |
366 if (v->succ) | |
367 for (e = v->succ; e; e = e->succ_next) | |
368 switch (RDGE_TYPE (e)) | |
369 { | |
370 case flow_dd: | |
371 /* These are the most common dependences: don't print these. */ | |
372 fprintf (file, "%d -> %d \n", i, e->dest); | |
373 break; | |
374 | |
375 case control_dd: | |
376 fprintf (file, "%d -> %d [label=control] \n", i, e->dest); | |
377 break; | |
378 | |
379 default: | |
380 gcc_unreachable (); | |
381 } | |
382 } | |
383 | |
384 fprintf (file, "}\n\n"); | |
385 } | |
386 | |
387 /* Display the Reduced Dependence Graph using dotty. */ | |
388 | |
389 DEBUG_FUNCTION void | |
390 dot_rdg (struct graph *rdg) | |
391 { | |
392 /* When debugging, you may want to enable the following code. */ | |
393 #ifdef HAVE_POPEN | |
394 FILE *file = popen ("dot -Tx11", "w"); | |
395 if (!file) | |
396 return; | |
397 dot_rdg_1 (file, rdg); | |
398 fflush (file); | |
399 close (fileno (file)); | |
400 pclose (file); | |
401 #else | |
402 dot_rdg_1 (stderr, rdg); | |
403 #endif | |
404 } | |
405 | |
406 /* Returns the index of STMT in RDG. */ | |
407 | |
408 static int | |
409 rdg_vertex_for_stmt (struct graph *rdg ATTRIBUTE_UNUSED, gimple *stmt) | |
410 { | |
411 int index = gimple_uid (stmt); | |
412 gcc_checking_assert (index == -1 || RDG_STMT (rdg, index) == stmt); | |
413 return index; | |
414 } | |
415 | |
416 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is | |
417 the index of DEF in RDG. */ | |
418 | |
419 static void | |
420 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef) | |
421 { | |
422 use_operand_p imm_use_p; | |
423 imm_use_iterator iterator; | |
424 | |
425 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def) | |
426 { | |
427 struct graph_edge *e; | |
428 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p)); | |
429 | |
430 if (use < 0) | |
431 continue; | |
432 | |
433 e = add_edge (rdg, idef, use); | |
434 e->data = XNEW (struct rdg_edge); | |
435 RDGE_TYPE (e) = flow_dd; | |
436 } | |
437 } | |
438 | |
439 /* Creates an edge for the control dependences of BB to the vertex V. */ | |
440 | |
441 static void | |
442 create_edge_for_control_dependence (struct graph *rdg, basic_block bb, | |
443 int v, control_dependences *cd) | |
444 { | |
445 bitmap_iterator bi; | |
446 unsigned edge_n; | |
447 EXECUTE_IF_SET_IN_BITMAP (cd->get_edges_dependent_on (bb->index), | |
448 0, edge_n, bi) | |
449 { | |
450 basic_block cond_bb = cd->get_edge_src (edge_n); | |
451 gimple *stmt = last_stmt (cond_bb); | |
452 if (stmt && is_ctrl_stmt (stmt)) | |
453 { | |
454 struct graph_edge *e; | |
455 int c = rdg_vertex_for_stmt (rdg, stmt); | |
456 if (c < 0) | |
457 continue; | |
458 | |
459 e = add_edge (rdg, c, v); | |
460 e->data = XNEW (struct rdg_edge); | |
461 RDGE_TYPE (e) = control_dd; | |
462 } | |
463 } | |
464 } | |
465 | |
466 /* Creates the edges of the reduced dependence graph RDG. */ | |
467 | |
468 static void | |
469 create_rdg_flow_edges (struct graph *rdg) | |
470 { | |
471 int i; | |
472 def_operand_p def_p; | |
473 ssa_op_iter iter; | |
474 | |
475 for (i = 0; i < rdg->n_vertices; i++) | |
476 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i), | |
477 iter, SSA_OP_DEF) | |
478 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i); | |
479 } | |
480 | |
481 /* Creates the edges of the reduced dependence graph RDG. */ | |
482 | |
483 static void | |
484 create_rdg_cd_edges (struct graph *rdg, control_dependences *cd, loop_p loop) | |
485 { | |
486 int i; | |
487 | |
488 for (i = 0; i < rdg->n_vertices; i++) | |
489 { | |
490 gimple *stmt = RDG_STMT (rdg, i); | |
491 if (gimple_code (stmt) == GIMPLE_PHI) | |
492 { | |
493 edge_iterator ei; | |
494 edge e; | |
495 FOR_EACH_EDGE (e, ei, gimple_bb (stmt)->preds) | |
496 if (flow_bb_inside_loop_p (loop, e->src)) | |
497 create_edge_for_control_dependence (rdg, e->src, i, cd); | |
498 } | |
499 else | |
500 create_edge_for_control_dependence (rdg, gimple_bb (stmt), i, cd); | |
501 } | |
502 } | |
503 | |
504 | |
505 class loop_distribution | |
506 { | |
507 private: | |
508 /* The loop (nest) to be distributed. */ | |
509 vec<loop_p> loop_nest; | |
510 | |
511 /* Vector of data references in the loop to be distributed. */ | |
512 vec<data_reference_p> datarefs_vec; | |
513 | |
514 /* If there is nonaddressable data reference in above vector. */ | |
515 bool has_nonaddressable_dataref_p; | |
516 | |
517 /* Store index of data reference in aux field. */ | |
518 | |
519 /* Hash table for data dependence relation in the loop to be distributed. */ | |
520 hash_table<ddr_hasher> *ddrs_table; | |
521 | |
522 /* Array mapping basic block's index to its topological order. */ | |
523 int *bb_top_order_index; | |
524 /* And size of the array. */ | |
525 int bb_top_order_index_size; | |
526 | |
527 /* Build the vertices of the reduced dependence graph RDG. Return false | |
528 if that failed. */ | |
529 bool create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, loop_p loop); | |
530 | |
531 /* Initialize STMTS with all the statements of LOOP. We use topological | |
532 order to discover all statements. The order is important because | |
533 generate_loops_for_partition is using the same traversal for identifying | |
534 statements in loop copies. */ | |
535 void stmts_from_loop (class loop *loop, vec<gimple *> *stmts); | |
536 | |
537 | |
538 /* Build the Reduced Dependence Graph (RDG) with one vertex per statement of | |
539 LOOP, and one edge per flow dependence or control dependence from control | |
540 dependence CD. During visiting each statement, data references are also | |
541 collected and recorded in global data DATAREFS_VEC. */ | |
542 struct graph * build_rdg (class loop *loop, control_dependences *cd); | |
704 | 543 |
705 /* Merge PARTITION into the partition DEST. RDG is the reduced dependence | 544 /* Merge PARTITION into the partition DEST. RDG is the reduced dependence |
706 graph and we update type for result partition if it is non-NULL. */ | 545 graph and we update type for result partition if it is non-NULL. */ |
546 void partition_merge_into (struct graph *rdg, | |
547 partition *dest, partition *partition, | |
548 enum fuse_type ft); | |
549 | |
550 | |
551 /* Return data dependence relation for data references A and B. The two | |
552 data references must be in lexicographic order with respect to the reduced dependence |
553 graph RDG. We firstly try to find ddr from global ddr hash table. If | |
554 it doesn't exist, compute the ddr and cache it. */ | |
555 data_dependence_relation * get_data_dependence (struct graph *rdg, | |
556 data_reference_p a, | |
557 data_reference_p b); | |
558 | |
559 | |
560 /* In reduced dependence graph RDG for loop distribution, return true if | |
561 dependence between references DR1 and DR2 leads to a dependence cycle | |
562 and such dependence cycle can't be resolved by runtime alias check. */ | |
563 bool data_dep_in_cycle_p (struct graph *rdg, data_reference_p dr1, | |
564 data_reference_p dr2); | |
565 | |
566 | |
567 /* Given reduced dependence graph RDG, PARTITION1 and PARTITION2, update | |
568 PARTITION1's type after merging PARTITION2 into PARTITION1. */ | |
569 void update_type_for_merge (struct graph *rdg, | |
570 partition *partition1, partition *partition2); | |
571 | |
572 | |
573 /* Returns a partition with all the statements needed for computing | |
574 the vertex V of the RDG, also including the loop exit conditions. */ | |
575 partition *build_rdg_partition_for_vertex (struct graph *rdg, int v); | |
576 | |
577 /* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG, classify | |
578 if it forms builtin memcpy or memmove call. */ | |
579 void classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition, | |
580 data_reference_p dst_dr, data_reference_p src_dr); | |
581 | |
582 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP. | |
583 For the moment we detect memset, memcpy and memmove patterns. Bitmap | |
584 STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions. | |
585 Returns true if there is a reduction in all partitions and we | |
586 possibly did not mark PARTITION as having one for this reason. */ | |
587 | |
588 bool | |
589 classify_partition (loop_p loop, | |
590 struct graph *rdg, partition *partition, | |
591 bitmap stmt_in_all_partitions); | |
592 | |
593 | |
594 /* Returns true when PARTITION1 and PARTITION2 access the same memory | |
595 object in RDG. */ | |
596 bool share_memory_accesses (struct graph *rdg, | |
597 partition *partition1, partition *partition2); | |
598 | |
599 /* For each seed statement in STARTING_STMTS, this function builds | |
600 partition for it by adding depended statements according to RDG. | |
601 All partitions are recorded in PARTITIONS. */ | |
602 void rdg_build_partitions (struct graph *rdg, | |
603 vec<gimple *> starting_stmts, | |
604 vec<partition *> *partitions); | |
605 | |
606 /* Compute partition dependence created by the data references in DRS1 | |
607 and DRS2, modify and return DIR according to that. IF ALIAS_DDR is | |
608 not NULL, we record dependence introduced by possible alias between | |
609 two data references in ALIAS_DDRS; otherwise, we simply ignore such | |
610 dependence as if it doesn't exist at all. */ | |
611 int pg_add_dependence_edges (struct graph *rdg, int dir, bitmap drs1, | |
612 bitmap drs2, vec<ddr_p> *alias_ddrs); | |
613 | |
614 | |
615 /* Build and return partition dependence graph for PARTITIONS. RDG is | |
616 reduced dependence graph for the loop to be distributed. If IGNORE_ALIAS_P | |
617 is true, data dependence caused by possible alias between references | |
618 is ignored, as if it doesn't exist at all; otherwise all dependences |
619 are considered. */ | |
620 struct graph *build_partition_graph (struct graph *rdg, | |
621 vec<struct partition *> *partitions, | |
622 bool ignore_alias_p); | |
623 | |
624 /* Given reduced dependence graph RDG merge strong connected components | |
625 of PARTITIONS. If IGNORE_ALIAS_P is true, data dependence caused by | |
626 possible alias between references is ignored, as if it doesn't exist | |
627 at all; otherwise all dependences are considered. */ |
628 void merge_dep_scc_partitions (struct graph *rdg, vec<struct partition *> | |
629 *partitions, bool ignore_alias_p); | |
630 | |
631 /* This is the main function breaking strongly connected components in |
632 PARTITIONS given the reduced dependence graph RDG. Store data dependence |
633 relations for runtime alias check in ALIAS_DDRS. */ | |
634 void break_alias_scc_partitions (struct graph *rdg, vec<struct partition *> | |
635 *partitions, vec<ddr_p> *alias_ddrs); | |
636 | |
637 | |
638 /* Fuse PARTITIONS of LOOP if necessary before finalizing distribution. | |
639 ALIAS_DDRS contains ddrs which need runtime alias check. */ | |
640 void finalize_partitions (class loop *loop, vec<struct partition *> | |
641 *partitions, vec<ddr_p> *alias_ddrs); | |
642 | |
643 /* Distributes the code from LOOP in such a way that producer statements | |
644 are placed before consumer statements. Tries to separate only the | |
645 statements from STMTS into separate loops. Returns the number of | |
646 distributed loops. Set NB_CALLS to number of generated builtin calls. | |
647 Set *DESTROY_P to whether LOOP needs to be destroyed. */ | |
648 int distribute_loop (class loop *loop, vec<gimple *> stmts, | |
649 control_dependences *cd, int *nb_calls, bool *destroy_p, | |
650 bool only_patterns_p); | |
651 | |
652 /* Compute topological order for basic blocks. Topological order is | |
653 needed because data dependence is computed for data references in | |
654 lexicographical order. */ | |
655 void bb_top_order_init (void); | |
656 | |
657 void bb_top_order_destroy (void); | |
658 | |
659 public: | |
660 | |
661 /* Getter for bb_top_order. */ | |
662 | |
663 inline int get_bb_top_order_index_size (void) | |
664 { | |
665 return bb_top_order_index_size; | |
666 } | |
667 | |
668 inline int get_bb_top_order_index (int i) | |
669 { | |
670 return bb_top_order_index[i]; | |
671 } | |
672 | |
673 unsigned int execute (function *fun); | |
674 }; | |
675 | |
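The class above gathers what used to be file-scope state (loop_nest, datarefs_vec, ddrs_table, bb_top_order_index) into one object. A hedged sketch of how a pass body could drive it; the pass wrapper itself is outside the hunks shown here, so the names below are assumptions:

```c
/* Assumed usage sketch only; pass_loop_distribution and its execute hook
   are not part of the hunks shown in this comparison.  */
unsigned int
pass_loop_distribution::execute (function *fun)
{
  return loop_distribution ().execute (fun);
}
```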
676 | |
677 /* If X has a smaller topological sort number than Y, returns -1; | |
678 if greater, returns 1. */ | |
679 static int | |
680 bb_top_order_cmp_r (const void *x, const void *y, void *loop) | |
681 { | |
682 loop_distribution *_loop = | |
683 (loop_distribution *) loop; | |
684 | |
685 basic_block bb1 = *(const basic_block *) x; | |
686 basic_block bb2 = *(const basic_block *) y; | |
687 | |
688 int bb_top_order_index_size = _loop->get_bb_top_order_index_size (); | |
689 | |
690 gcc_assert (bb1->index < bb_top_order_index_size | |
691 && bb2->index < bb_top_order_index_size); | |
692 gcc_assert (bb1 == bb2 | |
693 || _loop->get_bb_top_order_index(bb1->index) | |
694 != _loop->get_bb_top_order_index(bb2->index)); | |
695 | |
696 return (_loop->get_bb_top_order_index(bb1->index) - | |
697 _loop->get_bb_top_order_index(bb2->index)); | |
698 } | |
699 | |
700 bool | |
701 loop_distribution::create_rdg_vertices (struct graph *rdg, vec<gimple *> stmts, | |
702 loop_p loop) | |
703 { | |
704 int i; | |
705 gimple *stmt; | |
706 | |
707 FOR_EACH_VEC_ELT (stmts, i, stmt) | |
708 { | |
709 struct vertex *v = &(rdg->vertices[i]); | |
710 | |
711 /* Record statement to vertex mapping. */ | |
712 gimple_set_uid (stmt, i); | |
713 | |
714 v->data = XNEW (struct rdg_vertex); | |
715 RDGV_STMT (v) = stmt; | |
716 RDGV_DATAREFS (v).create (0); | |
717 RDGV_HAS_MEM_WRITE (v) = false; | |
718 RDGV_HAS_MEM_READS (v) = false; | |
719 if (gimple_code (stmt) == GIMPLE_PHI) | |
720 continue; | |
721 | |
722 unsigned drp = datarefs_vec.length (); | |
723 if (!find_data_references_in_stmt (loop, stmt, &datarefs_vec)) | |
724 return false; | |
725 for (unsigned j = drp; j < datarefs_vec.length (); ++j) | |
726 { | |
727 data_reference_p dr = datarefs_vec[j]; | |
728 if (DR_IS_READ (dr)) | |
729 RDGV_HAS_MEM_READS (v) = true; | |
730 else | |
731 RDGV_HAS_MEM_WRITE (v) = true; | |
732 RDGV_DATAREFS (v).safe_push (dr); | |
733 has_nonaddressable_dataref_p |= may_be_nonaddressable_p (dr->ref); | |
734 } | |
735 } | |
736 return true; | |
737 } | |
738 | |
739 void | |
740 loop_distribution::stmts_from_loop (class loop *loop, vec<gimple *> *stmts) | |
741 { | |
742 unsigned int i; | |
743 basic_block *bbs = get_loop_body_in_custom_order (loop, this, bb_top_order_cmp_r); | |
744 | |
745 for (i = 0; i < loop->num_nodes; i++) | |
746 { | |
747 basic_block bb = bbs[i]; | |
748 | |
749 for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi); | |
750 gsi_next (&bsi)) | |
751 if (!virtual_operand_p (gimple_phi_result (bsi.phi ()))) | |
752 stmts->safe_push (bsi.phi ()); | |
753 | |
754 for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi); | |
755 gsi_next (&bsi)) | |
756 { | |
757 gimple *stmt = gsi_stmt (bsi); | |
758 if (gimple_code (stmt) != GIMPLE_LABEL && !is_gimple_debug (stmt)) | |
759 stmts->safe_push (stmt); | |
760 } | |
761 } | |
762 | |
763 free (bbs); | |
764 } | |
765 | |
766 /* Free the reduced dependence graph RDG. */ | |
707 | 767 |
708 static void | 768 static void |
709 partition_merge_into (struct graph *rdg, partition *dest, | 769 free_rdg (struct graph *rdg) |
710 partition *partition, enum fuse_type ft) | 770 { |
771 int i; | |
772 | |
773 for (i = 0; i < rdg->n_vertices; i++) | |
774 { | |
775 struct vertex *v = &(rdg->vertices[i]); | |
776 struct graph_edge *e; | |
777 | |
778 for (e = v->succ; e; e = e->succ_next) | |
779 free (e->data); | |
780 | |
781 if (v->data) | |
782 { | |
783 gimple_set_uid (RDGV_STMT (v), -1); | |
784 (RDGV_DATAREFS (v)).release (); | |
785 free (v->data); | |
786 } | |
787 } | |
788 | |
789 free_graph (rdg); | |
790 } | |
791 | |
792 struct graph * | |
793 loop_distribution::build_rdg (class loop *loop, control_dependences *cd) | |
794 { | |
795 struct graph *rdg; | |
796 | |
797 /* Create the RDG vertices from the stmts of the loop nest. */ | |
798 auto_vec<gimple *, 10> stmts; | |
799 stmts_from_loop (loop, &stmts); | |
800 rdg = new_graph (stmts.length ()); | |
801 if (!create_rdg_vertices (rdg, stmts, loop)) | |
802 { | |
803 free_rdg (rdg); | |
804 return NULL; | |
805 } | |
806 stmts.release (); | |
807 | |
808 create_rdg_flow_edges (rdg); | |
809 if (cd) | |
810 create_rdg_cd_edges (rdg, cd, loop); | |
811 | |
812 return rdg; | |
813 } | |
814 | |
815 | |
816 /* Allocate and initialize a partition from BITMAP. */ | |
817 | |
818 static partition * | |
819 partition_alloc (void) | |
820 { | |
821 partition *partition = XCNEW (struct partition); | |
822 partition->stmts = BITMAP_ALLOC (NULL); | |
823 partition->reduction_p = false; | |
824 partition->loc = UNKNOWN_LOCATION; | |
825 partition->kind = PKIND_NORMAL; | |
826 partition->type = PTYPE_PARALLEL; | |
827 partition->datarefs = BITMAP_ALLOC (NULL); | |
828 return partition; | |
829 } | |
830 | |
831 /* Free PARTITION. */ | |
832 | |
833 static void | |
834 partition_free (partition *partition) | |
835 { | |
836 BITMAP_FREE (partition->stmts); | |
837 BITMAP_FREE (partition->datarefs); | |
838 if (partition->builtin) | |
839 free (partition->builtin); | |
840 | |
841 free (partition); | |
842 } | |
843 | |
844 /* Returns true if the partition can be generated as a builtin. */ | |
845 | |
846 static bool | |
847 partition_builtin_p (partition *partition) | |
848 { | |
849 return partition->kind > PKIND_PARTIAL_MEMSET; | |
850 } | |
851 | |
852 /* Returns true if the partition contains a reduction. */ | |
853 | |
854 static bool | |
855 partition_reduction_p (partition *partition) | |
856 { | |
857 return partition->reduction_p; | |
858 } | |
859 | |
860 void | |
861 loop_distribution::partition_merge_into (struct graph *rdg, | |
862 partition *dest, partition *partition, enum fuse_type ft) | |
711 { | 863 { |
712 if (dump_file && (dump_flags & TDF_DETAILS)) | 864 if (dump_file && (dump_flags & TDF_DETAILS)) |
713 { | 865 { |
714 fprintf (dump_file, "Fuse partitions because %s:\n", fuse_message[ft]); | 866 fprintf (dump_file, "Fuse partitions because %s:\n", fuse_message[ft]); |
715 fprintf (dump_file, " Part 1: "); | 867 fprintf (dump_file, " Part 1: "); |
776 return false; | 928 return false; |
777 } | 929 } |
778 | 930 |
779 /* Return a copy of LOOP placed before LOOP. */ | 931 /* Return a copy of LOOP placed before LOOP. */ |
780 | 932 |
781 static struct loop * | 933 static class loop * |
782 copy_loop_before (struct loop *loop) | 934 copy_loop_before (class loop *loop) |
783 { | 935 { |
784 struct loop *res; | 936 class loop *res; |
785 edge preheader = loop_preheader_edge (loop); | 937 edge preheader = loop_preheader_edge (loop); |
786 | 938 |
787 initialize_original_copy_tables (); | 939 initialize_original_copy_tables (); |
788 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader); | 940 res = slpeel_tree_duplicate_loop_to_edge_cfg (loop, NULL, preheader); |
789 gcc_assert (res != NULL); | 941 gcc_assert (res != NULL); |
794 } | 946 } |
795 | 947 |
796 /* Creates an empty basic block after LOOP. */ | 948 /* Creates an empty basic block after LOOP. */ |
797 | 949 |
798 static void | 950 static void |
799 create_bb_after_loop (struct loop *loop) | 951 create_bb_after_loop (class loop *loop) |
800 { | 952 { |
801 edge exit = single_exit (loop); | 953 edge exit = single_exit (loop); |
802 | 954 |
803 if (!exit) | 955 if (!exit) |
804 return; | 956 return; |
811 PARTITION bitmap are removed from the loop or from its copy. The | 963 PARTITION bitmap are removed from the loop or from its copy. The |
812 statements are indexed in sequence inside a basic block, and the | 964 statements are indexed in sequence inside a basic block, and the |
813 basic blocks of a loop are taken in dom order. */ | 965 basic blocks of a loop are taken in dom order. */ |
814 | 966 |
815 static void | 967 static void |
816 generate_loops_for_partition (struct loop *loop, partition *partition, | 968 generate_loops_for_partition (class loop *loop, partition *partition, |
817 bool copy_p) | 969 bool copy_p) |
818 { | 970 { |
819 unsigned i; | 971 unsigned i; |
820 basic_block *bbs; | 972 basic_block *bbs; |
821 | 973 |
983 } | 1135 } |
984 | 1136 |
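The comment on generate_loops_for_partition above describes the core mechanism; as a rough illustration (not from the sources), distributing two partitions works by copying the loop and pruning each copy:

```c
/* Sketch only.  For partitions P1 = {S1} and P2 = {S2}:

     original                      after distribution
     --------                      ------------------
     for (i = 0; i < n; i++)       for (i = 0; i < n; i++)   copy placed before LOOP
       {                             S1;                     (copy_loop_before), with
         S1;                                                 statements not in P1 removed
         S2;                       for (i = 0; i < n; i++)   original LOOP, with
       }                             S2;                     statements not in P2 removed

   Both copies are walked in the same topological order, which is why
   stmts_from_loop insists on a stable traversal.  */
```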
985 /* Generate a call to memset for PARTITION in LOOP. */ | 1137 /* Generate a call to memset for PARTITION in LOOP. */ |
986 | 1138 |
987 static void | 1139 static void |
988 generate_memset_builtin (struct loop *loop, partition *partition) | 1140 generate_memset_builtin (class loop *loop, partition *partition) |
989 { | 1141 { |
990 gimple_stmt_iterator gsi; | 1142 gimple_stmt_iterator gsi; |
991 tree mem, fn, nb_bytes; | 1143 tree mem, fn, nb_bytes; |
992 tree val; | 1144 tree val; |
993 struct builtin_info *builtin = partition->builtin; | 1145 struct builtin_info *builtin = partition->builtin; |
994 gimple *fn_call; | 1146 gimple *fn_call; |
995 | 1147 |
996 /* The new statements will be placed before LOOP. */ | 1148 /* The new statements will be placed before LOOP. */ |
997 gsi = gsi_last_bb (loop_preheader_edge (loop)->src); | 1149 gsi = gsi_last_bb (loop_preheader_edge (loop)->src); |
998 | 1150 |
999 nb_bytes = builtin->size; | 1151 nb_bytes = rewrite_to_non_trapping_overflow (builtin->size); |
1000 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE, | 1152 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE, |
1001 false, GSI_CONTINUE_LINKING); | 1153 false, GSI_CONTINUE_LINKING); |
1002 mem = builtin->dst_base; | 1154 mem = builtin->dst_base; |
1003 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE, | 1155 mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE, |
1004 false, GSI_CONTINUE_LINKING); | 1156 false, GSI_CONTINUE_LINKING); |
1020 val = tem; | 1172 val = tem; |
1021 } | 1173 } |
1022 | 1174 |
1023 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); | 1175 fn = build_fold_addr_expr (builtin_decl_implicit (BUILT_IN_MEMSET)); |
1024 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes); | 1176 fn_call = gimple_build_call (fn, 3, mem, val, nb_bytes); |
1177 gimple_set_location (fn_call, partition->loc); | |
1025 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); | 1178 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); |
1179 fold_stmt (&gsi); | |
1026 | 1180 |
1027 if (dump_file && (dump_flags & TDF_DETAILS)) | 1181 if (dump_file && (dump_flags & TDF_DETAILS)) |
1028 { | 1182 { |
1029 fprintf (dump_file, "generated memset"); | 1183 fprintf (dump_file, "generated memset"); |
1030 if (bytev == 0) | 1184 if (bytev == 0) |
1035 } | 1189 } |
1036 | 1190 |
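As an illustration of what generate_memset_builtin replaces (the input below is made up, not from the sources):

```c
/* A store of a loop-invariant, byte-repeatable value over a contiguous
   range is classified as PKIND_MEMSET and the loop is replaced by the
   call built above, roughly (p a char array):

     for (i = 0; i < n; i++)            __builtin_memset (p, 0, n);
       p[i] = 0;                   =>

   In this revision the generated call also carries the location of the
   original store (partition->loc) and is folded in place (fold_stmt).  */
```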
1037 /* Generate a call to memcpy for PARTITION in LOOP. */ | 1191 /* Generate a call to memcpy for PARTITION in LOOP. */ |
1038 | 1192 |
1039 static void | 1193 static void |
1040 generate_memcpy_builtin (struct loop *loop, partition *partition) | 1194 generate_memcpy_builtin (class loop *loop, partition *partition) |
1041 { | 1195 { |
1042 gimple_stmt_iterator gsi; | 1196 gimple_stmt_iterator gsi; |
1043 gimple *fn_call; | 1197 gimple *fn_call; |
1044 tree dest, src, fn, nb_bytes; | 1198 tree dest, src, fn, nb_bytes; |
1045 enum built_in_function kind; | 1199 enum built_in_function kind; |
1046 struct builtin_info *builtin = partition->builtin; | 1200 struct builtin_info *builtin = partition->builtin; |
1047 | 1201 |
1048 /* The new statements will be placed before LOOP. */ | 1202 /* The new statements will be placed before LOOP. */ |
1049 gsi = gsi_last_bb (loop_preheader_edge (loop)->src); | 1203 gsi = gsi_last_bb (loop_preheader_edge (loop)->src); |
1050 | 1204 |
1051 nb_bytes = builtin->size; | 1205 nb_bytes = rewrite_to_non_trapping_overflow (builtin->size); |
1052 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE, | 1206 nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE, |
1053 false, GSI_CONTINUE_LINKING); | 1207 false, GSI_CONTINUE_LINKING); |
1054 dest = builtin->dst_base; | 1208 dest = builtin->dst_base; |
1055 src = builtin->src_base; | 1209 src = builtin->src_base; |
1056 if (partition->kind == PKIND_MEMCPY | 1210 if (partition->kind == PKIND_MEMCPY |
1063 false, GSI_CONTINUE_LINKING); | 1217 false, GSI_CONTINUE_LINKING); |
1064 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE, | 1218 src = force_gimple_operand_gsi (&gsi, src, true, NULL_TREE, |
1065 false, GSI_CONTINUE_LINKING); | 1219 false, GSI_CONTINUE_LINKING); |
1066 fn = build_fold_addr_expr (builtin_decl_implicit (kind)); | 1220 fn = build_fold_addr_expr (builtin_decl_implicit (kind)); |
1067 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes); | 1221 fn_call = gimple_build_call (fn, 3, dest, src, nb_bytes); |
1222 gimple_set_location (fn_call, partition->loc); | |
1068 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); | 1223 gsi_insert_after (&gsi, fn_call, GSI_CONTINUE_LINKING); |
1224 fold_stmt (&gsi); | |
1069 | 1225 |
1070 if (dump_file && (dump_flags & TDF_DETAILS)) | 1226 if (dump_file && (dump_flags & TDF_DETAILS)) |
1071 { | 1227 { |
1072 if (kind == BUILT_IN_MEMCPY) | 1228 if (kind == BUILT_IN_MEMCPY) |
1073 fprintf (dump_file, "generated memcpy\n"); | 1229 fprintf (dump_file, "generated memcpy\n"); |
1077 } | 1233 } |
1078 | 1234 |
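Similarly for generate_memcpy_builtin (illustrative input, not from the sources):

```c
/* A copy loop between two ranges becomes a single call, roughly
   (dst and src char arrays):

     for (i = 0; i < n; i++)            __builtin_memcpy (dst, src, n);
       dst[i] = src[i];            =>

   When the ranges may overlap in a way that forward copying would break,
   the partition is PKIND_MEMMOVE instead and BUILT_IN_MEMMOVE is chosen,
   as selected a few lines above.  */
```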
1079 /* Remove and destroy the loop LOOP. */ | 1235 /* Remove and destroy the loop LOOP. */ |
1080 | 1236 |
1081 static void | 1237 static void |
1082 destroy_loop (struct loop *loop) | 1238 destroy_loop (class loop *loop) |
1083 { | 1239 { |
1084 unsigned nbbs = loop->num_nodes; | 1240 unsigned nbbs = loop->num_nodes; |
1085 edge exit = single_exit (loop); | 1241 edge exit = single_exit (loop); |
1086 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest; | 1242 basic_block src = loop_preheader_edge (loop)->src, dest = exit->dest; |
1087 basic_block *bbs; | 1243 basic_block *bbs; |
1088 unsigned i; | 1244 unsigned i; |
1089 | 1245 |
1090 bbs = get_loop_body_in_dom_order (loop); | 1246 bbs = get_loop_body_in_dom_order (loop); |
1091 | 1247 |
1248 gimple_stmt_iterator dst_gsi = gsi_after_labels (exit->dest); | |
1249 bool safe_p = single_pred_p (exit->dest); | |
1250 for (unsigned i = 0; i < nbbs; ++i) | |
1251 { | |
1252 /* We have made sure to not leave any dangling uses of SSA | |
1253 names defined in the loop. With the exception of virtuals. | |
1254 Make sure we replace all uses of virtual defs that will remain | |
1255 outside of the loop with the bare symbol as delete_basic_block | |
1256 will release them. */ | |
1257 for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); | |
1258 gsi_next (&gsi)) | |
1259 { | |
1260 gphi *phi = gsi.phi (); | |
1261 if (virtual_operand_p (gimple_phi_result (phi))) | |
1262 mark_virtual_phi_result_for_renaming (phi); | |
1263 } | |
1264 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi);) | |
1265 { | |
1266 gimple *stmt = gsi_stmt (gsi); | |
1267 tree vdef = gimple_vdef (stmt); | |
1268 if (vdef && TREE_CODE (vdef) == SSA_NAME) | |
1269 mark_virtual_operand_for_renaming (vdef); | |
1270 /* Also move and eventually reset debug stmts. We can leave | |
1271 constant values in place in case the stmt dominates the exit. | |
1272 ??? Non-constant values from the last iteration can be | |
1273 replaced with final values if we can compute them. */ | |
1274 if (gimple_debug_bind_p (stmt)) | |
1275 { | |
1276 tree val = gimple_debug_bind_get_value (stmt); | |
1277 gsi_move_before (&gsi, &dst_gsi); | |
1278 if (val | |
1279 && (!safe_p | |
1280 || !is_gimple_min_invariant (val) | |
1281 || !dominated_by_p (CDI_DOMINATORS, exit->src, bbs[i]))) | |
1282 { | |
1283 gimple_debug_bind_reset_value (stmt); | |
1284 update_stmt (stmt); | |
1285 } | |
1286 } | |
1287 else | |
1288 gsi_next (&gsi); | |
1289 } | |
1290 } | |
1291 | |
1092 redirect_edge_pred (exit, src); | 1292 redirect_edge_pred (exit, src); |
1093 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE); | 1293 exit->flags &= ~(EDGE_TRUE_VALUE|EDGE_FALSE_VALUE); |
1094 exit->flags |= EDGE_FALLTHRU; | 1294 exit->flags |= EDGE_FALLTHRU; |
1095 cancel_loop_tree (loop); | 1295 cancel_loop_tree (loop); |
1096 rescan_loop_exit (exit, false, true); | 1296 rescan_loop_exit (exit, false, true); |
1097 | 1297 |
1098 i = nbbs; | 1298 i = nbbs; |
1099 do | 1299 do |
1100 { | 1300 { |
1101 /* We have made sure to not leave any dangling uses of SSA | |
1102 names defined in the loop. With the exception of virtuals. | |
1103 Make sure we replace all uses of virtual defs that will remain | |
1104 outside of the loop with the bare symbol as delete_basic_block | |
1105 will release them. */ | |
1106 --i; | 1301 --i; |
1107 for (gphi_iterator gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); | |
1108 gsi_next (&gsi)) | |
1109 { | |
1110 gphi *phi = gsi.phi (); | |
1111 if (virtual_operand_p (gimple_phi_result (phi))) | |
1112 mark_virtual_phi_result_for_renaming (phi); | |
1113 } | |
1114 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); !gsi_end_p (gsi); | |
1115 gsi_next (&gsi)) | |
1116 { | |
1117 gimple *stmt = gsi_stmt (gsi); | |
1118 tree vdef = gimple_vdef (stmt); | |
1119 if (vdef && TREE_CODE (vdef) == SSA_NAME) | |
1120 mark_virtual_operand_for_renaming (vdef); | |
1121 } | |
1122 delete_basic_block (bbs[i]); | 1302 delete_basic_block (bbs[i]); |
1123 } | 1303 } |
1124 while (i != 0); | 1304 while (i != 0); |
1125 | 1305 |
1126 free (bbs); | 1306 free (bbs); |
1130 } | 1310 } |
1131 | 1311 |
1132 /* Generates code for PARTITION. Return whether LOOP needs to be destroyed. */ | 1312 /* Generates code for PARTITION. Return whether LOOP needs to be destroyed. */ |
1133 | 1313 |
1134 static bool | 1314 static bool |
1135 generate_code_for_partition (struct loop *loop, | 1315 generate_code_for_partition (class loop *loop, |
1136 partition *partition, bool copy_p) | 1316 partition *partition, bool copy_p) |
1137 { | 1317 { |
1138 switch (partition->kind) | 1318 switch (partition->kind) |
1139 { | 1319 { |
1140 case PKIND_NORMAL: | 1320 case PKIND_NORMAL: |
1163 if (!copy_p) | 1343 if (!copy_p) |
1164 return true; | 1344 return true; |
1165 return false; | 1345 return false; |
1166 } | 1346 } |
1167 | 1347 |
1168 /* Return data dependence relation for data references A and B. The two | 1348 data_dependence_relation * |
1169 data references must be in lexicographic order with respect to the reduced dependence | 1349 loop_distribution::get_data_dependence (struct graph *rdg, data_reference_p a, |
1170 graph RDG. We firstly try to find ddr from global ddr hash table. If | 1350 data_reference_p b) |
1171 it doesn't exist, compute the ddr and cache it. */ | |
1172 | |
1173 static data_dependence_relation * | |
1174 get_data_dependence (struct graph *rdg, data_reference_p a, data_reference_p b) | |
1175 { | 1351 { |
1176 struct data_dependence_relation ent, **slot; | 1352 struct data_dependence_relation ent, **slot; |
1177 struct data_dependence_relation *ddr; | 1353 struct data_dependence_relation *ddr; |
1178 | 1354 |
1179 gcc_assert (DR_IS_WRITE (a) || DR_IS_WRITE (b)); | 1355 gcc_assert (DR_IS_WRITE (a) || DR_IS_WRITE (b)); |
1190 } | 1366 } |
1191 | 1367 |
1192 return *slot; | 1368 return *slot; |
1193 } | 1369 } |
1194 | 1370 |
1195 /* In reduced dependence graph RDG for loop distribution, return true if | 1371 bool |
1196 dependence between references DR1 and DR2 leads to a dependence cycle | 1372 loop_distribution::data_dep_in_cycle_p (struct graph *rdg, |
1197 and such dependence cycle can't be resolved by runtime alias check. */ | 1373 data_reference_p dr1, |
1198 | 1374 data_reference_p dr2) |
1199 static bool | |
1200 data_dep_in_cycle_p (struct graph *rdg, | |
1201 data_reference_p dr1, data_reference_p dr2) | |
1202 { | 1375 { |
1203 struct data_dependence_relation *ddr; | 1376 struct data_dependence_relation *ddr; |
1204 | 1377 |
1205 /* Re-shuffle data-refs to be in topological order. */ | 1378 /* Re-shuffle data-refs to be in topological order. */ |
1206 if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1)) | 1379 if (rdg_vertex_for_stmt (rdg, DR_STMT (dr1)) |
1226 return false; | 1399 return false; |
1227 | 1400 |
1228 return true; | 1401 return true; |
1229 } | 1402 } |
1230 | 1403 |
1231 /* Given reduced dependence graph RDG, PARTITION1 and PARTITION2, update | 1404 void |
1232 PARTITION1's type after merging PARTITION2 into PARTITION1. */ | 1405 loop_distribution::update_type_for_merge (struct graph *rdg, |
1233 | 1406 partition *partition1, |
1234 static void | 1407 partition *partition2) |
1235 update_type_for_merge (struct graph *rdg, | |
1236 partition *partition1, partition *partition2) | |
1237 { | 1408 { |
1238 unsigned i, j; | 1409 unsigned i, j; |
1239 bitmap_iterator bi, bj; | 1410 bitmap_iterator bi, bj; |
1240 data_reference_p dr1, dr2; | 1411 data_reference_p dr1, dr2; |
1241 | 1412 |
1259 } | 1430 } |
1260 } | 1431 } |
1261 } | 1432 } |
1262 } | 1433 } |
1263 | 1434 |
1264 /* Returns a partition with all the statements needed for computing | 1435 partition * |
1265 the vertex V of the RDG, also including the loop exit conditions. */ | 1436 loop_distribution::build_rdg_partition_for_vertex (struct graph *rdg, int v) |
1266 | |
1267 static partition * | |
1268 build_rdg_partition_for_vertex (struct graph *rdg, int v) | |
1269 { | 1437 { |
1270 partition *partition = partition_alloc (); | 1438 partition *partition = partition_alloc (); |
1271 auto_vec<int, 3> nodes; | 1439 auto_vec<int, 3> nodes; |
1272 unsigned i, j; | 1440 unsigned i, j; |
1273 int x; | 1441 int x; |
1307 /* Given PARTITION of LOOP and RDG, record single load/store data references | 1475 /* Given PARTITION of LOOP and RDG, record single load/store data references |
1308 for builtin partition in SRC_DR/DST_DR, return false if there are no such | 1476 for builtin partition in SRC_DR/DST_DR, return false if there are no such |
1309 data references. */ | 1477 data references. */ |
1310 | 1478 |
1311 static bool | 1479 static bool |
1312 find_single_drs (struct loop *loop, struct graph *rdg, partition *partition, | 1480 find_single_drs (class loop *loop, struct graph *rdg, partition *partition, |
1313 data_reference_p *dst_dr, data_reference_p *src_dr) | 1481 data_reference_p *dst_dr, data_reference_p *src_dr) |
1314 { | 1482 { |
1315 unsigned i; | 1483 unsigned i; |
1316 data_reference_p single_ld = NULL, single_st = NULL; | 1484 data_reference_p single_ld = NULL, single_st = NULL; |
1317 bitmap_iterator bi; | 1485 bitmap_iterator bi; |
1430 compute_access_range (loop_p loop_nest, data_reference_p dr, tree *base, | 1598 compute_access_range (loop_p loop_nest, data_reference_p dr, tree *base, |
1431 tree *size, vec<tree> *steps = NULL) | 1599 tree *size, vec<tree> *steps = NULL) |
1432 { | 1600 { |
1433 location_t loc = gimple_location (DR_STMT (dr)); | 1601 location_t loc = gimple_location (DR_STMT (dr)); |
1434 basic_block bb = gimple_bb (DR_STMT (dr)); | 1602 basic_block bb = gimple_bb (DR_STMT (dr)); |
1435 struct loop *loop = bb->loop_father; | 1603 class loop *loop = bb->loop_father; |
1436 tree ref = DR_REF (dr); | 1604 tree ref = DR_REF (dr); |
1437 tree access_base = build_fold_addr_expr (ref); | 1605 tree access_base = build_fold_addr_expr (ref); |
1438 tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (ref)); | 1606 tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (ref)); |
1439 int res = 0; | 1607 int res = 0; |
1440 | 1608 |
1557 } | 1725 } |
1558 | 1726 |
1559 /* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG, classify | 1727 /* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG, classify |
1560 if it forms builtin memcpy or memmove call. */ | 1728 if it forms builtin memcpy or memmove call. */ |
1561 | 1729 |
1562 static void | 1730 void |
1563 classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition, | 1731 loop_distribution::classify_builtin_ldst (loop_p loop, struct graph *rdg, |
1564 data_reference_p dst_dr, data_reference_p src_dr) | 1732 partition *partition, |
1733 data_reference_p dst_dr, | |
1734 data_reference_p src_dr) | |
1565 { | 1735 { |
1566 tree base, size, src_base, src_size; | 1736 tree base, size, src_base, src_size; |
1567 auto_vec<tree> dst_steps, src_steps; | 1737 auto_vec<tree> dst_steps, src_steps; |
1568 | 1738 |
1569 /* Compute access range of both load and store. */ | 1739 /* Compute access range of both load and store. */ |
1617 partition->builtin = alloc_builtin (dst_dr, src_dr, base, src_base, size); | 1787 partition->builtin = alloc_builtin (dst_dr, src_dr, base, src_base, size); |
1618 partition->kind = PKIND_MEMMOVE; | 1788 partition->kind = PKIND_MEMMOVE; |
1619 return; | 1789 return; |
1620 } | 1790 } |
1621 | 1791 |
1622 /* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP. | 1792 bool |
1623 For the moment we detect memset, memcpy and memmove patterns. Bitmap | 1793 loop_distribution::classify_partition (loop_p loop, |
1624 STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions. */ | 1794 struct graph *rdg, partition *partition, |
1625 | 1795 bitmap stmt_in_all_partitions) |
1626 static void | |
1627 classify_partition (loop_p loop, struct graph *rdg, partition *partition, | |
1628 bitmap stmt_in_all_partitions) | |
1629 { | 1796 { |
1630 bitmap_iterator bi; | 1797 bitmap_iterator bi; |
1631 unsigned i; | 1798 unsigned i; |
1632 data_reference_p single_ld = NULL, single_st = NULL; | 1799 data_reference_p single_ld = NULL, single_st = NULL; |
1633 bool volatiles_p = false, has_reduction = false; | 1800 bool volatiles_p = false, has_reduction = false; |
1649 is used outside of the loop. To work around this issue, we skip | 1816 is used outside of the loop. To work around this issue, we skip |
1650 marking partition as reduction if the reduction stmt belongs | 1817 marking partition as reduction if the reduction stmt belongs |
1651 to all partitions. In such case, reduction will be computed | 1818 to all partitions. In such case, reduction will be computed |
1652 correctly no matter how partitions are fused/distributed. */ | 1819 correctly no matter how partitions are fused/distributed. */ |
1653 if (!bitmap_bit_p (stmt_in_all_partitions, i)) | 1820 if (!bitmap_bit_p (stmt_in_all_partitions, i)) |
1654 { | 1821 partition->reduction_p = true; |
1655 partition->reduction_p = true; | 1822 else |
1656 return; | 1823 has_reduction = true; |
1657 } | 1824 } |
1658 has_reduction = true; | 1825 } |
1659 } | 1826 |
1660 } | 1827 /* Simple workaround to prevent classifying the partition as builtin |
1828 if it contains any use outside of loop. For the case where all | |
1829 partitions have the reduction this simple workaround is delayed | |
1830 to only affect the last partition. */ | |
1831 if (partition->reduction_p) | |
1832 return has_reduction; | |
1661 | 1833 |
1662 /* Perform general partition disqualification for builtins. */ | 1834 /* Perform general partition disqualification for builtins. */ |
1663 if (volatiles_p | 1835 if (volatiles_p |
1664 /* Simple workaround to prevent classifying the partition as builtin | |
1665 if it contains any use outside of loop. */ | |
1666 || has_reduction | |
1667 || !flag_tree_loop_distribute_patterns) | 1836 || !flag_tree_loop_distribute_patterns) |
1668 return; | 1837 return has_reduction; |
1669 | 1838 |
1670 /* Find single load/store data references for builtin partition. */ | 1839 /* Find single load/store data references for builtin partition. */ |
1671 if (!find_single_drs (loop, rdg, partition, &single_st, &single_ld)) | 1840 if (!find_single_drs (loop, rdg, partition, &single_st, &single_ld)) |
1672 return; | 1841 return has_reduction; |
1842 | |
1843 partition->loc = gimple_location (DR_STMT (single_st)); | |
1673 | 1844 |
1674 /* Classify the builtin kind. */ | 1845 /* Classify the builtin kind. */ |
1675 if (single_ld == NULL) | 1846 if (single_ld == NULL) |
1676 classify_builtin_st (loop, partition, single_st); | 1847 classify_builtin_st (loop, partition, single_st); |
1677 else | 1848 else |
1678 classify_builtin_ldst (loop, rdg, partition, single_st, single_ld); | 1849 classify_builtin_ldst (loop, rdg, partition, single_st, single_ld); |
1679 } | 1850 return has_reduction; |
1680 | 1851 } |
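For contrast, a hedged sketch of the store-only case that classify_partition hands to classify_builtin_st; the function name zero_loop is an illustrative assumption. A contiguous store of a suitable constant would be classified as PKIND_MEMSET and later emitted as a memset call.

#include <stddef.h>

/* Illustrative only: a partition with a single store and no load.  */
void
zero_loop (char *p, size_t n)
{
  for (size_t i = 0; i < n; i++)
    p[i] = 0;
}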
1681 /* Returns true when PARTITION1 and PARTITION2 access the same memory | 1852 |
1682 object in RDG. */ | 1853 bool |
1683 | 1854 loop_distribution::share_memory_accesses (struct graph *rdg, |
1684 static bool | |
1685 share_memory_accesses (struct graph *rdg, | |
1686 partition *partition1, partition *partition2) | 1855 partition *partition1, partition *partition2) |
1687 { | 1856 { |
1688 unsigned i, j; | 1857 unsigned i, j; |
1689 bitmap_iterator bi, bj; | 1858 bitmap_iterator bi, bj; |
1690 data_reference_p dr1, dr2; | 1859 data_reference_p dr1, dr2; |
1727 | 1896 |
1728 /* For each seed statement in STARTING_STMTS, this function builds | 1897 /* For each seed statement in STARTING_STMTS, this function builds |
1729 a partition for it by adding dependent statements according to RDG. | 1898 a partition for it by adding dependent statements according to RDG. |
1730 All partitions are recorded in PARTITIONS. */ | 1899 All partitions are recorded in PARTITIONS. */ |
1731 | 1900 |
1732 static void | 1901 void |
1733 rdg_build_partitions (struct graph *rdg, | 1902 loop_distribution::rdg_build_partitions (struct graph *rdg, |
1734 vec<gimple *> starting_stmts, | 1903 vec<gimple *> starting_stmts, |
1735 vec<partition *> *partitions) | 1904 vec<partition *> *partitions) |
1736 { | 1905 { |
1737 auto_bitmap processed; | 1906 auto_bitmap processed; |
1738 int i; | 1907 int i; |
1739 gimple *stmt; | 1908 gimple *stmt; |
1740 | 1909 |
1846 return true; | 2015 return true; |
1847 | 2016 |
1848 return false; | 2017 return false; |
1849 } | 2018 } |
1850 | 2019 |
1851 /* Compute partition dependence created by the data references in DRS1 | 2020 int |
1852 and DRS2, modify and return DIR according to that. IF ALIAS_DDR is | 2021 loop_distribution::pg_add_dependence_edges (struct graph *rdg, int dir, |
1853 not NULL, we record dependence introduced by possible alias between | |
1854 two data references in ALIAS_DDRS; otherwise, we simply ignore such | |
1855 dependence as if it doesn't exist at all. */ | |
1856 | |
1857 static int | |
1858 pg_add_dependence_edges (struct graph *rdg, int dir, | |
1859 bitmap drs1, bitmap drs2, vec<ddr_p> *alias_ddrs) | 2022 bitmap drs1, bitmap drs2, vec<ddr_p> *alias_ddrs) |
1860 { | 2023 { |
1861 unsigned i, j; | 2024 unsigned i, j; |
1862 bitmap_iterator bi, bj; | 2025 bitmap_iterator bi, bj; |
1863 data_reference_p dr1, dr2, saved_dr1; | 2026 data_reference_p dr1, dr2, saved_dr1; |
1894 res = data_ref_compare_tree (DR_BASE_ADDRESS (dr1), | 2057 res = data_ref_compare_tree (DR_BASE_ADDRESS (dr1), |
1895 DR_BASE_ADDRESS (dr2)); | 2058 DR_BASE_ADDRESS (dr2)); |
1896 /* Be conservative. If data references are not well analyzed, | 2059 /* Be conservative. If data references are not well analyzed, |
1897 or the two data references have the same base address and | 2060 or the two data references have the same base address and |
1898 offset, add dependence and consider it alias to each other. | 2061 offset, add dependence and consider it alias to each other. |
1899 In other words, the dependence can not be resolved by | 2062 In other words, the dependence cannot be resolved by |
1900 runtime alias check. */ | 2063 runtime alias check. */ |
1901 if (!DR_BASE_ADDRESS (dr1) || !DR_BASE_ADDRESS (dr2) | 2064 if (!DR_BASE_ADDRESS (dr1) || !DR_BASE_ADDRESS (dr2) |
1902 || !DR_OFFSET (dr1) || !DR_OFFSET (dr2) | 2065 || !DR_OFFSET (dr1) || !DR_OFFSET (dr2) |
1903 || !DR_INIT (dr1) || !DR_INIT (dr2) | 2066 || !DR_INIT (dr1) || !DR_INIT (dr2) |
1904 || !DR_STEP (dr1) || !tree_fits_uhwi_p (DR_STEP (dr1)) | 2067 || !DR_STEP (dr1) || !tree_fits_uhwi_p (DR_STEP (dr1)) |
2069 reduced dependence graph for the loop to be distributed. If IGNORE_ALIAS_P | 2232 reduced dependence graph for the loop to be distributed. If IGNORE_ALIAS_P |
2070 is true, data dependence caused by possible alias between references | 2233 is true, data dependence caused by possible alias between references |
2071 is ignored, as if it doesn't exist at all; otherwise all dependences | 2234 is ignored, as if it doesn't exist at all; otherwise all dependences |
2072 are considered. */ | 2235 are considered. */ |
2073 | 2236 |
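A hedged illustration of how partitions become vertices and dependences become edges in the graph built below; the partition numbering and the assumption that the pointers do not alias are editorial, not taken from this file.

#include <stddef.h>

/* Illustrative only: three seed stores give three partitions.  P1 reads
   what P0 writes, so the partition graph gets a compile-time edge
   P0 -> P1 that no runtime alias check can remove; P2 touches separate
   memory and stays an isolated vertex, so it may end up in its own
   distributed loop (or a memset).  */
void
three_partitions (int *restrict a, int *restrict b, int *restrict c, size_t n)
{
  for (size_t i = 0; i < n; i++)
    {
      a[i] = (int) i;   /* P0 */
      b[i] = a[i] + 1;  /* P1: depends on P0 */
      c[i] = 0;         /* P2: independent */
    }
}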
2074 static struct graph * | 2237 struct graph * |
2075 build_partition_graph (struct graph *rdg, | 2238 loop_distribution::build_partition_graph (struct graph *rdg, |
2076 vec<struct partition *> *partitions, | 2239 vec<struct partition *> *partitions, |
2077 bool ignore_alias_p) | 2240 bool ignore_alias_p) |
2078 { | 2241 { |
2079 int i, j; | 2242 int i, j; |
2080 struct partition *partition1, *partition2; | 2243 struct partition *partition1, *partition2; |
2081 graph *pg = new_graph (partitions->length ()); | 2244 graph *pg = new_graph (partitions->length ()); |
2082 auto_vec<ddr_p> alias_ddrs, *alias_ddrs_p; | 2245 auto_vec<ddr_p> alias_ddrs, *alias_ddrs_p; |
2107 dir = pg_add_dependence_edges (rdg, dir, partition1->datarefs, | 2270 dir = pg_add_dependence_edges (rdg, dir, partition1->datarefs, |
2108 partition2->datarefs, alias_ddrs_p); | 2271 partition2->datarefs, alias_ddrs_p); |
2109 | 2272 |
2110 /* Add edge to partition graph if there exists dependence. There | 2273 /* Add edge to partition graph if there exists dependence. There |
2111 are two types of edges. One type of edge is caused by compilation | 2274 are two types of edges. One type of edge is caused by compilation |
2112 time known dependence, this type can not be resolved by runtime | 2275 time known dependence, this type cannot be resolved by runtime |
2113 alias check. The other type can be resolved by runtime alias | 2276 alias check. The other type can be resolved by runtime alias |
2114 check. */ | 2277 check. */ |
2115 if (dir == 1 || dir == 2 | 2278 if (dir == 1 || dir == 2 |
2116 || alias_ddrs.length () > 0) | 2279 || alias_ddrs.length () > 0) |
2117 { | 2280 { |
2154 if (data->partition) | 2317 if (data->partition) |
2155 partitions->safe_push (data->partition); | 2318 partitions->safe_push (data->partition); |
2156 } | 2319 } |
2157 } | 2320 } |
2158 | 2321 |
2159 /* Given reduced dependence graph RDG merge strong connected components | 2322 void |
2160 of PARTITIONS. If IGNORE_ALIAS_P is true, data dependence caused by | 2323 loop_distribution::merge_dep_scc_partitions (struct graph *rdg, |
2161 possible alias between references is ignored, as if it doesn't exist | 2324 vec<struct partition *> *partitions, |
2162 at all; otherwise all depdendences are considered. */ | 2325 bool ignore_alias_p) |
2163 | |
2164 static void | |
2165 merge_dep_scc_partitions (struct graph *rdg, | |
2166 vec<struct partition *> *partitions, | |
2167 bool ignore_alias_p) | |
2168 { | 2326 { |
2169 struct partition *partition1, *partition2; | 2327 struct partition *partition1, *partition2; |
2170 struct pg_vdata *data; | 2328 struct pg_vdata *data; |
2171 graph *pg = build_partition_graph (rdg, partitions, ignore_alias_p); | 2329 graph *pg = build_partition_graph (rdg, partitions, ignore_alias_p); |
2172 int i, j, num_sccs = graphds_scc (pg, NULL); | 2330 int i, j, num_sccs = graphds_scc (pg, NULL); |
2235 } | 2393 } |
2236 | 2394 |
2237 /* This is the main function breaking strong conected components in | 2395 /* This is the main function breaking strong conected components in |
2238 PARTITIONS giving reduced depdendence graph RDG. Store data dependence | 2396 PARTITIONS giving reduced depdendence graph RDG. Store data dependence |
2239 relations for runtime alias check in ALIAS_DDRS. */ | 2397 relations for runtime alias check in ALIAS_DDRS. */ |
2240 | 2398 void |
2241 static void | 2399 loop_distribution::break_alias_scc_partitions (struct graph *rdg, |
2242 break_alias_scc_partitions (struct graph *rdg, | 2400 vec<struct partition *> *partitions, |
2243 vec<struct partition *> *partitions, | 2401 vec<ddr_p> *alias_ddrs) |
2244 vec<ddr_p> *alias_ddrs) | |
2245 { | 2402 { |
2246 int i, j, k, num_sccs, num_sccs_no_alias; | 2403 int i, j, k, num_sccs, num_sccs_no_alias; |
2247 /* Build partition dependence graph. */ | 2404 /* Build partition dependence graph. */ |
2248 graph *pg = build_partition_graph (rdg, partitions, false); | 2405 graph *pg = build_partition_graph (rdg, partitions, false); |
2249 | 2406 |
2380 | 2537 |
2381 /* Return true if LOOP's latch is dominated by statement for data reference | 2538 /* Return true if LOOP's latch is dominated by statement for data reference |
2382 DR. */ | 2539 DR. */ |
2383 | 2540 |
2384 static inline bool | 2541 static inline bool |
2385 latch_dominated_by_data_ref (struct loop *loop, data_reference *dr) | 2542 latch_dominated_by_data_ref (class loop *loop, data_reference *dr) |
2386 { | 2543 { |
2387 return dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src, | 2544 return dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src, |
2388 gimple_bb (DR_STMT (dr))); | 2545 gimple_bb (DR_STMT (dr))); |
2389 } | 2546 } |
2390 | 2547 |
2391 /* Compute alias check pairs and store them in COMP_ALIAS_PAIRS for LOOP's | 2548 /* Compute alias check pairs and store them in COMP_ALIAS_PAIRS for LOOP's |
2392 data dependence relations ALIAS_DDRS. */ | 2549 data dependence relations ALIAS_DDRS. */ |
2393 | 2550 |
2394 static void | 2551 static void |
2395 compute_alias_check_pairs (struct loop *loop, vec<ddr_p> *alias_ddrs, | 2552 compute_alias_check_pairs (class loop *loop, vec<ddr_p> *alias_ddrs, |
2396 vec<dr_with_seg_len_pair_t> *comp_alias_pairs) | 2553 vec<dr_with_seg_len_pair_t> *comp_alias_pairs) |
2397 { | 2554 { |
2398 unsigned int i; | 2555 unsigned int i; |
2399 unsigned HOST_WIDE_INT factor = 1; | 2556 unsigned HOST_WIDE_INT factor = 1; |
2400 tree niters_plus_one, niters = number_of_latch_executions (loop); | 2557 tree niters_plus_one, niters = number_of_latch_executions (loop); |
2411 { | 2568 { |
2412 ddr_p ddr = (*alias_ddrs)[i]; | 2569 ddr_p ddr = (*alias_ddrs)[i]; |
2413 struct data_reference *dr_a = DDR_A (ddr); | 2570 struct data_reference *dr_a = DDR_A (ddr); |
2414 struct data_reference *dr_b = DDR_B (ddr); | 2571 struct data_reference *dr_b = DDR_B (ddr); |
2415 tree seg_length_a, seg_length_b; | 2572 tree seg_length_a, seg_length_b; |
2416 int comp_res = data_ref_compare_tree (DR_BASE_ADDRESS (dr_a), | |
2417 DR_BASE_ADDRESS (dr_b)); | |
2418 | |
2419 if (comp_res == 0) | |
2420 comp_res = data_ref_compare_tree (DR_OFFSET (dr_a), DR_OFFSET (dr_b)); | |
2421 gcc_assert (comp_res != 0); | |
2422 | 2573 |
2423 if (latch_dominated_by_data_ref (loop, dr_a)) | 2574 if (latch_dominated_by_data_ref (loop, dr_a)) |
2424 seg_length_a = data_ref_segment_size (dr_a, niters_plus_one); | 2575 seg_length_a = data_ref_segment_size (dr_a, niters_plus_one); |
2425 else | 2576 else |
2426 seg_length_a = data_ref_segment_size (dr_a, niters); | 2577 seg_length_a = data_ref_segment_size (dr_a, niters); |
2437 unsigned int align_a = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_a))); | 2588 unsigned int align_a = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_a))); |
2438 unsigned int align_b = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_b))); | 2589 unsigned int align_b = TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_b))); |
2439 | 2590 |
2440 dr_with_seg_len_pair_t dr_with_seg_len_pair | 2591 dr_with_seg_len_pair_t dr_with_seg_len_pair |
2441 (dr_with_seg_len (dr_a, seg_length_a, access_size_a, align_a), | 2592 (dr_with_seg_len (dr_a, seg_length_a, access_size_a, align_a), |
2442 dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b)); | 2593 dr_with_seg_len (dr_b, seg_length_b, access_size_b, align_b), |
2443 | 2594 /* ??? Would WELL_ORDERED be safe? */ |
2444 /* Canonicalize pairs by sorting the two DR members. */ | 2595 dr_with_seg_len_pair_t::REORDERED); |
2445 if (comp_res > 0) | |
2446 std::swap (dr_with_seg_len_pair.first, dr_with_seg_len_pair.second); | |
2447 | 2596 |
2448 comp_alias_pairs->safe_push (dr_with_seg_len_pair); | 2597 comp_alias_pairs->safe_push (dr_with_seg_len_pair); |
2449 } | 2598 } |
2450 | 2599 |
2451 if (tree_fits_uhwi_p (niters)) | 2600 if (tree_fits_uhwi_p (niters)) |
2462 /* Given data dependence relations in ALIAS_DDRS, generate runtime alias | 2611 /* Given data dependence relations in ALIAS_DDRS, generate runtime alias |
2463 checks and version LOOP under condition of these runtime alias checks. */ | 2612 checks and version LOOP under condition of these runtime alias checks. */ |
2464 | 2613 |
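Before the implementation, a rough sketch of the shape of the versioned code this produces; the pointer comparison below stands in for the real segment-overlap test assembled from compute_alias_check_pairs and is illustrative only, not the generated GIMPLE.

#include <stddef.h>
#include <string.h>

/* Illustrative only: versioning a copy loop whose pointers may alias
   yields an explicit runtime test choosing between the optimized body
   and the original loop.  */
void
versioned_copy (int *dst, int *src, size_t n)
{
  if (dst + n <= src || src + n <= dst)     /* simplified overlap check */
    memcpy (dst, src, n * sizeof (int));    /* distributed / builtin body */
  else
    for (size_t i = 0; i < n; i++)          /* conservative original loop */
      dst[i] = src[i];
}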
2465 static void | 2614 static void |
2466 version_loop_by_alias_check (vec<struct partition *> *partitions, | 2615 version_loop_by_alias_check (vec<struct partition *> *partitions, |
2467 struct loop *loop, vec<ddr_p> *alias_ddrs) | 2616 class loop *loop, vec<ddr_p> *alias_ddrs) |
2468 { | 2617 { |
2469 profile_probability prob; | 2618 profile_probability prob; |
2470 basic_block cond_bb; | 2619 basic_block cond_bb; |
2471 struct loop *nloop; | 2620 class loop *nloop; |
2472 tree lhs, arg0, cond_expr = NULL_TREE; | 2621 tree lhs, arg0, cond_expr = NULL_TREE; |
2473 gimple_seq cond_stmts = NULL; | 2622 gimple_seq cond_stmts = NULL; |
2474 gimple *call_stmt = NULL; | 2623 gimple *call_stmt = NULL; |
2475 auto_vec<dr_with_seg_len_pair_t> comp_alias_pairs; | 2624 auto_vec<dr_with_seg_len_pair_t> comp_alias_pairs; |
2476 | 2625 |
2673 partition_free (part2); | 2822 partition_free (part2); |
2674 partitions->ordered_remove (i + 1); | 2823 partitions->ordered_remove (i + 1); |
2675 } | 2824 } |
2676 } | 2825 } |
2677 | 2826 |
2678 /* Fuse PARTITIONS of LOOP if necessary before finalizing distribution. | 2827 void |
2679 ALIAS_DDRS contains ddrs which need runtime alias check. */ | 2828 loop_distribution::finalize_partitions (class loop *loop, |
2680 | 2829 vec<struct partition *> *partitions, |
2681 static void | 2830 vec<ddr_p> *alias_ddrs) |
2682 finalize_partitions (struct loop *loop, vec<struct partition *> *partitions, | |
2683 vec<ddr_p> *alias_ddrs) | |
2684 { | 2831 { |
2685 unsigned i; | 2832 unsigned i; |
2686 struct partition *partition, *a; | 2833 struct partition *partition, *a; |
2687 | 2834 |
2688 if (partitions->length () == 1 | 2835 if (partitions->length () == 1 |
2733 are placed before consumer statements. Tries to separate only the | 2880 are placed before consumer statements. Tries to separate only the |
2734 statements from STMTS into separate loops. Returns the number of | 2881 statements from STMTS into separate loops. Returns the number of |
2735 distributed loops. Set NB_CALLS to number of generated builtin calls. | 2882 distributed loops. Set NB_CALLS to number of generated builtin calls. |
2736 Set *DESTROY_P to whether LOOP needs to be destroyed. */ | 2883 Set *DESTROY_P to whether LOOP needs to be destroyed. */ |
2737 | 2884 |
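A hedged before/after picture of the transformation distribute_loop is driving at; the example assumes the common case of one memset-like partition plus one ordinary partition, with no dependence forcing the two to stay fused.

#include <stddef.h>
#include <string.h>

/* Illustrative only: one loop containing two independent partitions.  */
void
before (int *restrict a, int *restrict b, const int *restrict c, size_t n)
{
  for (size_t i = 0; i < n; i++)
    {
      a[i] = 0;          /* partition that can become a memset builtin */
      b[i] = c[i] + 1;   /* ordinary partition, stays a loop */
    }
}

/* Possible distributed form.  */
void
after (int *restrict a, int *restrict b, const int *restrict c, size_t n)
{
  memset (a, 0, n * sizeof (*a));   /* generated builtin call */
  for (size_t i = 0; i < n; i++)    /* remaining distributed loop */
    b[i] = c[i] + 1;
}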
2738 static int | 2885 int |
2739 distribute_loop (struct loop *loop, vec<gimple *> stmts, | 2886 loop_distribution::distribute_loop (class loop *loop, vec<gimple *> stmts, |
2740 control_dependences *cd, int *nb_calls, bool *destroy_p) | 2887 control_dependences *cd, int *nb_calls, bool *destroy_p, |
2888 bool only_patterns_p) | |
2741 { | 2889 { |
2742 ddrs_table = new hash_table<ddr_hasher> (389); | 2890 ddrs_table = new hash_table<ddr_hasher> (389); |
2743 struct graph *rdg; | 2891 struct graph *rdg; |
2744 partition *partition; | 2892 partition *partition; |
2745 bool any_builtin; | |
2746 int i, nbp; | 2893 int i, nbp; |
2747 | 2894 |
2748 *destroy_p = false; | 2895 *destroy_p = false; |
2749 *nb_calls = 0; | 2896 *nb_calls = 0; |
2750 loop_nest.create (0); | 2897 loop_nest.create (0); |
2754 delete ddrs_table; | 2901 delete ddrs_table; |
2755 return 0; | 2902 return 0; |
2756 } | 2903 } |
2757 | 2904 |
2758 datarefs_vec.create (20); | 2905 datarefs_vec.create (20); |
2906 has_nonaddressable_dataref_p = false; | |
2759 rdg = build_rdg (loop, cd); | 2907 rdg = build_rdg (loop, cd); |
2760 if (!rdg) | 2908 if (!rdg) |
2761 { | 2909 { |
2762 if (dump_file && (dump_flags & TDF_DETAILS)) | 2910 if (dump_file && (dump_flags & TDF_DETAILS)) |
2763 fprintf (dump_file, | 2911 fprintf (dump_file, |
2799 auto_bitmap stmt_in_all_partitions; | 2947 auto_bitmap stmt_in_all_partitions; |
2800 bitmap_copy (stmt_in_all_partitions, partitions[0]->stmts); | 2948 bitmap_copy (stmt_in_all_partitions, partitions[0]->stmts); |
2801 for (i = 1; partitions.iterate (i, &partition); ++i) | 2949 for (i = 1; partitions.iterate (i, &partition); ++i) |
2802 bitmap_and_into (stmt_in_all_partitions, partitions[i]->stmts); | 2950 bitmap_and_into (stmt_in_all_partitions, partitions[i]->stmts); |
2803 | 2951 |
2804 any_builtin = false; | 2952 bool any_builtin = false; |
2953 bool reduction_in_all = false; | |
2805 FOR_EACH_VEC_ELT (partitions, i, partition) | 2954 FOR_EACH_VEC_ELT (partitions, i, partition) |
2806 { | 2955 { |
2807 classify_partition (loop, rdg, partition, stmt_in_all_partitions); | 2956 reduction_in_all |
2957 |= classify_partition (loop, rdg, partition, stmt_in_all_partitions); | |
2808 any_builtin |= partition_builtin_p (partition); | 2958 any_builtin |= partition_builtin_p (partition); |
2809 } | 2959 } |
2810 | 2960 |
2811 /* If we are only distributing patterns but did not detect any, | 2961 /* If we are only distributing patterns but did not detect any, |
2812 simply bail out. */ | 2962 simply bail out. */ |
2813 if (!flag_tree_loop_distribution | 2963 if (only_patterns_p |
2814 && !any_builtin) | 2964 && !any_builtin) |
2815 { | 2965 { |
2816 nbp = 0; | 2966 nbp = 0; |
2817 goto ldist_done; | 2967 goto ldist_done; |
2818 } | 2968 } |
2820 /* If we are only distributing patterns fuse all partitions that | 2970 /* If we are only distributing patterns fuse all partitions that |
2821 were not classified as builtins. This also avoids chopping | 2971 were not classified as builtins. This also avoids chopping |
2822 a loop into pieces, separated by builtin calls. That is, we | 2972 a loop into pieces, separated by builtin calls. That is, we |
2823 only want no or a single loop body remaining. */ | 2973 only want no or a single loop body remaining. */ |
2824 struct partition *into; | 2974 struct partition *into; |
2825 if (!flag_tree_loop_distribution) | 2975 if (only_patterns_p) |
2826 { | 2976 { |
2827 for (i = 0; partitions.iterate (i, &into); ++i) | 2977 for (i = 0; partitions.iterate (i, &into); ++i) |
2828 if (!partition_builtin_p (into)) | 2978 if (!partition_builtin_p (into)) |
2829 break; | 2979 break; |
2830 for (++i; partitions.iterate (i, &partition); ++i) | 2980 for (++i; partitions.iterate (i, &partition); ++i) |
2877 1 into 0,2. So try again if we did any merging into 0. */ | 3027 1 into 0,2. So try again if we did any merging into 0. */ |
2878 if (changed) | 3028 if (changed) |
2879 i--; | 3029 i--; |
2880 } | 3030 } |
2881 | 3031 |
3032 /* Put a non-builtin partition last if we need to preserve a reduction. | |
3033 ??? This is a workaround that makes sort_partitions_by_post_order do | |
3034 the correct thing while in reality it should sort each component | |
3035 separately and then put the component with a reduction or a non-builtin | |
3036 last. */ | |
3037 if (reduction_in_all | |
3038 && partition_builtin_p (partitions.last())) | |
3039 FOR_EACH_VEC_ELT (partitions, i, partition) | |
3040 if (!partition_builtin_p (partition)) | |
3041 { | |
3042 partitions.unordered_remove (i); | |
3043 partitions.quick_push (partition); | |
3044 break; | |
3045 } | |
3046 | |
2882 /* Build the partition dependency graph and fuse partitions in strong | 3047 /* Build the partition dependency graph and fuse partitions in strong |
2883 connected component. */ | 3048 connected component. */ |
2884 if (partitions.length () > 1) | 3049 if (partitions.length () > 1) |
2885 { | 3050 { |
2886 /* Don't support loop nest distribution under runtime alias check | 3051 /* Don't support loop nest distribution under runtime alias check |
2887 since it's not likely to enable many vectorization opportunities. */ | 3052 since it's not likely to enable many vectorization opportunities. |
2888 if (loop->inner) | 3053 Also if loop has any data reference which may be not addressable |
3054 since alias check needs to take, compare address of the object. */ | |
3055 if (loop->inner || has_nonaddressable_dataref_p) | |
2889 merge_dep_scc_partitions (rdg, &partitions, false); | 3056 merge_dep_scc_partitions (rdg, &partitions, false); |
2890 else | 3057 else |
2891 { | 3058 { |
2892 merge_dep_scc_partitions (rdg, &partitions, true); | 3059 merge_dep_scc_partitions (rdg, &partitions, true); |
2893 if (partitions.length () > 1) | 3060 if (partitions.length () > 1) |
2894 break_alias_scc_partitions (rdg, &partitions, &alias_ddrs); | 3061 break_alias_scc_partitions (rdg, &partitions, &alias_ddrs); |
2895 } | 3062 } |
2896 } | 3063 } |
2897 | 3064 |
2898 finalize_partitions (loop, &partitions, &alias_ddrs); | 3065 finalize_partitions (loop, &partitions, &alias_ddrs); |
3066 | |
3067 /* If there is a reduction in all partitions make sure the last one | |
3068 is not classified for builtin code generation. */ | |
3069 if (reduction_in_all) | |
3070 { | |
3071 partition = partitions.last (); | |
3072 if (only_patterns_p | |
3073 && partition_builtin_p (partition) | |
3074 && !partition_builtin_p (partitions[0])) | |
3075 { | |
3076 nbp = 0; | |
3077 goto ldist_done; | |
3078 } | |
3079 partition->kind = PKIND_NORMAL; | |
3080 } | |
2899 | 3081 |
2900 nbp = partitions.length (); | 3082 nbp = partitions.length (); |
2901 if (nbp == 0 | 3083 if (nbp == 0 |
2902 || (nbp == 1 && !partition_builtin_p (partitions[0])) | 3084 || (nbp == 1 && !partition_builtin_p (partitions[0])) |
2903 || (nbp > 1 && partition_contains_all_rw (rdg, partitions))) | 3085 || (nbp > 1 && partition_contains_all_rw (rdg, partitions))) |
2938 partition_free (partition); | 3120 partition_free (partition); |
2939 | 3121 |
2940 free_rdg (rdg); | 3122 free_rdg (rdg); |
2941 return nbp - *nb_calls; | 3123 return nbp - *nb_calls; |
2942 } | 3124 } |
3125 | |
3126 | |
3127 void loop_distribution::bb_top_order_init (void) | |
3128 { | |
3129 int rpo_num; | |
3130 int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun)); | |
3131 | |
3132 bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun)); | |
3133 bb_top_order_index_size = last_basic_block_for_fn (cfun); | |
3134 rpo_num = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, true); | |
3135 for (int i = 0; i < rpo_num; i++) | |
3136 bb_top_order_index[rpo[i]] = i; | |
3137 | |
3138 free (rpo); | |
3139 } | |
3140 | |
3141 void loop_distribution::bb_top_order_destroy () | |
3142 { | |
3143 free (bb_top_order_index); | |
3144 bb_top_order_index = NULL; | |
3145 bb_top_order_index_size = 0; | |
3146 } | |
3147 | |
3148 | |
3149 /* Given LOOP, this function records seed statements for distribution in | |
3150 WORK_LIST. Return false if there is nothing for distribution. */ | |
3151 | |
3152 static bool | |
3153 find_seed_stmts_for_distribution (class loop *loop, vec<gimple *> *work_list) | |
3154 { | |
3155 basic_block *bbs = get_loop_body_in_dom_order (loop); | |
3156 | |
3157 /* Initialize the worklist with stmts we seed the partitions with. */ | |
3158 for (unsigned i = 0; i < loop->num_nodes; ++i) | |
3159 { | |
3160 for (gphi_iterator gsi = gsi_start_phis (bbs[i]); | |
3161 !gsi_end_p (gsi); gsi_next (&gsi)) | |
3162 { | |
3163 gphi *phi = gsi.phi (); | |
3164 if (virtual_operand_p (gimple_phi_result (phi))) | |
3165 continue; | |
3166 /* Distribute stmts which have defs that are used outside of | |
3167 the loop. */ | |
3168 if (!stmt_has_scalar_dependences_outside_loop (loop, phi)) | |
3169 continue; | |
3170 work_list->safe_push (phi); | |
3171 } | |
3172 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); | |
3173 !gsi_end_p (gsi); gsi_next (&gsi)) | |
3174 { | |
3175 gimple *stmt = gsi_stmt (gsi); | |
3176 | |
3177 /* Ignore clobbers, they do not have true side effects. */ | |
3178 if (gimple_clobber_p (stmt)) | |
3179 continue; | |
3180 | |
3181 /* If there is a stmt with side-effects bail out - we | |
3182 cannot and should not distribute this loop. */ | |
3183 if (gimple_has_side_effects (stmt)) | |
3184 { | |
3185 free (bbs); | |
3186 return false; | |
3187 } | |
3188 | |
3189 /* Distribute stmts which have defs that are used outside of | |
3190 the loop. */ | |
3191 if (stmt_has_scalar_dependences_outside_loop (loop, stmt)) | |
3192 ; | |
3193 /* Otherwise only distribute stores for now. */ | |
3194 else if (!gimple_vdef (stmt)) | |
3195 continue; | |
3196 | |
3197 work_list->safe_push (stmt); | |
3198 } | |
3199 } | |
3200 free (bbs); | |
3201 return work_list->length () > 0; | |
3202 } | |
3203 | |
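A hedged example of which statements the seeding rule above would push onto WORK_LIST; the names and the reduction are assumptions made for illustration only.

#include <stddef.h>

/* Illustrative only: the store to a[i] has a virtual definition and the
   final value of 'sum' is live after the loop, so both statements seed
   partitions; the purely local computation of 't' is only pulled in
   later through its dependences.  */
int
seeded (int *a, const int *b, size_t n)
{
  int sum = 0;
  for (size_t i = 0; i < n; i++)
    {
      int t = b[i] * 2;   /* not a seed by itself */
      a[i] = t;           /* seed: store */
      sum += t;           /* seed: value used outside the loop */
    }
  return sum;
}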
3204 /* Given innermost LOOP, return the outermost enclosing loop that forms a | |
3205 perfect loop nest. */ | |
3206 | |
3207 static class loop * | |
3208 prepare_perfect_loop_nest (class loop *loop) | |
3209 { | |
3210 class loop *outer = loop_outer (loop); | |
3211 tree niters = number_of_latch_executions (loop); | |
3212 | |
3213 /* TODO: We only support the innermost 3-level loop nest distribution | |
3214 because of compilation time issue for now. This should be relaxed | |
3215 in the future. Note we only allow 3-level loop nest distribution | |
3216 when parallelizing loops. */ | |
3217 while ((loop->inner == NULL | |
3218 || (loop->inner->inner == NULL && flag_tree_parallelize_loops > 1)) | |
3219 && loop_outer (outer) | |
3220 && outer->inner == loop && loop->next == NULL | |
3221 && single_exit (outer) | |
3222 && !chrec_contains_symbols_defined_in_loop (niters, outer->num) | |
3223 && (niters = number_of_latch_executions (outer)) != NULL_TREE | |
3224 && niters != chrec_dont_know) | |
3225 { | |
3226 loop = outer; | |
3227 outer = loop_outer (loop); | |
3228 } | |
3229 | |
3230 return loop; | |
3231 } | |
3232 | |
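A hedged example of a loop shape prepare_perfect_loop_nest is meant to accept: the outer loop contains nothing but the inner loop, both have single exits and computable iteration counts, so the outer loop would be returned and the whole nest considered for distribution at once (for instance, collapsing into a single memset when the rows are contiguous). The function below is editorial, not part of this file.

#include <stddef.h>

void
clear_matrix (size_t n, size_t m, double a[n][m])
{
  for (size_t i = 0; i < n; i++)      /* outer loop: single exit, known count */
    for (size_t j = 0; j < m; j++)    /* inner loop is its only statement */
      a[i][j] = 0.0;
}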
3233 | |
3234 unsigned int | |
3235 loop_distribution::execute (function *fun) | |
3236 { | |
3237 class loop *loop; | |
3238 bool changed = false; | |
3239 basic_block bb; | |
3240 control_dependences *cd = NULL; | |
3241 auto_vec<loop_p> loops_to_be_destroyed; | |
3242 | |
3243 if (number_of_loops (fun) <= 1) | |
3244 return 0; | |
3245 | |
3246 bb_top_order_init (); | |
3247 | |
3248 FOR_ALL_BB_FN (bb, fun) | |
3249 { | |
3250 gimple_stmt_iterator gsi; | |
3251 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
3252 gimple_set_uid (gsi_stmt (gsi), -1); | |
3253 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
3254 gimple_set_uid (gsi_stmt (gsi), -1); | |
3255 } | |
3256 | |
3257 /* We can at the moment only distribute non-nested loops, thus restrict | |
3258 walking to innermost loops. */ | |
3259 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) | |
3260 { | |
3261 /* Don't distribute loops with multiple exit edges, or cold loops when |
3262 not doing pattern detection. */ |
3263 if (!single_exit (loop) | |
3264 || (!flag_tree_loop_distribute_patterns | |
3265 && !optimize_loop_for_speed_p (loop))) | |
3266 continue; | |
3267 | |
3268 /* Don't distribute loop if niters is unknown. */ | |
3269 tree niters = number_of_latch_executions (loop); | |
3270 if (niters == NULL_TREE || niters == chrec_dont_know) | |
3271 continue; | |
3272 | |
3273 /* Get the perfect loop nest for distribution. */ | |
3274 loop = prepare_perfect_loop_nest (loop); | |
3275 for (; loop; loop = loop->inner) | |
3276 { | |
3277 auto_vec<gimple *> work_list; | |
3278 if (!find_seed_stmts_for_distribution (loop, &work_list)) | |
3279 break; | |
3280 | |
3281 const char *str = loop->inner ? " nest" : ""; | |
3282 dump_user_location_t loc = find_loop_location (loop); | |
3283 if (!cd) | |
3284 { | |
3285 calculate_dominance_info (CDI_DOMINATORS); | |
3286 calculate_dominance_info (CDI_POST_DOMINATORS); | |
3287 cd = new control_dependences (); | |
3288 free_dominance_info (CDI_POST_DOMINATORS); | |
3289 } | |
3290 | |
3291 bool destroy_p; | |
3292 int nb_generated_loops, nb_generated_calls; | |
3293 nb_generated_loops | |
3294 = distribute_loop (loop, work_list, cd, &nb_generated_calls, | |
3295 &destroy_p, (!optimize_loop_for_speed_p (loop) | |
3296 || !flag_tree_loop_distribution)); | |
3297 if (destroy_p) | |
3298 loops_to_be_destroyed.safe_push (loop); | |
3299 | |
3300 if (nb_generated_loops + nb_generated_calls > 0) | |
3301 { | |
3302 changed = true; | |
3303 if (dump_enabled_p ()) | |
3304 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, | |
3305 loc, "Loop%s %d distributed: split to %d loops " | |
3306 "and %d library calls.\n", str, loop->num, | |
3307 nb_generated_loops, nb_generated_calls); | |
3308 | |
3309 break; | |
3310 } | |
3311 | |
3312 if (dump_file && (dump_flags & TDF_DETAILS)) | |
3313 fprintf (dump_file, "Loop%s %d not distributed.\n", str, loop->num); | |
3314 } | |
3315 } | |
3316 | |
3317 if (cd) | |
3318 delete cd; | |
3319 | |
3320 if (bb_top_order_index != NULL) | |
3321 bb_top_order_destroy (); | |
3322 | |
3323 if (changed) | |
3324 { | |
3325 /* Destroy loop bodies that could not be reused. Do this late as we | |
3326 otherwise can end up referring to stale data in control dependences. */ |
3327 unsigned i; | |
3328 FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop) | |
3329 destroy_loop (loop); | |
3330 | |
3331 /* Cached scalar evolutions now may refer to wrong or non-existing | |
3332 loops. */ | |
3333 scev_reset_htab (); | |
3334 mark_virtual_operands_for_renaming (fun); | |
3335 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); | |
3336 } | |
3337 | |
3338 checking_verify_loop_structure (); | |
3339 | |
3340 return changed ? TODO_cleanup_cfg : 0; | |
3341 } | |
3342 | |
2943 | 3343 |
2944 /* Distribute all loops in the current function. */ | 3344 /* Distribute all loops in the current function. */ |
2945 | 3345 |
2946 namespace { | 3346 namespace { |
2947 | 3347 |
2974 | 3374 |
2975 virtual unsigned int execute (function *); | 3375 virtual unsigned int execute (function *); |
2976 | 3376 |
2977 }; // class pass_loop_distribution | 3377 }; // class pass_loop_distribution |
2978 | 3378 |
2979 | |
2980 /* Given LOOP, this function records seed statements for distribution in | |
2981 WORK_LIST. Return false if there is nothing for distribution. */ | |
2982 | |
2983 static bool | |
2984 find_seed_stmts_for_distribution (struct loop *loop, vec<gimple *> *work_list) | |
2985 { | |
2986 basic_block *bbs = get_loop_body_in_dom_order (loop); | |
2987 | |
2988 /* Initialize the worklist with stmts we seed the partitions with. */ | |
2989 for (unsigned i = 0; i < loop->num_nodes; ++i) | |
2990 { | |
2991 for (gphi_iterator gsi = gsi_start_phis (bbs[i]); | |
2992 !gsi_end_p (gsi); gsi_next (&gsi)) | |
2993 { | |
2994 gphi *phi = gsi.phi (); | |
2995 if (virtual_operand_p (gimple_phi_result (phi))) | |
2996 continue; | |
2997 /* Distribute stmts which have defs that are used outside of | |
2998 the loop. */ | |
2999 if (!stmt_has_scalar_dependences_outside_loop (loop, phi)) | |
3000 continue; | |
3001 work_list->safe_push (phi); | |
3002 } | |
3003 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); | |
3004 !gsi_end_p (gsi); gsi_next (&gsi)) | |
3005 { | |
3006 gimple *stmt = gsi_stmt (gsi); | |
3007 | |
3008 /* If there is a stmt with side-effects bail out - we | |
3009 cannot and should not distribute this loop. */ | |
3010 if (gimple_has_side_effects (stmt)) | |
3011 { | |
3012 free (bbs); | |
3013 return false; | |
3014 } | |
3015 | |
3016 /* Distribute stmts which have defs that are used outside of | |
3017 the loop. */ | |
3018 if (stmt_has_scalar_dependences_outside_loop (loop, stmt)) | |
3019 ; | |
3020 /* Otherwise only distribute stores for now. */ | |
3021 else if (!gimple_vdef (stmt)) | |
3022 continue; | |
3023 | |
3024 work_list->safe_push (stmt); | |
3025 } | |
3026 } | |
3027 free (bbs); | |
3028 return work_list->length () > 0; | |
3029 } | |
3030 | |
3031 /* Given innermost LOOP, return the outermost enclosing loop that forms a | |
3032 perfect loop nest. */ | |
3033 | |
3034 static struct loop * | |
3035 prepare_perfect_loop_nest (struct loop *loop) | |
3036 { | |
3037 struct loop *outer = loop_outer (loop); | |
3038 tree niters = number_of_latch_executions (loop); | |
3039 | |
3040 /* TODO: We only support the innermost 3-level loop nest distribution | |
3041 because of compilation time issue for now. This should be relaxed | |
3042 in the future. Note we only allow 3-level loop nest distribution | |
3043 when parallelizing loops. */ | |
3044 while ((loop->inner == NULL | |
3045 || (loop->inner->inner == NULL && flag_tree_parallelize_loops > 1)) | |
3046 && loop_outer (outer) | |
3047 && outer->inner == loop && loop->next == NULL | |
3048 && single_exit (outer) | |
3049 && optimize_loop_for_speed_p (outer) | |
3050 && !chrec_contains_symbols_defined_in_loop (niters, outer->num) | |
3051 && (niters = number_of_latch_executions (outer)) != NULL_TREE | |
3052 && niters != chrec_dont_know) | |
3053 { | |
3054 loop = outer; | |
3055 outer = loop_outer (loop); | |
3056 } | |
3057 | |
3058 return loop; | |
3059 } | |
3060 | |
3061 unsigned int | 3379 unsigned int |
3062 pass_loop_distribution::execute (function *fun) | 3380 pass_loop_distribution::execute (function *fun) |
3063 { | 3381 { |
3064 struct loop *loop; | 3382 return loop_distribution ().execute (fun); |
3065 bool changed = false; | |
3066 basic_block bb; | |
3067 control_dependences *cd = NULL; | |
3068 auto_vec<loop_p> loops_to_be_destroyed; | |
3069 | |
3070 if (number_of_loops (fun) <= 1) | |
3071 return 0; | |
3072 | |
3073 /* Compute topological order for basic blocks. Topological order is | |
3074 needed because data dependence is computed for data references in | |
3075 lexicographical order. */ | |
3076 if (bb_top_order_index == NULL) | |
3077 { | |
3078 int rpo_num; | |
3079 int *rpo = XNEWVEC (int, last_basic_block_for_fn (cfun)); | |
3080 | |
3081 bb_top_order_index = XNEWVEC (int, last_basic_block_for_fn (cfun)); | |
3082 bb_top_order_index_size = last_basic_block_for_fn (cfun); | |
3083 rpo_num = pre_and_rev_post_order_compute_fn (cfun, NULL, rpo, true); | |
3084 for (int i = 0; i < rpo_num; i++) | |
3085 bb_top_order_index[rpo[i]] = i; | |
3086 | |
3087 free (rpo); | |
3088 } | |
3089 | |
3090 FOR_ALL_BB_FN (bb, fun) | |
3091 { | |
3092 gimple_stmt_iterator gsi; | |
3093 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
3094 gimple_set_uid (gsi_stmt (gsi), -1); | |
3095 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
3096 gimple_set_uid (gsi_stmt (gsi), -1); | |
3097 } | |
3098 | |
3099 /* We can at the moment only distribute non-nested loops, thus restrict | |
3100 walking to innermost loops. */ | |
3101 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST) | |
3102 { | |
3103 /* Don't distribute loops with multiple exit edges, or cold loops. */ |
3104 if (!single_exit (loop) | |
3105 || !optimize_loop_for_speed_p (loop)) | |
3106 continue; | |
3107 | |
3108 /* Don't distribute loop if niters is unknown. */ | |
3109 tree niters = number_of_latch_executions (loop); | |
3110 if (niters == NULL_TREE || niters == chrec_dont_know) | |
3111 continue; | |
3112 | |
3113 /* Get the perfect loop nest for distribution. */ | |
3114 loop = prepare_perfect_loop_nest (loop); | |
3115 for (; loop; loop = loop->inner) | |
3116 { | |
3117 auto_vec<gimple *> work_list; | |
3118 if (!find_seed_stmts_for_distribution (loop, &work_list)) | |
3119 break; | |
3120 | |
3121 const char *str = loop->inner ? " nest" : ""; | |
3122 dump_user_location_t loc = find_loop_location (loop); | |
3123 if (!cd) | |
3124 { | |
3125 calculate_dominance_info (CDI_DOMINATORS); | |
3126 calculate_dominance_info (CDI_POST_DOMINATORS); | |
3127 cd = new control_dependences (); | |
3128 free_dominance_info (CDI_POST_DOMINATORS); | |
3129 } | |
3130 | |
3131 bool destroy_p; | |
3132 int nb_generated_loops, nb_generated_calls; | |
3133 nb_generated_loops = distribute_loop (loop, work_list, cd, | |
3134 &nb_generated_calls, | |
3135 &destroy_p); | |
3136 if (destroy_p) | |
3137 loops_to_be_destroyed.safe_push (loop); | |
3138 | |
3139 if (nb_generated_loops + nb_generated_calls > 0) | |
3140 { | |
3141 changed = true; | |
3142 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, | |
3143 loc, "Loop%s %d distributed: split to %d loops " | |
3144 "and %d library calls.\n", str, loop->num, | |
3145 nb_generated_loops, nb_generated_calls); | |
3146 | |
3147 break; | |
3148 } | |
3149 | |
3150 if (dump_file && (dump_flags & TDF_DETAILS)) | |
3151 fprintf (dump_file, "Loop%s %d not distributed.\n", str, loop->num); | |
3152 } | |
3153 } | |
3154 | |
3155 if (cd) | |
3156 delete cd; | |
3157 | |
3158 if (bb_top_order_index != NULL) | |
3159 { | |
3160 free (bb_top_order_index); | |
3161 bb_top_order_index = NULL; | |
3162 bb_top_order_index_size = 0; | |
3163 } | |
3164 | |
3165 if (changed) | |
3166 { | |
3167 /* Destroy loop bodies that could not be reused. Do this late as we | |
3168 otherwise can end up referring to stale data in control dependences. */ |
3169 unsigned i; | |
3170 FOR_EACH_VEC_ELT (loops_to_be_destroyed, i, loop) | |
3171 destroy_loop (loop); | |
3172 | |
3173 /* Cached scalar evolutions now may refer to wrong or non-existing | |
3174 loops. */ | |
3175 scev_reset_htab (); | |
3176 mark_virtual_operands_for_renaming (fun); | |
3177 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa); | |
3178 } | |
3179 | |
3180 checking_verify_loop_structure (); | |
3181 | |
3182 return changed ? TODO_cleanup_cfg : 0; | |
3183 } | 3383 } |
3184 | 3384 |
3185 } // anon namespace | 3385 } // anon namespace |
3186 | 3386 |
3187 gimple_opt_pass * | 3387 gimple_opt_pass * |