comparison gcc/sched-rgn.c @ 0:a06113de4d67
first commit
author: kent <kent@cr.ie.u-ryukyu.ac.jp>
date: Fri, 17 Jul 2009 14:47:48 +0900
parents: (none)
children: 77e2b8dfacca
1 /* Instruction scheduling pass. | |
2 Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, | |
3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 | |
4 Free Software Foundation, Inc. | |
5 Contributed by Michael Tiemann (tiemann@cygnus.com) Enhanced by, | |
6 and currently maintained by, Jim Wilson (wilson@cygnus.com) | |
7 | |
8 This file is part of GCC. | |
9 | |
10 GCC is free software; you can redistribute it and/or modify it under | |
11 the terms of the GNU General Public License as published by the Free | |
12 Software Foundation; either version 3, or (at your option) any later | |
13 version. | |
14 | |
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
18 for more details. | |
19 | |
20 You should have received a copy of the GNU General Public License | |
21 along with GCC; see the file COPYING3. If not see | |
22 <http://www.gnu.org/licenses/>. */ | |
23 | |
24 /* This pass implements list scheduling within basic blocks. It is | |
25 run twice: (1) after flow analysis, but before register allocation, | |
26 and (2) after register allocation. | |
27 | |
28 The first run performs interblock scheduling, moving insns between | |
29 different blocks in the same "region", and the second runs only | |
30 basic block scheduling. | |
31 | |
32 Interblock motions performed are useful motions and speculative | |
33 motions, including speculative loads. Motions requiring code | |
34 duplication are not supported. The identification of motion type | |
35 and the check for validity of speculative motions requires | |
36 construction and analysis of the function's control flow graph. | |
37 | |
38 The main entry point for this pass is schedule_insns(), called for | |
39 each function. The work of the scheduler is organized in three | |
40 levels: (1) function level: insns are subject to splitting, | |
41 control-flow-graph is constructed, regions are computed (after | |
42 reload, each region is of one block), (2) region level: control | |
43 flow graph attributes required for interblock scheduling are | |
44 computed (dominators, reachability, etc.), data dependences and | |
45 priorities are computed, and (3) block level: insns in the block | |
46 are actually scheduled. */ | |
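/* Editor's sketch (not part of the original file): the three levels
   described above correspond roughly to the driver shape below.
   find_rgns and schedule_region are declared later in this file; the
   real entry point, schedule_insns, wraps this loop in initialization
   and bookkeeping that the sketch omits.  */
#if 0	/* illustration only */
static void
schedule_insns_sketch (void)
{
  int rgn;

  find_rgns ();			/* level 1: build CFG, form regions.  */
  for (rgn = 0; rgn < nr_regions; rgn++)
    schedule_region (rgn);	/* levels 2 and 3, per region.  */
}
#endif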
47 | |
48 #include "config.h" | |
49 #include "system.h" | |
50 #include "coretypes.h" | |
51 #include "tm.h" | |
52 #include "toplev.h" | |
53 #include "rtl.h" | |
54 #include "tm_p.h" | |
55 #include "hard-reg-set.h" | |
56 #include "regs.h" | |
57 #include "function.h" | |
58 #include "flags.h" | |
59 #include "insn-config.h" | |
60 #include "insn-attr.h" | |
61 #include "except.h" | |
62 #include "toplev.h" | |
63 #include "recog.h" | |
64 #include "cfglayout.h" | |
65 #include "params.h" | |
66 #include "sched-int.h" | |
67 #include "sel-sched.h" | |
68 #include "target.h" | |
69 #include "timevar.h" | |
70 #include "tree-pass.h" | |
71 #include "dbgcnt.h" | |
72 | |
73 #ifdef INSN_SCHEDULING | |
74 | |
75 /* Some accessor macros for h_i_d members only used within this file. */ | |
76 #define FED_BY_SPEC_LOAD(INSN) (HID (INSN)->fed_by_spec_load) | |
77 #define IS_LOAD_INSN(INSN) (HID (INSN)->is_load_insn) | |
78 | |
79 /* nr_inter/spec counts interblock/speculative motion for the function. */ | |
80 static int nr_inter, nr_spec; | |
81 | |
82 static int is_cfg_nonregular (void); | |
83 | |
84 /* Number of regions in the procedure. */ | |
85 int nr_regions = 0; | |
86 | |
87 /* Table of region descriptions. */ | |
88 region *rgn_table = NULL; | |
89 | |
90 /* Array of lists of regions' blocks. */ | |
91 int *rgn_bb_table = NULL; | |
92 | |
93 /* Topological order of blocks in the region (if b2 is reachable from | |
94 b1, block_to_bb[b2] > block_to_bb[b1]). Note: A basic block is | |
95 always referred to by either block or b, while its topological | |
96 order name (in the region) is referred to by bb. */ | |
97 int *block_to_bb = NULL; | |
98 | |
99 /* The number of the region containing a block. */ | |
100 int *containing_rgn = NULL; | |
101 | |
102 /* ebb_head[i] is the index in rgn_bb_table of the head basic block of the i'th ebb. | |
103 Currently we can get an ebb only through splitting of the currently | |
104 scheduled block; therefore, we don't need an ebb_head array for every region, | |
105 and it's sufficient to hold it for the current one only. */ | |
106 int *ebb_head = NULL; | |
107 | |
108 /* The minimum probability of reaching a source block so that it will be | |
109 considered for speculative scheduling. */ | |
110 static int min_spec_prob; | |
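/* Editor's note (an assumption, not visible in this hunk): the scheduler
   initialization code typically derives this threshold from the
   min-spec-prob --param, scaled to REG_BR_PROB_BASE, roughly:

     min_spec_prob = (PARAM_VALUE (PARAM_MIN_SPEC_PROB)
                      * REG_BR_PROB_BASE) / 100;  */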
111 | |
112 static void find_single_block_region (bool); | |
113 static void find_rgns (void); | |
114 static bool too_large (int, int *, int *); | |
115 | |
116 /* Blocks of the current region being scheduled. */ | |
117 int current_nr_blocks; | |
118 int current_blocks; | |
119 | |
120 /* A speculative motion requires checking live information on the path | |
121 from 'source' to 'target'. The split blocks are those to be checked. | |
122 After a speculative motion, live information should be modified in | |
123 the 'update' blocks. | |
124 | |
125 Lists of split and update blocks for each candidate of the current | |
126 target are in array bblst_table. */ | |
127 static basic_block *bblst_table; | |
128 static int bblst_size, bblst_last; | |
129 | |
130 /* Target info declarations. | |
131 | |
132 The block currently being scheduled is referred to as the "target" block, | |
133 while other blocks in the region from which insns can be moved to the | |
134 target are called "source" blocks. The candidate structure holds info | |
135 about such sources: are they valid? Speculative? Etc. */ | |
136 typedef struct | |
137 { | |
138 basic_block *first_member; | |
139 int nr_members; | |
140 } | |
141 bblst; | |
142 | |
143 typedef struct | |
144 { | |
145 char is_valid; | |
146 char is_speculative; | |
147 int src_prob; | |
148 bblst split_bbs; | |
149 bblst update_bbs; | |
150 } | |
151 candidate; | |
152 | |
153 static candidate *candidate_table; | |
154 #define IS_VALID(src) (candidate_table[src].is_valid) | |
155 #define IS_SPECULATIVE(src) (candidate_table[src].is_speculative) | |
156 #define IS_SPECULATIVE_INSN(INSN) \ | |
157 (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN)))) | |
158 #define SRC_PROB(src) ( candidate_table[src].src_prob ) | |
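/* Editor's sketch (illustration only): how the accessors above combine
   when testing whether a source block SRC may feed the current target.
   compute_trg_info below performs these checks in expanded form.  */
#if 0
  if (IS_VALID (src)
      && (!IS_SPECULATIVE (src) || flag_schedule_speculative))
    ; /* insns may move from SRC into the current target_bb.  */
#endif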
159 | |
160 /* The bb being currently scheduled. */ | |
161 int target_bb; | |
162 | |
163 /* List of edges. */ | |
164 typedef struct | |
165 { | |
166 edge *first_member; | |
167 int nr_members; | |
168 } | |
169 edgelst; | |
170 | |
171 static edge *edgelst_table; | |
172 static int edgelst_last; | |
173 | |
174 static void extract_edgelst (sbitmap, edgelst *); | |
175 | |
176 /* Target info functions. */ | |
177 static void split_edges (int, int, edgelst *); | |
178 static void compute_trg_info (int); | |
179 void debug_candidate (int); | |
180 void debug_candidates (int); | |
181 | |
182 /* Dominators array: dom[i] contains the sbitmap of dominators of | |
183 bb i in the region. */ | |
184 static sbitmap *dom; | |
185 | |
186 /* bb 0 is the only region entry. */ | |
187 #define IS_RGN_ENTRY(bb) (!bb) | |
188 | |
189 /* Is bb_src dominated by bb_trg?  */ | |
190 #define IS_DOMINATED(bb_src, bb_trg) \ | |
191 ( TEST_BIT (dom[bb_src], bb_trg) ) | |
192 | |
193 /* Probability: Prob[i] is an int in [0, REG_BR_PROB_BASE] which is | |
194 the probability of bb i relative to the region entry. */ | |
195 static int *prob; | |
196 | |
197 /* Bit-set of edges, where bit i stands for edge i. */ | |
198 typedef sbitmap edgeset; | |
199 | |
200 /* Number of edges in the region. */ | |
201 static int rgn_nr_edges; | |
202 | |
203 /* Array of size rgn_nr_edges. */ | |
204 static edge *rgn_edges; | |
205 | |
206 /* Mapping from each edge in the graph to its number in the rgn. */ | |
207 #define EDGE_TO_BIT(edge) ((int)(size_t)(edge)->aux) | |
208 #define SET_EDGE_TO_BIT(edge,nr) ((edge)->aux = (void *)(size_t)(nr)) | |
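/* Editor's sketch (illustration only): the numbering is established by
   walking rgn_edges once, giving each region edge its own bit in an
   edgeset.  The real setup lives in the region-scheduling
   initialization code later in this file.  */
#if 0
  {
    int i;
    for (i = 0; i < rgn_nr_edges; i++)
      SET_EDGE_TO_BIT (rgn_edges[i], i);
  }
#endif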
209 | |
210 /* The split edges of a source bb are different for each target | |
211 bb. In order to compute this efficiently, the 'potential-split edges' | |
212 are computed for each bb prior to scheduling a region. This is actually | |
213 the split edges of each bb relative to the region entry. | |
214 | |
215 pot_split[bb] is the set of potential split edges of bb. */ | |
216 static edgeset *pot_split; | |
217 | |
218 /* For every bb, a set of its ancestor edges. */ | |
219 static edgeset *ancestor_edges; | |
220 | |
221 #define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN)))) | |
222 | |
223 /* Speculative scheduling functions. */ | |
224 static int check_live_1 (int, rtx); | |
225 static void update_live_1 (int, rtx); | |
226 static int is_pfree (rtx, int, int); | |
227 static int find_conditional_protection (rtx, int); | |
228 static int is_conditionally_protected (rtx, int, int); | |
229 static int is_prisky (rtx, int, int); | |
230 static int is_exception_free (rtx, int, int); | |
231 | |
232 static bool sets_likely_spilled (rtx); | |
233 static void sets_likely_spilled_1 (rtx, const_rtx, void *); | |
234 static void add_branch_dependences (rtx, rtx); | |
235 static void compute_block_dependences (int); | |
236 | |
237 static void schedule_region (int); | |
238 static rtx concat_INSN_LIST (rtx, rtx); | |
239 static void concat_insn_mem_list (rtx, rtx, rtx *, rtx *); | |
240 static void propagate_deps (int, struct deps *); | |
241 static void free_pending_lists (void); | |
242 | |
243 /* Functions for construction of the control flow graph. */ | |
244 | |
245 /* Return 1 if control flow graph should not be constructed, 0 otherwise. | |
246 | |
247 We decide not to build the control flow graph if there is possibly more | |
248 than one entry to the function, if computed branches exist, if we | |
249 have nonlocal gotos, or if we have an unreachable loop. */ | |
250 | |
251 static int | |
252 is_cfg_nonregular (void) | |
253 { | |
254 basic_block b; | |
255 rtx insn; | |
256 | |
257 /* If we have a label that could be the target of a nonlocal goto, then | |
258 the cfg is not well structured. */ | |
259 if (nonlocal_goto_handler_labels) | |
260 return 1; | |
261 | |
262 /* If we have any forced labels, then the cfg is not well structured. */ | |
263 if (forced_labels) | |
264 return 1; | |
265 | |
266 /* If we have exception handlers, then we consider the cfg not well | |
267 structured. ?!? We should be able to handle this now that we | |
268 compute an accurate cfg for EH. */ | |
269 if (current_function_has_exception_handlers ()) | |
270 return 1; | |
271 | |
272 /* If we have insns which refer to labels as non-jumped-to operands, | |
273 then we consider the cfg not well structured. */ | |
274 FOR_EACH_BB (b) | |
275 FOR_BB_INSNS (b, insn) | |
276 { | |
277 rtx note, next, set, dest; | |
278 | |
279 /* If this function has a computed jump, then we consider the cfg | |
280 not well structured. */ | |
281 if (JUMP_P (insn) && computed_jump_p (insn)) | |
282 return 1; | |
283 | |
284 if (!INSN_P (insn)) | |
285 continue; | |
286 | |
287 note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX); | |
288 if (note == NULL_RTX) | |
289 continue; | |
290 | |
291 /* For that label not to be seen as a referred-to label, this | |
292 must be a single-set which is feeding a jump *only*. This | |
293 could be a conditional jump with the label split off for | |
294 machine-specific reasons or a casesi/tablejump. */ | |
295 next = next_nonnote_insn (insn); | |
296 if (next == NULL_RTX | |
297 || !JUMP_P (next) | |
298 || (JUMP_LABEL (next) != XEXP (note, 0) | |
299 && find_reg_note (next, REG_LABEL_TARGET, | |
300 XEXP (note, 0)) == NULL_RTX) | |
301 || BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (next)) | |
302 return 1; | |
303 | |
304 set = single_set (insn); | |
305 if (set == NULL_RTX) | |
306 return 1; | |
307 | |
308 dest = SET_DEST (set); | |
309 if (!REG_P (dest) || !dead_or_set_p (next, dest)) | |
310 return 1; | |
311 } | |
312 | |
313 /* Unreachable loops with more than one basic block are detected | |
314 during the DFS traversal in find_rgns. | |
315 | |
316 Unreachable loops with a single block are detected here. This | |
317 test is redundant with the one in find_rgns, but it's much | |
318 cheaper to go ahead and catch the trivial case here. */ | |
319 FOR_EACH_BB (b) | |
320 { | |
321 if (EDGE_COUNT (b->preds) == 0 | |
322 || (single_pred_p (b) | |
323 && single_pred (b) == b)) | |
324 return 1; | |
325 } | |
326 | |
327 /* All the tests passed. Consider the cfg well structured. */ | |
328 return 0; | |
329 } | |
330 | |
331 /* Extract list of edges from a bitmap containing EDGE_TO_BIT bits. */ | |
332 | |
333 static void | |
334 extract_edgelst (sbitmap set, edgelst *el) | |
335 { | |
336 unsigned int i = 0; | |
337 sbitmap_iterator sbi; | |
338 | |
339 /* edgelst table space is reused in each call to extract_edgelst. */ | |
340 edgelst_last = 0; | |
341 | |
342 el->first_member = &edgelst_table[edgelst_last]; | |
343 el->nr_members = 0; | |
344 | |
345 /* Iterate over each set bit in the bitset. */ | |
346 EXECUTE_IF_SET_IN_SBITMAP (set, 0, i, sbi) | |
347 { | |
348 edgelst_table[edgelst_last++] = rgn_edges[i]; | |
349 el->nr_members++; | |
350 } | |
351 } | |
352 | |
353 /* Functions for the construction of regions. */ | |
354 | |
355 /* Print the regions, for debugging purposes. Callable from debugger. */ | |
356 | |
357 void | |
358 debug_regions (void) | |
359 { | |
360 int rgn, bb; | |
361 | |
362 fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n"); | |
363 for (rgn = 0; rgn < nr_regions; rgn++) | |
364 { | |
365 fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn, | |
366 rgn_table[rgn].rgn_nr_blocks); | |
367 fprintf (sched_dump, ";;\tbb/block: "); | |
368 | |
369 /* We don't have ebb_head initialized yet, so we can't use | |
370 BB_TO_BLOCK (). */ | |
371 current_blocks = RGN_BLOCKS (rgn); | |
372 | |
373 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++) | |
374 fprintf (sched_dump, " %d/%d ", bb, rgn_bb_table[current_blocks + bb]); | |
375 | |
376 fprintf (sched_dump, "\n\n"); | |
377 } | |
378 } | |
379 | |
380 /* Print the region's basic blocks. */ | |
381 | |
382 void | |
383 debug_region (int rgn) | |
384 { | |
385 int bb; | |
386 | |
387 fprintf (stderr, "\n;; ------------ REGION %d ----------\n\n", rgn); | |
388 fprintf (stderr, ";;\trgn %d nr_blocks %d:\n", rgn, | |
389 rgn_table[rgn].rgn_nr_blocks); | |
390 fprintf (stderr, ";;\tbb/block: "); | |
391 | |
392 /* We don't have ebb_head initialized yet, so we can't use | |
393 BB_TO_BLOCK (). */ | |
394 current_blocks = RGN_BLOCKS (rgn); | |
395 | |
396 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++) | |
397 fprintf (stderr, " %d/%d ", bb, rgn_bb_table[current_blocks + bb]); | |
398 | |
399 fprintf (stderr, "\n\n"); | |
400 | |
401 for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++) | |
402 { | |
403 debug_bb_n_slim (rgn_bb_table[current_blocks + bb]); | |
404 fprintf (stderr, "\n"); | |
405 } | |
406 | |
407 fprintf (stderr, "\n"); | |
408 | |
409 } | |
410 | |
411 /* True when the bb with index BB_INDEX is contained in region RGN. */ | |
412 static bool | |
413 bb_in_region_p (int bb_index, int rgn) | |
414 { | |
415 int i; | |
416 | |
417 for (i = 0; i < rgn_table[rgn].rgn_nr_blocks; i++) | |
418 if (rgn_bb_table[current_blocks + i] == bb_index) | |
419 return true; | |
420 | |
421 return false; | |
422 } | |
423 | |
424 /* Dump region RGN to file F using dot syntax. */ | |
425 void | |
426 dump_region_dot (FILE *f, int rgn) | |
427 { | |
428 int i; | |
429 | |
430 fprintf (f, "digraph Region_%d {\n", rgn); | |
431 | |
432 /* We don't have ebb_head initialized yet, so we can't use | |
433 BB_TO_BLOCK (). */ | |
434 current_blocks = RGN_BLOCKS (rgn); | |
435 | |
436 for (i = 0; i < rgn_table[rgn].rgn_nr_blocks; i++) | |
437 { | |
438 edge e; | |
439 edge_iterator ei; | |
440 int src_bb_num = rgn_bb_table[current_blocks + i]; | |
441 struct basic_block_def *bb = BASIC_BLOCK (src_bb_num); | |
442 | |
443 FOR_EACH_EDGE (e, ei, bb->succs) | |
444 if (bb_in_region_p (e->dest->index, rgn)) | |
445 fprintf (f, "\t%d -> %d\n", src_bb_num, e->dest->index); | |
446 } | |
447 fprintf (f, "}\n"); | |
448 } | |
449 | |
450 /* The same, but first open a file specified by FNAME. */ | |
451 void | |
452 dump_region_dot_file (const char *fname, int rgn) | |
453 { | |
454 FILE *f = fopen (fname, "wt"); | |
455 dump_region_dot (f, rgn); | |
456 fclose (f); | |
457 } | |
458 | |
459 /* Build a single block region for each basic block in the function. | |
460 This allows for using the same code for interblock and basic block | |
461 scheduling. */ | |
462 | |
463 static void | |
464 find_single_block_region (bool ebbs_p) | |
465 { | |
466 basic_block bb, ebb_start; | |
467 int i = 0; | |
468 | |
469 nr_regions = 0; | |
470 | |
471 if (ebbs_p) { | |
472 int probability_cutoff; | |
473 if (profile_info && flag_branch_probabilities) | |
474 probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK); | |
475 else | |
476 probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY); | |
477 probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff; | |
478 | |
479 FOR_EACH_BB (ebb_start) | |
480 { | |
481 RGN_NR_BLOCKS (nr_regions) = 0; | |
482 RGN_BLOCKS (nr_regions) = i; | |
483 RGN_DONT_CALC_DEPS (nr_regions) = 0; | |
484 RGN_HAS_REAL_EBB (nr_regions) = 0; | |
485 | |
486 for (bb = ebb_start; ; bb = bb->next_bb) | |
487 { | |
488 edge e; | |
489 edge_iterator ei; | |
490 | |
491 rgn_bb_table[i] = bb->index; | |
492 RGN_NR_BLOCKS (nr_regions)++; | |
493 CONTAINING_RGN (bb->index) = nr_regions; | |
494 BLOCK_TO_BB (bb->index) = i - RGN_BLOCKS (nr_regions); | |
495 i++; | |
496 | |
497 if (bb->next_bb == EXIT_BLOCK_PTR | |
498 || LABEL_P (BB_HEAD (bb->next_bb))) | |
499 break; | |
500 | |
501 FOR_EACH_EDGE (e, ei, bb->succs) | |
502 if ((e->flags & EDGE_FALLTHRU) != 0) | |
503 break; | |
504 if (! e) | |
505 break; | |
506 if (e->probability <= probability_cutoff) | |
507 break; | |
508 } | |
509 | |
510 ebb_start = bb; | |
511 nr_regions++; | |
512 } | |
513 } | |
514 else | |
515 FOR_EACH_BB (bb) | |
516 { | |
517 rgn_bb_table[nr_regions] = bb->index; | |
518 RGN_NR_BLOCKS (nr_regions) = 1; | |
519 RGN_BLOCKS (nr_regions) = nr_regions; | |
520 RGN_DONT_CALC_DEPS (nr_regions) = 0; | |
521 RGN_HAS_REAL_EBB (nr_regions) = 0; | |
522 | |
523 CONTAINING_RGN (bb->index) = nr_regions; | |
524 BLOCK_TO_BB (bb->index) = 0; | |
525 nr_regions++; | |
526 } | |
527 } | |
528 | |
529 /* Estimate the number of insns in BB. */ | |
530 static int | |
531 rgn_estimate_number_of_insns (basic_block bb) | |
532 { | |
533 return INSN_LUID (BB_END (bb)) - INSN_LUID (BB_HEAD (bb)); | |
534 } | |
535 | |
536 /* Update number of blocks and the estimate for number of insns | |
537 in the region. Return true if the region is "too large" for interblock | |
538 scheduling (compile time considerations). */ | |
539 | |
540 static bool | |
541 too_large (int block, int *num_bbs, int *num_insns) | |
542 { | |
543 (*num_bbs)++; | |
544 (*num_insns) += (common_sched_info->estimate_number_of_insns | |
545 (BASIC_BLOCK (block))); | |
546 | |
547 return ((*num_bbs > PARAM_VALUE (PARAM_MAX_SCHED_REGION_BLOCKS)) | |
548 || (*num_insns > PARAM_VALUE (PARAM_MAX_SCHED_REGION_INSNS))); | |
549 } | |
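/* Editor's note: both limits checked above are user-tunable GCC
   parameters.  For example (values here are arbitrary):

     gcc -O2 -fschedule-insns2 \
         --param max-sched-region-blocks=20 \
         --param max-sched-region-insns=200 file.c  */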
550 | |
551 /* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk] | |
552 is still an inner loop. Put in max_hdr[blk] the header of the innermost | |
553 loop containing blk. */ | |
554 #define UPDATE_LOOP_RELATIONS(blk, hdr) \ | |
555 { \ | |
556 if (max_hdr[blk] == -1) \ | |
557 max_hdr[blk] = hdr; \ | |
558 else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr]) \ | |
559 RESET_BIT (inner, hdr); \ | |
560 else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr]) \ | |
561 { \ | |
562 RESET_BIT (inner,max_hdr[blk]); \ | |
563 max_hdr[blk] = hdr; \ | |
564 } \ | |
565 } | |
566 | |
567 /* Find regions for interblock scheduling. | |
568 | |
569 A region for scheduling can be: | |
570 | |
571 * A loop-free procedure, or | |
572 | |
573 * A reducible inner loop, or | |
574 | |
575 * A basic block not contained in any other region. | |
576 | |
577 ?!? In theory we could build other regions based on extended basic | |
578 blocks or reverse extended basic blocks. Is it worth the trouble? | |
579 | |
580 Loop blocks that form a region are put into the region's block list | |
581 in topological order. | |
582 | |
583 This procedure stores its results into the following global (ick) variables: | |
584 | |
585 * nr_regions | |
586 * rgn_table | |
587 * rgn_bb_table | |
588 * block_to_bb | |
589 * containing_rgn | |
590 | |
591 We use dominator relationships to avoid making regions out of non-reducible | |
592 loops. | |
593 | |
594 This procedure needs to be converted to work on pred/succ lists instead | |
595 of edge tables. That would simplify it somewhat. */ | |
596 | |
597 static void | |
598 haifa_find_rgns (void) | |
599 { | |
600 int *max_hdr, *dfs_nr, *degree; | |
601 char no_loops = 1; | |
602 int node, child, loop_head, i, head, tail; | |
603 int count = 0, sp, idx = 0; | |
604 edge_iterator current_edge; | |
605 edge_iterator *stack; | |
606 int num_bbs, num_insns, unreachable; | |
607 int too_large_failure; | |
608 basic_block bb; | |
609 | |
610 /* Note if a block is a natural loop header. */ | |
611 sbitmap header; | |
612 | |
613 /* Note if a block is a natural inner loop header. */ | |
614 sbitmap inner; | |
615 | |
616 /* Note if a block is in the block queue. */ | |
617 sbitmap in_queue; | |
618 | |
619 /* Note if a block is on the DFS stack. */ | |
620 sbitmap in_stack; | |
621 | |
622 /* Perform a DFS traversal of the cfg. Identify loop headers, inner loops, | |
623 and a mapping from each block to its loop header (if the block is contained | |
624 in a loop, else -1). | |
625 | |
626 Store results in HEADER, INNER, and MAX_HDR respectively, these will | |
627 be used as inputs to the second traversal. | |
628 | |
629 STACK, SP and DFS_NR are only used during the first traversal. */ | |
630 | |
631 /* Allocate and initialize variables for the first traversal. */ | |
632 max_hdr = XNEWVEC (int, last_basic_block); | |
633 dfs_nr = XCNEWVEC (int, last_basic_block); | |
634 stack = XNEWVEC (edge_iterator, n_edges); | |
635 | |
636 inner = sbitmap_alloc (last_basic_block); | |
637 sbitmap_ones (inner); | |
638 | |
639 header = sbitmap_alloc (last_basic_block); | |
640 sbitmap_zero (header); | |
641 | |
642 in_queue = sbitmap_alloc (last_basic_block); | |
643 sbitmap_zero (in_queue); | |
644 | |
645 in_stack = sbitmap_alloc (last_basic_block); | |
646 sbitmap_zero (in_stack); | |
647 | |
648 for (i = 0; i < last_basic_block; i++) | |
649 max_hdr[i] = -1; | |
650 | |
651 #define EDGE_PASSED(E) (ei_end_p ((E)) || ei_edge ((E))->aux) | |
652 #define SET_EDGE_PASSED(E) (ei_edge ((E))->aux = ei_edge ((E))) | |
653 | |
654 /* DFS traversal to find inner loops in the cfg. */ | |
655 | |
656 current_edge = ei_start (single_succ (ENTRY_BLOCK_PTR)->succs); | |
657 sp = -1; | |
658 | |
659 while (1) | |
660 { | |
661 if (EDGE_PASSED (current_edge)) | |
662 { | |
663 /* We have reached a leaf node or a node that was already | |
664 processed. Pop edges off the stack until we find | |
665 an edge that has not yet been processed. */ | |
666 while (sp >= 0 && EDGE_PASSED (current_edge)) | |
667 { | |
668 /* Pop entry off the stack. */ | |
669 current_edge = stack[sp--]; | |
670 node = ei_edge (current_edge)->src->index; | |
671 gcc_assert (node != ENTRY_BLOCK); | |
672 child = ei_edge (current_edge)->dest->index; | |
673 gcc_assert (child != EXIT_BLOCK); | |
674 RESET_BIT (in_stack, child); | |
675 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child])) | |
676 UPDATE_LOOP_RELATIONS (node, max_hdr[child]); | |
677 ei_next (&current_edge); | |
678 } | |
679 | |
680 /* See if we have finished the DFS tree traversal. */ | |
681 if (sp < 0 && EDGE_PASSED (current_edge)) | |
682 break; | |
683 | |
684 /* Nope, continue the traversal with the popped node. */ | |
685 continue; | |
686 } | |
687 | |
688 /* Process a node. */ | |
689 node = ei_edge (current_edge)->src->index; | |
690 gcc_assert (node != ENTRY_BLOCK); | |
691 SET_BIT (in_stack, node); | |
692 dfs_nr[node] = ++count; | |
693 | |
694 /* We don't traverse to the exit block. */ | |
695 child = ei_edge (current_edge)->dest->index; | |
696 if (child == EXIT_BLOCK) | |
697 { | |
698 SET_EDGE_PASSED (current_edge); | |
699 ei_next (&current_edge); | |
700 continue; | |
701 } | |
702 | |
703 /* If the successor is on the stack, then we've found a loop. | |
704 Mark the loop; if it is not a natural loop, it will | |
705 be rejected during the second traversal. */ | |
706 if (TEST_BIT (in_stack, child)) | |
707 { | |
708 no_loops = 0; | |
709 SET_BIT (header, child); | |
710 UPDATE_LOOP_RELATIONS (node, child); | |
711 SET_EDGE_PASSED (current_edge); | |
712 ei_next (&current_edge); | |
713 continue; | |
714 } | |
715 | |
716 /* If the child was already visited, then there is no need to visit | |
717 it again. Just update the loop relationships and restart | |
718 with a new edge. */ | |
719 if (dfs_nr[child]) | |
720 { | |
721 if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child])) | |
722 UPDATE_LOOP_RELATIONS (node, max_hdr[child]); | |
723 SET_EDGE_PASSED (current_edge); | |
724 ei_next (&current_edge); | |
725 continue; | |
726 } | |
727 | |
728 /* Push an entry on the stack and continue DFS traversal. */ | |
729 stack[++sp] = current_edge; | |
730 SET_EDGE_PASSED (current_edge); | |
731 current_edge = ei_start (ei_edge (current_edge)->dest->succs); | |
732 } | |
733 | |
734 /* Reset ->aux field used by EDGE_PASSED. */ | |
735 FOR_ALL_BB (bb) | |
736 { | |
737 edge_iterator ei; | |
738 edge e; | |
739 FOR_EACH_EDGE (e, ei, bb->succs) | |
740 e->aux = NULL; | |
741 } | |
742 | |
743 | |
744 /* Another check for unreachable blocks. The earlier test in | |
745 is_cfg_nonregular only finds unreachable blocks that do not | |
746 form a loop. | |
747 | |
748 The DFS traversal will mark every block that is reachable from | |
749 the entry node by placing a nonzero value in dfs_nr. Thus if | |
750 dfs_nr is zero for any block, then it must be unreachable. */ | |
751 unreachable = 0; | |
752 FOR_EACH_BB (bb) | |
753 if (dfs_nr[bb->index] == 0) | |
754 { | |
755 unreachable = 1; | |
756 break; | |
757 } | |
758 | |
759 /* Gross. To avoid wasting memory, the second pass uses the dfs_nr array | |
760 to hold degree counts. */ | |
761 degree = dfs_nr; | |
762 | |
763 FOR_EACH_BB (bb) | |
764 degree[bb->index] = EDGE_COUNT (bb->preds); | |
765 | |
766 /* Do not perform region scheduling if there are any unreachable | |
767 blocks. */ | |
768 if (!unreachable) | |
769 { | |
770 int *queue, *degree1 = NULL; | |
771 /* We use EXTENDED_RGN_HEADER as an addition to HEADER and put | |
772 there the basic blocks that are forced to be region heads. | |
773 This is done to try to assemble a few smaller regions | |
774 from a too_large region. */ | |
775 sbitmap extended_rgn_header = NULL; | |
776 bool extend_regions_p; | |
777 | |
778 if (no_loops) | |
779 SET_BIT (header, 0); | |
780 | |
781 /* Second traversal: find reducible inner loops and topologically sort | |
782 the blocks of each region. */ | |
783 | |
784 queue = XNEWVEC (int, n_basic_blocks); | |
785 | |
786 extend_regions_p = PARAM_VALUE (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS) > 0; | |
787 if (extend_regions_p) | |
788 { | |
789 degree1 = XNEWVEC (int, last_basic_block); | |
790 extended_rgn_header = sbitmap_alloc (last_basic_block); | |
791 sbitmap_zero (extended_rgn_header); | |
792 } | |
793 | |
794 /* Find blocks which are inner loop headers. We still have non-reducible | |
795 loops to consider at this point. */ | |
796 FOR_EACH_BB (bb) | |
797 { | |
798 if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index)) | |
799 { | |
800 edge e; | |
801 edge_iterator ei; | |
802 basic_block jbb; | |
803 | |
804 /* Now check that the loop is reducible. We do this separately | |
805 from finding inner loops so that we do not find a reducible | |
806 loop that contains an inner non-reducible loop. | |
807 | |
808 A simple way to find reducible/natural loops is to verify | |
809 that each block in the loop is dominated by the loop | |
810 header. | |
811 | |
812 If there exists a block that is not dominated by the loop | |
813 header, then the block is reachable from outside the loop | |
814 and thus the loop is not a natural loop. */ | |
815 FOR_EACH_BB (jbb) | |
816 { | |
817 /* First identify blocks in the loop, except for the loop | |
818 entry block. */ | |
819 if (bb->index == max_hdr[jbb->index] && bb != jbb) | |
820 { | |
821 /* Now verify that the block is dominated by the loop | |
822 header. */ | |
823 if (!dominated_by_p (CDI_DOMINATORS, jbb, bb)) | |
824 break; | |
825 } | |
826 } | |
827 | |
828 /* If we exited the loop early, then BB is the header of | |
829 a non-reducible loop and we should quit processing it | |
830 now. */ | |
831 if (jbb != EXIT_BLOCK_PTR) | |
832 continue; | |
833 | |
834 /* BB is a header of an inner loop, or block 0 in a subroutine | |
835 with no loops at all. */ | |
836 head = tail = -1; | |
837 too_large_failure = 0; | |
838 loop_head = max_hdr[bb->index]; | |
839 | |
840 if (extend_regions_p) | |
841 /* We save degree in case we meet a too_large region | |
842 and cancel it. We need a correct degree later when | |
843 calling extend_rgns. */ | |
844 memcpy (degree1, degree, last_basic_block * sizeof (int)); | |
845 | |
846 /* Decrease degree of all I's successors for topological | |
847 ordering. */ | |
848 FOR_EACH_EDGE (e, ei, bb->succs) | |
849 if (e->dest != EXIT_BLOCK_PTR) | |
850 --degree[e->dest->index]; | |
851 | |
852 /* Estimate # insns, and count # blocks in the region. */ | |
853 num_bbs = 1; | |
854 num_insns = common_sched_info->estimate_number_of_insns (bb); | |
855 | |
856 /* Find all loop latches (blocks with back edges to the loop | |
857 header) or, if the cfg has no loops, all the leaf blocks. | |
858 | |
859 Place those blocks into the queue. */ | |
860 if (no_loops) | |
861 { | |
862 FOR_EACH_BB (jbb) | |
863 /* Leaf nodes have only a single successor which must | |
864 be EXIT_BLOCK. */ | |
865 if (single_succ_p (jbb) | |
866 && single_succ (jbb) == EXIT_BLOCK_PTR) | |
867 { | |
868 queue[++tail] = jbb->index; | |
869 SET_BIT (in_queue, jbb->index); | |
870 | |
871 if (too_large (jbb->index, &num_bbs, &num_insns)) | |
872 { | |
873 too_large_failure = 1; | |
874 break; | |
875 } | |
876 } | |
877 } | |
878 else | |
879 { | |
880 edge e; | |
881 | |
882 FOR_EACH_EDGE (e, ei, bb->preds) | |
883 { | |
884 if (e->src == ENTRY_BLOCK_PTR) | |
885 continue; | |
886 | |
887 node = e->src->index; | |
888 | |
889 if (max_hdr[node] == loop_head && node != bb->index) | |
890 { | |
891 /* This is a loop latch. */ | |
892 queue[++tail] = node; | |
893 SET_BIT (in_queue, node); | |
894 | |
895 if (too_large (node, &num_bbs, &num_insns)) | |
896 { | |
897 too_large_failure = 1; | |
898 break; | |
899 } | |
900 } | |
901 } | |
902 } | |
903 | |
904 /* Now add all the blocks in the loop to the queue. | |
905 | |
906 We know the loop is a natural loop; however the algorithm | |
907 above will not always mark certain blocks as being in the | |
908 loop. Consider: | |
909 node children | |
910 a b,c | |
911 b c | |
912 c a,d | |
913 d b | |
914 | |
915 The algorithm in the DFS traversal may not mark B & D as part | |
916 of the loop (i.e. they will not have max_hdr set to A). | |
917 | |
918 We know they cannot be loop latches (else they would have | |
919 had max_hdr set since they'd have a backedge to a dominator | |
920 block). So we don't need them on the initial queue. | |
921 | |
922 We know they are part of the loop because they are dominated | |
923 by the loop header and can be reached by a backwards walk of | |
924 the edges starting with nodes on the initial queue. | |
925 | |
926 It is safe and desirable to include those nodes in the | |
927 loop/scheduling region. To do so we would need to decrease | |
928 the degree of a node if it is the target of a backedge | |
929 within the loop itself as the node is placed in the queue. | |
930 | |
931 We do not do this because I'm not sure that the actual | |
932 scheduling code will properly handle this case. ?!? */ | |
933 | |
934 while (head < tail && !too_large_failure) | |
935 { | |
936 edge e; | |
937 child = queue[++head]; | |
938 | |
939 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->preds) | |
940 { | |
941 node = e->src->index; | |
942 | |
943 /* See discussion above about nodes not marked as in | |
944 this loop during the initial DFS traversal. */ | |
945 if (e->src == ENTRY_BLOCK_PTR | |
946 || max_hdr[node] != loop_head) | |
947 { | |
948 tail = -1; | |
949 break; | |
950 } | |
951 else if (!TEST_BIT (in_queue, node) && node != bb->index) | |
952 { | |
953 queue[++tail] = node; | |
954 SET_BIT (in_queue, node); | |
955 | |
956 if (too_large (node, &num_bbs, &num_insns)) | |
957 { | |
958 too_large_failure = 1; | |
959 break; | |
960 } | |
961 } | |
962 } | |
963 } | |
964 | |
965 if (tail >= 0 && !too_large_failure) | |
966 { | |
967 /* Place the loop header into list of region blocks. */ | |
968 degree[bb->index] = -1; | |
969 rgn_bb_table[idx] = bb->index; | |
970 RGN_NR_BLOCKS (nr_regions) = num_bbs; | |
971 RGN_BLOCKS (nr_regions) = idx++; | |
972 RGN_DONT_CALC_DEPS (nr_regions) = 0; | |
973 RGN_HAS_REAL_EBB (nr_regions) = 0; | |
974 CONTAINING_RGN (bb->index) = nr_regions; | |
975 BLOCK_TO_BB (bb->index) = count = 0; | |
976 | |
977 /* Remove blocks from queue[] when their in degree | |
978 becomes zero. Repeat until no blocks are left on the | |
979 list. This produces a topological list of blocks in | |
980 the region. */ | |
981 while (tail >= 0) | |
982 { | |
983 if (head < 0) | |
984 head = tail; | |
985 child = queue[head]; | |
986 if (degree[child] == 0) | |
987 { | |
988 edge e; | |
989 | |
990 degree[child] = -1; | |
991 rgn_bb_table[idx++] = child; | |
992 BLOCK_TO_BB (child) = ++count; | |
993 CONTAINING_RGN (child) = nr_regions; | |
994 queue[head] = queue[tail--]; | |
995 | |
996 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->succs) | |
997 if (e->dest != EXIT_BLOCK_PTR) | |
998 --degree[e->dest->index]; | |
999 } | |
1000 else | |
1001 --head; | |
1002 } | |
1003 ++nr_regions; | |
1004 } | |
1005 else if (extend_regions_p) | |
1006 { | |
1007 /* Restore DEGREE. */ | |
1008 int *t = degree; | |
1009 | |
1010 degree = degree1; | |
1011 degree1 = t; | |
1012 | |
1013 /* And force successors of BB to be region heads. | |
1014 This may provide several smaller regions instead | |
1015 of one too_large region. */ | |
1016 FOR_EACH_EDGE (e, ei, bb->succs) | |
1017 if (e->dest != EXIT_BLOCK_PTR) | |
1018 SET_BIT (extended_rgn_header, e->dest->index); | |
1019 } | |
1020 } | |
1021 } | |
1022 free (queue); | |
1023 | |
1024 if (extend_regions_p) | |
1025 { | |
1026 free (degree1); | |
1027 | |
1028 sbitmap_a_or_b (header, header, extended_rgn_header); | |
1029 sbitmap_free (extended_rgn_header); | |
1030 | |
1031 extend_rgns (degree, &idx, header, max_hdr); | |
1032 } | |
1033 } | |
1034 | |
1035 /* Any block that did not end up in a region is placed into a region | |
1036 by itself. */ | |
1037 FOR_EACH_BB (bb) | |
1038 if (degree[bb->index] >= 0) | |
1039 { | |
1040 rgn_bb_table[idx] = bb->index; | |
1041 RGN_NR_BLOCKS (nr_regions) = 1; | |
1042 RGN_BLOCKS (nr_regions) = idx++; | |
1043 RGN_DONT_CALC_DEPS (nr_regions) = 0; | |
1044 RGN_HAS_REAL_EBB (nr_regions) = 0; | |
1045 CONTAINING_RGN (bb->index) = nr_regions++; | |
1046 BLOCK_TO_BB (bb->index) = 0; | |
1047 } | |
1048 | |
1049 free (max_hdr); | |
1050 free (degree); | |
1051 free (stack); | |
1052 sbitmap_free (header); | |
1053 sbitmap_free (inner); | |
1054 sbitmap_free (in_queue); | |
1055 sbitmap_free (in_stack); | |
1056 } | |
1057 | |
1058 | |
1059 /* Wrapper function. | |
1060 If FLAG_SEL_SCHED_PIPELINING is set, then use the selective scheduler's | |
1061 own function to form regions. Otherwise just call haifa_find_rgns. */ | |
1062 static void | |
1063 find_rgns (void) | |
1064 { | |
1065 if (sel_sched_p () && flag_sel_sched_pipelining) | |
1066 sel_find_rgns (); | |
1067 else | |
1068 haifa_find_rgns (); | |
1069 } | |
1070 | |
1071 static int gather_region_statistics (int **); | |
1072 static void print_region_statistics (int *, int, int *, int); | |
1073 | |
1074 /* Calculate the histogram that shows the number of regions having the | |
1075 given number of basic blocks, and store it in the RSP array. Return | |
1076 the size of this array. */ | |
1077 static int | |
1078 gather_region_statistics (int **rsp) | |
1079 { | |
1080 int i, *a = 0, a_sz = 0; | |
1081 | |
1082 /* a[i] is the number of regions that have (i + 1) basic blocks. */ | |
1083 for (i = 0; i < nr_regions; i++) | |
1084 { | |
1085 int nr_blocks = RGN_NR_BLOCKS (i); | |
1086 | |
1087 gcc_assert (nr_blocks >= 1); | |
1088 | |
1089 if (nr_blocks > a_sz) | |
1090 { | |
1091 a = XRESIZEVEC (int, a, nr_blocks); | |
1092 do | |
1093 a[a_sz++] = 0; | |
1094 while (a_sz != nr_blocks); | |
1095 } | |
1096 | |
1097 a[nr_blocks - 1]++; | |
1098 } | |
1099 | |
1100 *rsp = a; | |
1101 return a_sz; | |
1102 } | |
1103 | |
1104 /* Print region statistics. S1 and S2 denote the data before and after | |
1105 calling extend_rgns, respectively. */ | |
1106 static void | |
1107 print_region_statistics (int *s1, int s1_sz, int *s2, int s2_sz) | |
1108 { | |
1109 int i; | |
1110 | |
1111 /* We iterate until s2_sz because extend_rgns does not decrease | |
1112 the maximal region size. */ | |
1113 for (i = 1; i < s2_sz; i++) | |
1114 { | |
1115 int n1, n2; | |
1116 | |
1117 n2 = s2[i]; | |
1118 | |
1119 if (n2 == 0) | |
1120 continue; | |
1121 | |
1122 if (i >= s1_sz) | |
1123 n1 = 0; | |
1124 else | |
1125 n1 = s1[i]; | |
1126 | |
1127 fprintf (sched_dump, ";; Region extension statistics: size %d: " \ | |
1128 "was %d + %d more\n", i + 1, n1, n2 - n1); | |
1129 } | |
1130 } | |
1131 | |
1132 /* Extend regions. | |
1133 DEGREE - Array of incoming edge counts, considering only | |
1134 the edges whose sources are not yet in formed regions. | |
1135 IDXP - pointer to the next available index in rgn_bb_table. | |
1136 HEADER - set of all region heads. | |
1137 LOOP_HDR - mapping from block to the containing loop | |
1138 (two blocks can reside within one region if they have | |
1139 the same loop header). */ | |
1140 void | |
1141 extend_rgns (int *degree, int *idxp, sbitmap header, int *loop_hdr) | |
1142 { | |
1143 int *order, i, rescan = 0, idx = *idxp, iter = 0, max_iter, *max_hdr; | |
1144 int nblocks = n_basic_blocks - NUM_FIXED_BLOCKS; | |
1145 | |
1146 max_iter = PARAM_VALUE (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS); | |
1147 | |
1148 max_hdr = XNEWVEC (int, last_basic_block); | |
1149 | |
1150 order = XNEWVEC (int, last_basic_block); | |
1151 post_order_compute (order, false, false); | |
1152 | |
1153 for (i = nblocks - 1; i >= 0; i--) | |
1154 { | |
1155 int bbn = order[i]; | |
1156 if (degree[bbn] >= 0) | |
1157 { | |
1158 max_hdr[bbn] = bbn; | |
1159 rescan = 1; | |
1160 } | |
1161 else | |
1162 /* This block was already processed in find_rgns. */ | |
1163 max_hdr[bbn] = -1; | |
1164 } | |
1165 | |
1166 /* The idea is to topologically walk through the CFG in top-down order. | |
1167 During the traversal, if all the predecessors of a node are | |
1168 marked to be in the same region (they all have the same max_hdr), | |
1169 then the current node is also marked to be a part of that region. | |
1170 Otherwise the node starts its own region. | |
1171 The CFG should be traversed until no further changes are made. On each | |
1172 iteration the set of region heads is extended (the set of those | |
1173 blocks that have max_hdr[bbi] == bbi). This set is upper bounded by the | |
1174 set of all basic blocks, thus the algorithm is guaranteed to | |
1175 terminate. */ | |
1176 | |
1177 while (rescan && iter < max_iter) | |
1178 { | |
1179 rescan = 0; | |
1180 | |
1181 for (i = nblocks - 1; i >= 0; i--) | |
1182 { | |
1183 edge e; | |
1184 edge_iterator ei; | |
1185 int bbn = order[i]; | |
1186 | |
1187 if (max_hdr[bbn] != -1 && !TEST_BIT (header, bbn)) | |
1188 { | |
1189 int hdr = -1; | |
1190 | |
1191 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (bbn)->preds) | |
1192 { | |
1193 int predn = e->src->index; | |
1194 | |
1195 if (predn != ENTRY_BLOCK | |
1196 /* If pred wasn't processed in find_rgns. */ | |
1197 && max_hdr[predn] != -1 | |
1198 /* And pred and bb reside in the same loop. | |
1199 (Or out of any loop). */ | |
1200 && loop_hdr[bbn] == loop_hdr[predn]) | |
1201 { | |
1202 if (hdr == -1) | |
1203 /* Then bb extends the containing region of pred. */ | |
1204 hdr = max_hdr[predn]; | |
1205 else if (hdr != max_hdr[predn]) | |
1206 /* Too bad, there are at least two predecessors | |
1207 that reside in different regions. Thus, BB should | |
1208 begin its own region. */ | |
1209 { | |
1210 hdr = bbn; | |
1211 break; | |
1212 } | |
1213 } | |
1214 else | |
1215 /* BB starts its own region. */ | |
1216 { | |
1217 hdr = bbn; | |
1218 break; | |
1219 } | |
1220 } | |
1221 | |
1222 if (hdr == bbn) | |
1223 { | |
1224 /* If BB starts its own region, | |
1225 add BB to the set of headers. */ | |
1226 SET_BIT (header, bbn); | |
1227 rescan = 1; | |
1228 } | |
1229 else | |
1230 gcc_assert (hdr != -1); | |
1231 | |
1232 max_hdr[bbn] = hdr; | |
1233 } | |
1234 } | |
1235 | |
1236 iter++; | |
1237 } | |
1238 | |
1239 /* Statistics were gathered on the SPEC2000 package of tests with | |
1240 mainline weekly snapshot gcc-4.1-20051015 on ia64. | |
1241 | |
1242 Statistics for SPECint: | |
1243 1 iteration : 1751 cases (38.7%) | |
1244 2 iterations: 2770 cases (61.3%) | |
1245 Blocks wrapped in regions by find_rgns without extension: 18295 blocks | |
1246 Blocks wrapped in regions by 2 iterations in extend_rgns: 23821 blocks | |
1247 (We don't count single block regions here). | |
1248 | |
1249 Statistics for SPECfp: | |
1250 1 iteration : 621 cases (35.9%) | |
1251 2 iterations: 1110 cases (64.1%) | |
1252 Blocks wrapped in regions by find_rgns without extension: 6476 blocks | |
1253 Blocks wrapped in regions by 2 iterations in extend_rgns: 11155 blocks | |
1254 (We don't count single block regions here). | |
1255 | |
1256 By default we do at most 2 iterations. | |
1257 This can be overridden with max-sched-extend-regions-iters parameter: | |
1258 0 - disable region extension, | |
1259 N > 0 - do at most N iterations. */ | |
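/* Editor's note: following the comment above, region extension can be
   disabled from the command line, e.g.:

     gcc -O2 -fschedule-insns2 --param max-sched-extend-regions-iters=0  */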
1260 | |
1261 if (sched_verbose && iter != 0) | |
1262 fprintf (sched_dump, ";; Region extension iterations: %d%s\n", iter, | |
1263 rescan ? "... failed" : ""); | |
1264 | |
1265 if (!rescan && iter != 0) | |
1266 { | |
1267 int *s1 = NULL, s1_sz = 0; | |
1268 | |
1269 /* Save the old statistics for later printout. */ | |
1270 if (sched_verbose >= 6) | |
1271 s1_sz = gather_region_statistics (&s1); | |
1272 | |
1273 /* We have succeeded. Now assemble the regions. */ | |
1274 for (i = nblocks - 1; i >= 0; i--) | |
1275 { | |
1276 int bbn = order[i]; | |
1277 | |
1278 if (max_hdr[bbn] == bbn) | |
1279 /* BBN is a region head. */ | |
1280 { | |
1281 edge e; | |
1282 edge_iterator ei; | |
1283 int num_bbs = 0, j, num_insns = 0, large; | |
1284 | |
1285 large = too_large (bbn, &num_bbs, &num_insns); | |
1286 | |
1287 degree[bbn] = -1; | |
1288 rgn_bb_table[idx] = bbn; | |
1289 RGN_BLOCKS (nr_regions) = idx++; | |
1290 RGN_DONT_CALC_DEPS (nr_regions) = 0; | |
1291 RGN_HAS_REAL_EBB (nr_regions) = 0; | |
1292 CONTAINING_RGN (bbn) = nr_regions; | |
1293 BLOCK_TO_BB (bbn) = 0; | |
1294 | |
1295 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (bbn)->succs) | |
1296 if (e->dest != EXIT_BLOCK_PTR) | |
1297 degree[e->dest->index]--; | |
1298 | |
1299 if (!large) | |
1300 /* Here we check whether the region is too_large. */ | |
1301 for (j = i - 1; j >= 0; j--) | |
1302 { | |
1303 int succn = order[j]; | |
1304 if (max_hdr[succn] == bbn) | |
1305 { | |
1306 if ((large = too_large (succn, &num_bbs, &num_insns))) | |
1307 break; | |
1308 } | |
1309 } | |
1310 | |
1311 if (large) | |
1312 /* If the region is too_large, then wrap every block of | |
1313 the region into a single-block region. | |
1314 Here we wrap only the region head; the other blocks are | |
1315 processed in the loop below. */ | |
1316 { | |
1317 RGN_NR_BLOCKS (nr_regions) = 1; | |
1318 nr_regions++; | |
1319 } | |
1320 | |
1321 num_bbs = 1; | |
1322 | |
1323 for (j = i - 1; j >= 0; j--) | |
1324 { | |
1325 int succn = order[j]; | |
1326 | |
1327 if (max_hdr[succn] == bbn) | |
1328 /* This loop iterates over all basic blocks that | |
1329 are supposed to be in the region with head BBN | |
1330 and wraps them into that region (or into single- | |
1331 block regions). */ | |
1332 { | |
1333 gcc_assert (degree[succn] == 0); | |
1334 | |
1335 degree[succn] = -1; | |
1336 rgn_bb_table[idx] = succn; | |
1337 BLOCK_TO_BB (succn) = large ? 0 : num_bbs++; | |
1338 CONTAINING_RGN (succn) = nr_regions; | |
1339 | |
1340 if (large) | |
1341 /* Wrap SUCCN into a single-block region. */ | |
1342 { | |
1343 RGN_BLOCKS (nr_regions) = idx; | |
1344 RGN_NR_BLOCKS (nr_regions) = 1; | |
1345 RGN_DONT_CALC_DEPS (nr_regions) = 0; | |
1346 RGN_HAS_REAL_EBB (nr_regions) = 0; | |
1347 nr_regions++; | |
1348 } | |
1349 | |
1350 idx++; | |
1351 | |
1352 FOR_EACH_EDGE (e, ei, BASIC_BLOCK (succn)->succs) | |
1353 if (e->dest != EXIT_BLOCK_PTR) | |
1354 degree[e->dest->index]--; | |
1355 } | |
1356 } | |
1357 | |
1358 if (!large) | |
1359 { | |
1360 RGN_NR_BLOCKS (nr_regions) = num_bbs; | |
1361 nr_regions++; | |
1362 } | |
1363 } | |
1364 } | |
1365 | |
1366 if (sched_verbose >= 6) | |
1367 { | |
1368 int *s2, s2_sz; | |
1369 | |
1370 /* Get the new statistics and print the comparison with the | |
1371 one before calling this function. */ | |
1372 s2_sz = gather_region_statistics (&s2); | |
1373 print_region_statistics (s1, s1_sz, s2, s2_sz); | |
1374 free (s1); | |
1375 free (s2); | |
1376 } | |
1377 } | |
1378 | |
1379 free (order); | |
1380 free (max_hdr); | |
1381 | |
1382 *idxp = idx; | |
1383 } | |
1384 | |
1385 /* Functions for regions scheduling information. */ | |
1386 | |
1387 /* Compute dominators, probability, and potential-split-edges of bb. | |
1388 Assume that these values were already computed for bb's predecessors. */ | |
1389 | |
1390 static void | |
1391 compute_dom_prob_ps (int bb) | |
1392 { | |
1393 edge_iterator in_ei; | |
1394 edge in_edge; | |
1395 | |
1396 /* We shouldn't have any real ebbs yet. */ | |
1397 gcc_assert (ebb_head [bb] == bb + current_blocks); | |
1398 | |
1399 if (IS_RGN_ENTRY (bb)) | |
1400 { | |
1401 SET_BIT (dom[bb], 0); | |
1402 prob[bb] = REG_BR_PROB_BASE; | |
1403 return; | |
1404 } | |
1405 | |
1406 prob[bb] = 0; | |
1407 | |
1408 /* Initialize dom[bb] to '111..1'. */ | |
1409 sbitmap_ones (dom[bb]); | |
1410 | |
1411 FOR_EACH_EDGE (in_edge, in_ei, BASIC_BLOCK (BB_TO_BLOCK (bb))->preds) | |
1412 { | |
1413 int pred_bb; | |
1414 edge out_edge; | |
1415 edge_iterator out_ei; | |
1416 | |
1417 if (in_edge->src == ENTRY_BLOCK_PTR) | |
1418 continue; | |
1419 | |
1420 pred_bb = BLOCK_TO_BB (in_edge->src->index); | |
1421 sbitmap_a_and_b (dom[bb], dom[bb], dom[pred_bb]); | |
1422 sbitmap_a_or_b (ancestor_edges[bb], | |
1423 ancestor_edges[bb], ancestor_edges[pred_bb]); | |
1424 | |
1425 SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (in_edge)); | |
1426 | |
1427 sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[pred_bb]); | |
1428 | |
1429 FOR_EACH_EDGE (out_edge, out_ei, in_edge->src->succs) | |
1430 SET_BIT (pot_split[bb], EDGE_TO_BIT (out_edge)); | |
1431 | |
1432 prob[bb] += ((prob[pred_bb] * in_edge->probability) / REG_BR_PROB_BASE); | |
1433 } | |
1434 | |
1435 SET_BIT (dom[bb], bb); | |
1436 sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]); | |
1437 | |
1438 if (sched_verbose >= 2) | |
1439 fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb), | |
1440 (100 * prob[bb]) / REG_BR_PROB_BASE); | |
1441 } | |
1442 | |
1443 /* Functions for target info. */ | |
1444 | |
1445 /* Compute in BL the list of split edges of bb_src relative to bb_trg. | |
1446 Note that bb_trg dominates bb_src. */ | |
1447 | |
1448 static void | |
1449 split_edges (int bb_src, int bb_trg, edgelst *bl) | |
1450 { | |
1451 sbitmap src = sbitmap_alloc (pot_split[bb_src]->n_bits); | |
1452 sbitmap_copy (src, pot_split[bb_src]); | |
1453 | |
1454 sbitmap_difference (src, src, pot_split[bb_trg]); | |
1455 extract_edgelst (src, bl); | |
1456 sbitmap_free (src); | |
1457 } | |
1458 | |
1459 /* Find the valid candidate-source-blocks for the target block TRG, compute | |
1460 their probability, and check if they are speculative or not. | |
1461 For speculative sources, compute their update-blocks and split-blocks. */ | |
1462 | |
1463 static void | |
1464 compute_trg_info (int trg) | |
1465 { | |
1466 candidate *sp; | |
1467 edgelst el = { NULL, 0 }; | |
1468 int i, j, k, update_idx; | |
1469 basic_block block; | |
1470 sbitmap visited; | |
1471 edge_iterator ei; | |
1472 edge e; | |
1473 | |
1474 candidate_table = XNEWVEC (candidate, current_nr_blocks); | |
1475 | |
1476 bblst_last = 0; | |
1477 /* bblst_table holds split blocks and update blocks for each block after | |
1478 the current one in the region. Split blocks and update blocks are | |
1479 the TO blocks of region edges, so there can be at most rgn_nr_edges | |
1480 of them. */ | |
1481 bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges; | |
1482 bblst_table = XNEWVEC (basic_block, bblst_size); | |
1483 | |
1484 edgelst_last = 0; | |
1485 edgelst_table = XNEWVEC (edge, rgn_nr_edges); | |
1486 | |
1487 /* Define some of the fields for the target bb as well. */ | |
1488 sp = candidate_table + trg; | |
1489 sp->is_valid = 1; | |
1490 sp->is_speculative = 0; | |
1491 sp->src_prob = REG_BR_PROB_BASE; | |
1492 | |
1493 visited = sbitmap_alloc (last_basic_block); | |
1494 | |
1495 for (i = trg + 1; i < current_nr_blocks; i++) | |
1496 { | |
1497 sp = candidate_table + i; | |
1498 | |
1499 sp->is_valid = IS_DOMINATED (i, trg); | |
1500 if (sp->is_valid) | |
1501 { | |
1502 int tf = prob[trg], cf = prob[i]; | |
1503 | |
1504 /* In CFGs with low probability edges TF can possibly be zero. */ | |
1505 sp->src_prob = (tf ? ((cf * REG_BR_PROB_BASE) / tf) : 0); | |
1506 sp->is_valid = (sp->src_prob >= min_spec_prob); | |
1507 } | |
1508 | |
1509 if (sp->is_valid) | |
1510 { | |
1511 split_edges (i, trg, &el); | |
1512 sp->is_speculative = (el.nr_members) ? 1 : 0; | |
1513 if (sp->is_speculative && !flag_schedule_speculative) | |
1514 sp->is_valid = 0; | |
1515 } | |
1516 | |
1517 if (sp->is_valid) | |
1518 { | |
1519 /* Compute split blocks and store them in bblst_table. | |
1520 The TO block of every split edge is a split block. */ | |
1521 sp->split_bbs.first_member = &bblst_table[bblst_last]; | |
1522 sp->split_bbs.nr_members = el.nr_members; | |
1523 for (j = 0; j < el.nr_members; bblst_last++, j++) | |
1524 bblst_table[bblst_last] = el.first_member[j]->dest; | |
1525 sp->update_bbs.first_member = &bblst_table[bblst_last]; | |
1526 | |
1527 /* Compute update blocks and store them in bblst_table. | |
1528 For every split edge, look at the FROM block, and check | |
1529 all out edges. For each out edge that is not a split edge, | |
1530 add the TO block to the update block list. This list can end | |
1531 up with a lot of duplicates. We need to weed them out to avoid | |
1532 overrunning the end of the bblst_table. */ | |
1533 | |
1534 update_idx = 0; | |
1535 sbitmap_zero (visited); | |
1536 for (j = 0; j < el.nr_members; j++) | |
1537 { | |
1538 block = el.first_member[j]->src; | |
1539 FOR_EACH_EDGE (e, ei, block->succs) | |
1540 { | |
1541 if (!TEST_BIT (visited, e->dest->index)) | |
1542 { | |
1543 for (k = 0; k < el.nr_members; k++) | |
1544 if (e == el.first_member[k]) | |
1545 break; | |
1546 | |
1547 if (k >= el.nr_members) | |
1548 { | |
1549 bblst_table[bblst_last++] = e->dest; | |
1550 SET_BIT (visited, e->dest->index); | |
1551 update_idx++; | |
1552 } | |
1553 } | |
1554 } | |
1555 } | |
1556 sp->update_bbs.nr_members = update_idx; | |
1557 | |
1558 /* Make sure we didn't overrun the end of bblst_table. */ | |
1559 gcc_assert (bblst_last <= bblst_size); | |
1560 } | |
1561 else | |
1562 { | |
1563 sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0; | |
1564 | |
1565 sp->is_speculative = 0; | |
1566 sp->src_prob = 0; | |
1567 } | |
1568 } | |
1569 | |
1570 sbitmap_free (visited); | |
1571 } | |
1572 | |
1573 /* Free the computed target info. */ | |
1574 static void | |
1575 free_trg_info (void) | |
1576 { | |
1577 free (candidate_table); | |
1578 free (bblst_table); | |
1579 free (edgelst_table); | |
1580 } | |
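/* Editor's note: compute_trg_info and free_trg_info are used as a pair;
   the candidate/bblst/edgelst tables allocated for a target block must
   be released with free_trg_info once that target has been scheduled.  */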
1581 | |
1582 /* Print candidates info, for debugging purposes. Callable from debugger. */ | |
1583 | |
1584 void | |
1585 debug_candidate (int i) | |
1586 { | |
1587 if (!candidate_table[i].is_valid) | |
1588 return; | |
1589 | |
1590 if (candidate_table[i].is_speculative) | |
1591 { | |
1592 int j; | |
1593 fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i); | |
1594 | |
1595 fprintf (sched_dump, "split path: "); | |
1596 for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++) | |
1597 { | |
1598 int b = candidate_table[i].split_bbs.first_member[j]->index; | |
1599 | |
1600 fprintf (sched_dump, " %d ", b); | |
1601 } | |
1602 fprintf (sched_dump, "\n"); | |
1603 | |
1604 fprintf (sched_dump, "update path: "); | |
1605 for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++) | |
1606 { | |
1607 int b = candidate_table[i].update_bbs.first_member[j]->index; | |
1608 | |
1609 fprintf (sched_dump, " %d ", b); | |
1610 } | |
1611 fprintf (sched_dump, "\n"); | |
1612 } | |
1613 else | |
1614 { | |
1615 fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i)); | |
1616 } | |
1617 } | |
1618 | |
1619 /* Print candidates info, for debugging purposes. Callable from debugger. */ | |
1620 | |
1621 void | |
1622 debug_candidates (int trg) | |
1623 { | |
1624 int i; | |
1625 | |
1626 fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n", | |
1627 BB_TO_BLOCK (trg), trg); | |
1628 for (i = trg + 1; i < current_nr_blocks; i++) | |
1629 debug_candidate (i); | |
1630 } | |
1631 | |
1632 /* Functions for speculative scheduling. */ | |
1633 | |
1634 static bitmap_head not_in_df; | |
1635 | |
1636 /* Return 0 if X is a set of a register that is live at the beginning of | |
1637 one of the split blocks of SRC; otherwise return 1. */ | |
1638 | |
1639 static int | |
1640 check_live_1 (int src, rtx x) | |
1641 { | |
1642 int i; | |
1643 int regno; | |
1644 rtx reg = SET_DEST (x); | |
1645 | |
1646 if (reg == 0) | |
1647 return 1; | |
1648 | |
1649 while (GET_CODE (reg) == SUBREG | |
1650 || GET_CODE (reg) == ZERO_EXTRACT | |
1651 || GET_CODE (reg) == STRICT_LOW_PART) | |
1652 reg = XEXP (reg, 0); | |
1653 | |
1654 if (GET_CODE (reg) == PARALLEL) | |
1655 { | |
1656 int i; | |
1657 | |
1658 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--) | |
1659 if (XEXP (XVECEXP (reg, 0, i), 0) != 0) | |
1660 if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0))) | |
1661 return 1; | |
1662 | |
1663 return 0; | |
1664 } | |
1665 | |
1666 if (!REG_P (reg)) | |
1667 return 1; | |
1668 | |
1669 regno = REGNO (reg); | |
1670 | |
1671 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno]) | |
1672 { | |
1673 /* Global registers are assumed live. */ | |
1674 return 0; | |
1675 } | |
1676 else | |
1677 { | |
1678 if (regno < FIRST_PSEUDO_REGISTER) | |
1679 { | |
1680 /* Check for hard registers. */ | |
1681 int j = hard_regno_nregs[regno][GET_MODE (reg)]; | |
1682 while (--j >= 0) | |
1683 { | |
1684 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++) | |
1685 { | |
1686 basic_block b = candidate_table[src].split_bbs.first_member[i]; | |
1687 int t = bitmap_bit_p (¬_in_df, b->index); | |
1688 | |
1689 /* We can have split blocks that were recently generated. | |
1690 Such blocks are always outside the current region. */ | |
1691 gcc_assert (!t || (CONTAINING_RGN (b->index) | |
1692 != CONTAINING_RGN (BB_TO_BLOCK (src)))); | |
1693 | |
1694 if (t || REGNO_REG_SET_P (df_get_live_in (b), regno + j)) | |
1695 return 0; | |
1696 } | |
1697 } | |
1698 } | |
1699 else | |
1700 { | |
1701 /* Check for pseudo registers. */ | |
1702 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++) | |
1703 { | |
1704 basic_block b = candidate_table[src].split_bbs.first_member[i]; | |
1705 int t = bitmap_bit_p (¬_in_df, b->index); | |
1706 | |
1707 gcc_assert (!t || (CONTAINING_RGN (b->index) | |
1708 != CONTAINING_RGN (BB_TO_BLOCK (src)))); | |
1709 | |
1710 if (t || REGNO_REG_SET_P (df_get_live_in (b), regno)) | |
1711 return 0; | |
1712 } | |
1713 } | |
1714 } | |
1715 | |
1716 return 1; | |
1717 } | |
1718 | |
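/* A sketch of the hard-register case above (hypothetical numbers): for a
   DImode set of hard register 0 on a 32-bit target, hard_regno_nregs
   yields 2, so liveness of both hard registers 0 and 1 is tested at the
   head of every split block; if either is live there, 0 is returned and
   the speculative motion is rejected.  */
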
/* If X is a set of a register R, mark that R is live at the beginning
of every update block of SRC. */
1721 | |
1722 static void | |
1723 update_live_1 (int src, rtx x) | |
1724 { | |
1725 int i; | |
1726 int regno; | |
1727 rtx reg = SET_DEST (x); | |
1728 | |
1729 if (reg == 0) | |
1730 return; | |
1731 | |
1732 while (GET_CODE (reg) == SUBREG | |
1733 || GET_CODE (reg) == ZERO_EXTRACT | |
1734 || GET_CODE (reg) == STRICT_LOW_PART) | |
1735 reg = XEXP (reg, 0); | |
1736 | |
1737 if (GET_CODE (reg) == PARALLEL) | |
1738 { | |
1739 int i; | |
1740 | |
1741 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--) | |
1742 if (XEXP (XVECEXP (reg, 0, i), 0) != 0) | |
1743 update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)); | |
1744 | |
1745 return; | |
1746 } | |
1747 | |
1748 if (!REG_P (reg)) | |
1749 return; | |
1750 | |
1751 /* Global registers are always live, so the code below does not apply | |
1752 to them. */ | |
1753 | |
1754 regno = REGNO (reg); | |
1755 | |
1756 if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno]) | |
1757 { | |
1758 if (regno < FIRST_PSEUDO_REGISTER) | |
1759 { | |
1760 int j = hard_regno_nregs[regno][GET_MODE (reg)]; | |
1761 while (--j >= 0) | |
1762 { | |
1763 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++) | |
1764 { | |
1765 basic_block b = candidate_table[src].update_bbs.first_member[i]; | |
1766 | |
1767 SET_REGNO_REG_SET (df_get_live_in (b), regno + j); | |
1768 } | |
1769 } | |
1770 } | |
1771 else | |
1772 { | |
1773 for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++) | |
1774 { | |
1775 basic_block b = candidate_table[src].update_bbs.first_member[i]; | |
1776 | |
1777 SET_REGNO_REG_SET (df_get_live_in (b), regno); | |
1778 } | |
1779 } | |
1780 } | |
1781 } | |
1782 | |
/* Return 1 if INSN can be speculatively moved from block SRC to TRG,
otherwise return 0. Called before the first insertion of INSN into
the ready list or before scheduling. */
1786 | |
1787 static int | |
1788 check_live (rtx insn, int src) | |
1789 { | |
1790 /* Find the registers set by instruction. */ | |
1791 if (GET_CODE (PATTERN (insn)) == SET | |
1792 || GET_CODE (PATTERN (insn)) == CLOBBER) | |
1793 return check_live_1 (src, PATTERN (insn)); | |
1794 else if (GET_CODE (PATTERN (insn)) == PARALLEL) | |
1795 { | |
1796 int j; | |
1797 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--) | |
1798 if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET | |
1799 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER) | |
1800 && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j))) | |
1801 return 0; | |
1802 | |
1803 return 1; | |
1804 } | |
1805 | |
1806 return 1; | |
1807 } | |
1808 | |
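/* For example (hypothetical pattern): an insn whose PATTERN is

     (parallel [(set (reg:SI 100) (mem:SI (reg:SI 101)))
                (clobber (reg:CC 17))])

   passes check_live only if check_live_1 succeeds for both the SET and
   the CLOBBER, i.e. neither destination register is live at the head of
   any split block of SRC.  */
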
1809 /* Update the live registers info after insn was moved speculatively from | |
1810 block src to trg. */ | |
1811 | |
1812 static void | |
1813 update_live (rtx insn, int src) | |
1814 { | |
1815 /* Find the registers set by instruction. */ | |
1816 if (GET_CODE (PATTERN (insn)) == SET | |
1817 || GET_CODE (PATTERN (insn)) == CLOBBER) | |
1818 update_live_1 (src, PATTERN (insn)); | |
1819 else if (GET_CODE (PATTERN (insn)) == PARALLEL) | |
1820 { | |
1821 int j; | |
1822 for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--) | |
1823 if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET | |
1824 || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER) | |
1825 update_live_1 (src, XVECEXP (PATTERN (insn), 0, j)); | |
1826 } | |
1827 } | |
1828 | |
1829 /* Nonzero if block bb_to is equal to, or reachable from block bb_from. */ | |
1830 #define IS_REACHABLE(bb_from, bb_to) \ | |
1831 (bb_from == bb_to \ | |
1832 || IS_RGN_ENTRY (bb_from) \ | |
1833 || (TEST_BIT (ancestor_edges[bb_to], \ | |
1834 EDGE_TO_BIT (single_pred_edge (BASIC_BLOCK (BB_TO_BLOCK (bb_from))))))) | |
1835 | |
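/* For instance (region-local bb indices, illustration only): in a
   diamond-shaped region B0->B1, B0->B2, B1->B3, B2->B3 with entry B0,
   IS_REACHABLE (B1, B3) is nonzero since B3 is reachable from B1, while
   IS_REACHABLE (B1, B2) is zero because no path leads from B1 to B2.  */
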
1836 /* Turns on the fed_by_spec_load flag for insns fed by load_insn. */ | |
1837 | |
1838 static void | |
1839 set_spec_fed (rtx load_insn) | |
1840 { | |
1841 sd_iterator_def sd_it; | |
1842 dep_t dep; | |
1843 | |
1844 FOR_EACH_DEP (load_insn, SD_LIST_FORW, sd_it, dep) | |
1845 if (DEP_TYPE (dep) == REG_DEP_TRUE) | |
1846 FED_BY_SPEC_LOAD (DEP_CON (dep)) = 1; | |
1847 } | |
1848 | |
1849 /* On the path from the insn to load_insn_bb, find a conditional | |
1850 branch depending on insn, that guards the speculative load. */ | |
1851 | |
1852 static int | |
1853 find_conditional_protection (rtx insn, int load_insn_bb) | |
1854 { | |
1855 sd_iterator_def sd_it; | |
1856 dep_t dep; | |
1857 | |
1858 /* Iterate through DEF-USE forward dependences. */ | |
1859 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep) | |
1860 { | |
1861 rtx next = DEP_CON (dep); | |
1862 | |
1863 if ((CONTAINING_RGN (BLOCK_NUM (next)) == | |
1864 CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb))) | |
1865 && IS_REACHABLE (INSN_BB (next), load_insn_bb) | |
1866 && load_insn_bb != INSN_BB (next) | |
1867 && DEP_TYPE (dep) == REG_DEP_TRUE | |
1868 && (JUMP_P (next) | |
1869 || find_conditional_protection (next, load_insn_bb))) | |
1870 return 1; | |
1871 } | |
1872 return 0; | |
1873 } /* find_conditional_protection */ | |
1874 | |
1875 /* Returns 1 if the same insn1 that participates in the computation | |
of load_insn's address is feeding a conditional branch that
guards load_insn. This is true if we find two DEF-USE
1878 chains: | |
1879 insn1 -> ... -> conditional-branch | |
1880 insn1 -> ... -> load_insn, | |
1881 and if a flow path exists: | |
1882 insn1 -> ... -> conditional-branch -> ... -> load_insn, | |
1883 and if insn1 is on the path | |
1884 region-entry -> ... -> bb_trg -> ... load_insn. | |
1885 | |
1886 Locate insn1 by climbing on INSN_BACK_DEPS from load_insn. | |
1887 Locate the branch by following INSN_FORW_DEPS from insn1. */ | |
1888 | |
1889 static int | |
1890 is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg) | |
1891 { | |
1892 sd_iterator_def sd_it; | |
1893 dep_t dep; | |
1894 | |
1895 FOR_EACH_DEP (load_insn, SD_LIST_BACK, sd_it, dep) | |
1896 { | |
1897 rtx insn1 = DEP_PRO (dep); | |
1898 | |
1899 /* Must be a DEF-USE dependence upon non-branch. */ | |
1900 if (DEP_TYPE (dep) != REG_DEP_TRUE | |
1901 || JUMP_P (insn1)) | |
1902 continue; | |
1903 | |
1904 /* Must exist a path: region-entry -> ... -> bb_trg -> ... load_insn. */ | |
1905 if (INSN_BB (insn1) == bb_src | |
1906 || (CONTAINING_RGN (BLOCK_NUM (insn1)) | |
1907 != CONTAINING_RGN (BB_TO_BLOCK (bb_src))) | |
1908 || (!IS_REACHABLE (bb_trg, INSN_BB (insn1)) | |
1909 && !IS_REACHABLE (INSN_BB (insn1), bb_trg))) | |
1910 continue; | |
1911 | |
1912 /* Now search for the conditional-branch. */ | |
1913 if (find_conditional_protection (insn1, bb_src)) | |
1914 return 1; | |
1915 | |
1916 /* Recursive step: search another insn1, "above" current insn1. */ | |
1917 return is_conditionally_protected (insn1, bb_src, bb_trg); | |
1918 } | |
1919 | |
1920 /* The chain does not exist. */ | |
1921 return 0; | |
1922 } /* is_conditionally_protected */ | |
1923 | |
1924 /* Returns 1 if a clue for "similar load" 'insn2' is found, and hence | |
1925 load_insn can move speculatively from bb_src to bb_trg. All the | |
1926 following must hold: | |
1927 | |
(1) both loads have 1 base register (PFREE_CANDIDATEs).
(2) load_insn and load2 have a def-use dependence upon
the same insn 'insn1'.
(3) either load2 is in bb_trg, or:
- there's exactly one split block, and
- load2 is in that split block (on the escape path).
1934 | |
1935 From all these we can conclude that the two loads access memory | |
1936 addresses that differ at most by a constant, and hence if moving | |
1937 load_insn would cause an exception, it would have been caused by | |
1938 load2 anyhow. */ | |
1939 | |
1940 static int | |
1941 is_pfree (rtx load_insn, int bb_src, int bb_trg) | |
1942 { | |
1943 sd_iterator_def back_sd_it; | |
1944 dep_t back_dep; | |
1945 candidate *candp = candidate_table + bb_src; | |
1946 | |
1947 if (candp->split_bbs.nr_members != 1) | |
1948 /* Must have exactly one escape block. */ | |
1949 return 0; | |
1950 | |
1951 FOR_EACH_DEP (load_insn, SD_LIST_BACK, back_sd_it, back_dep) | |
1952 { | |
1953 rtx insn1 = DEP_PRO (back_dep); | |
1954 | |
1955 if (DEP_TYPE (back_dep) == REG_DEP_TRUE) | |
1956 /* Found a DEF-USE dependence (insn1, load_insn). */ | |
1957 { | |
1958 sd_iterator_def fore_sd_it; | |
1959 dep_t fore_dep; | |
1960 | |
1961 FOR_EACH_DEP (insn1, SD_LIST_FORW, fore_sd_it, fore_dep) | |
1962 { | |
1963 rtx insn2 = DEP_CON (fore_dep); | |
1964 | |
1965 if (DEP_TYPE (fore_dep) == REG_DEP_TRUE) | |
1966 { | |
1967 /* Found a DEF-USE dependence (insn1, insn2). */ | |
1968 if (haifa_classify_insn (insn2) != PFREE_CANDIDATE) | |
1969 /* insn2 not guaranteed to be a 1 base reg load. */ | |
1970 continue; | |
1971 | |
1972 if (INSN_BB (insn2) == bb_trg) | |
1973 /* insn2 is the similar load, in the target block. */ | |
1974 return 1; | |
1975 | |
1976 if (*(candp->split_bbs.first_member) == BLOCK_FOR_INSN (insn2)) | |
1977 /* insn2 is a similar load, in a split-block. */ | |
1978 return 1; | |
1979 } | |
1980 } | |
1981 } | |
1982 } | |
1983 | |
1984 /* Couldn't find a similar load. */ | |
1985 return 0; | |
1986 } /* is_pfree */ | |
1987 | |
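/* A made-up instance of the above: if insn1 computes r1, load_insn is
   (set (reg r2) (mem (plus r1 (const_int 8)))) in bb_src and insn2 is
   (set (reg r3) (mem (plus r1 (const_int 4)))) in bb_trg, then both
   loads are PFREE_CANDIDATEs fed by insn1, their addresses differ by a
   constant, and any fault that moving load_insn could raise would
   already be raised by insn2.  */
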
1988 /* Return 1 if load_insn is prisky (i.e. if load_insn is fed by | |
1989 a load moved speculatively, or if load_insn is protected by | |
1990 a compare on load_insn's address). */ | |
1991 | |
1992 static int | |
1993 is_prisky (rtx load_insn, int bb_src, int bb_trg) | |
1994 { | |
1995 if (FED_BY_SPEC_LOAD (load_insn)) | |
1996 return 1; | |
1997 | |
1998 if (sd_lists_empty_p (load_insn, SD_LIST_BACK)) | |
1999 /* Dependence may 'hide' out of the region. */ | |
2000 return 1; | |
2001 | |
2002 if (is_conditionally_protected (load_insn, bb_src, bb_trg)) | |
2003 return 1; | |
2004 | |
2005 return 0; | |
2006 } | |
2007 | |
2008 /* Insn is a candidate to be moved speculatively from bb_src to bb_trg. | |
2009 Return 1 if insn is exception-free (and the motion is valid) | |
2010 and 0 otherwise. */ | |
2011 | |
2012 static int | |
2013 is_exception_free (rtx insn, int bb_src, int bb_trg) | |
2014 { | |
2015 int insn_class = haifa_classify_insn (insn); | |
2016 | |
2017 /* Handle non-load insns. */ | |
2018 switch (insn_class) | |
2019 { | |
2020 case TRAP_FREE: | |
2021 return 1; | |
2022 case TRAP_RISKY: | |
2023 return 0; | |
2024 default:; | |
2025 } | |
2026 | |
2027 /* Handle loads. */ | |
2028 if (!flag_schedule_speculative_load) | |
2029 return 0; | |
2030 IS_LOAD_INSN (insn) = 1; | |
2031 switch (insn_class) | |
2032 { | |
2033 case IFREE: | |
return 1;
2035 case IRISKY: | |
2036 return 0; | |
2037 case PFREE_CANDIDATE: | |
2038 if (is_pfree (insn, bb_src, bb_trg)) | |
2039 return 1; | |
2040 /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate. */ | |
2041 case PRISKY_CANDIDATE: | |
2042 if (!flag_schedule_speculative_load_dangerous | |
2043 || is_prisky (insn, bb_src, bb_trg)) | |
2044 return 0; | |
2045 break; | |
2046 default:; | |
2047 } | |
2048 | |
2049 return flag_schedule_speculative_load_dangerous; | |
2050 } | |
2051 | |
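/* Summary of the classification above, derived from the code for quick
   reference (not authoritative documentation):

     TRAP_FREE        -> always movable
     TRAP_RISKY       -> never movable
     IFREE            -> movable
     IRISKY           -> not movable
     PFREE_CANDIDATE  -> movable if is_pfree, else treated as PRISKY
     PRISKY_CANDIDATE -> movable only under
                         -fsched-spec-load-dangerous and when not prisky

   Loads are considered at all only under -fsched-spec-load.  */
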
2052 /* The number of insns from the current block scheduled so far. */ | |
2053 static int sched_target_n_insns; | |
2054 /* The number of insns from the current block to be scheduled in total. */ | |
2055 static int target_n_insns; | |
2056 /* The number of insns from the entire region scheduled so far. */ | |
2057 static int sched_n_insns; | |
2058 | |
2059 /* Implementations of the sched_info functions for region scheduling. */ | |
2060 static void init_ready_list (void); | |
2061 static int can_schedule_ready_p (rtx); | |
2062 static void begin_schedule_ready (rtx, rtx); | |
2063 static ds_t new_ready (rtx, ds_t); | |
2064 static int schedule_more_p (void); | |
2065 static const char *rgn_print_insn (const_rtx, int); | |
2066 static int rgn_rank (rtx, rtx); | |
2067 static void compute_jump_reg_dependencies (rtx, regset, regset, regset); | |
2068 | |
2069 /* Functions for speculative scheduling. */ | |
2070 static void rgn_add_remove_insn (rtx, int); | |
2071 static void rgn_add_block (basic_block, basic_block); | |
2072 static void rgn_fix_recovery_cfg (int, int, int); | |
2073 static basic_block advance_target_bb (basic_block, rtx); | |
2074 | |
2075 /* Return nonzero if there are more insns that should be scheduled. */ | |
2076 | |
2077 static int | |
2078 schedule_more_p (void) | |
2079 { | |
2080 return sched_target_n_insns < target_n_insns; | |
2081 } | |
2082 | |
/* Add all insns that are initially ready to the ready list. Called
once before scheduling a set of insns. */
2085 | |
2086 static void | |
2087 init_ready_list (void) | |
2088 { | |
2089 rtx prev_head = current_sched_info->prev_head; | |
2090 rtx next_tail = current_sched_info->next_tail; | |
2091 int bb_src; | |
2092 rtx insn; | |
2093 | |
2094 target_n_insns = 0; | |
2095 sched_target_n_insns = 0; | |
2096 sched_n_insns = 0; | |
2097 | |
2098 /* Print debugging information. */ | |
2099 if (sched_verbose >= 5) | |
2100 debug_rgn_dependencies (target_bb); | |
2101 | |
2102 /* Prepare current target block info. */ | |
2103 if (current_nr_blocks > 1) | |
2104 compute_trg_info (target_bb); | |
2105 | |
2106 /* Initialize ready list with all 'ready' insns in target block. | |
2107 Count number of insns in the target block being scheduled. */ | |
2108 for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn)) | |
2109 { | |
2110 try_ready (insn); | |
2111 target_n_insns++; | |
2112 | |
2113 gcc_assert (!(TODO_SPEC (insn) & BEGIN_CONTROL)); | |
2114 } | |
2115 | |
/* Add to the ready list all 'ready' insns in valid source blocks.
For speculative insns, check liveness, exception-freedom, and
issue delay. */
2119 for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++) | |
2120 if (IS_VALID (bb_src)) | |
2121 { | |
2122 rtx src_head; | |
2123 rtx src_next_tail; | |
2124 rtx tail, head; | |
2125 | |
2126 get_ebb_head_tail (EBB_FIRST_BB (bb_src), EBB_LAST_BB (bb_src), | |
2127 &head, &tail); | |
2128 src_next_tail = NEXT_INSN (tail); | |
2129 src_head = head; | |
2130 | |
2131 for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn)) | |
2132 if (INSN_P (insn)) | |
2133 try_ready (insn); | |
2134 } | |
2135 } | |
2136 | |
/* Called after taking INSN from the ready list. Returns nonzero if this
insn can be scheduled, zero if we should silently discard it. */
2139 | |
2140 static int | |
2141 can_schedule_ready_p (rtx insn) | |
2142 { | |
2143 /* An interblock motion? */ | |
2144 if (INSN_BB (insn) != target_bb | |
2145 && IS_SPECULATIVE_INSN (insn) | |
2146 && !check_live (insn, INSN_BB (insn))) | |
2147 return 0; | |
2148 else | |
2149 return 1; | |
2150 } | |
2151 | |
/* Updates counters and other information. Split from can_schedule_ready_p ()
because when an insn is scheduled speculatively, the insn passed to
can_schedule_ready_p () differs from the one passed to
begin_schedule_ready (). */
2156 static void | |
2157 begin_schedule_ready (rtx insn, rtx last ATTRIBUTE_UNUSED) | |
2158 { | |
2159 /* An interblock motion? */ | |
2160 if (INSN_BB (insn) != target_bb) | |
2161 { | |
2162 if (IS_SPECULATIVE_INSN (insn)) | |
2163 { | |
2164 gcc_assert (check_live (insn, INSN_BB (insn))); | |
2165 | |
2166 update_live (insn, INSN_BB (insn)); | |
2167 | |
2168 /* For speculative load, mark insns fed by it. */ | |
2169 if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn)) | |
2170 set_spec_fed (insn); | |
2171 | |
2172 nr_spec++; | |
2173 } | |
2174 nr_inter++; | |
2175 } | |
2176 else | |
2177 { | |
2178 /* In block motion. */ | |
2179 sched_target_n_insns++; | |
2180 } | |
2181 sched_n_insns++; | |
2182 } | |
2183 | |
/* Called after NEXT has all its hard dependencies resolved and the speculation
of type TS is enough to overcome them all.
Return the (possibly weakened) speculation status with which NEXT should be
moved to the ready list or the queue, or a status with HARD_DEP set if we
should silently discard it. */
2188 static ds_t | |
2189 new_ready (rtx next, ds_t ts) | |
2190 { | |
2191 if (INSN_BB (next) != target_bb) | |
2192 { | |
2193 int not_ex_free = 0; | |
2194 | |
2195 /* For speculative insns, before inserting to ready/queue, | |
2196 check live, exception-free, and issue-delay. */ | |
2197 if (!IS_VALID (INSN_BB (next)) | |
2198 || CANT_MOVE (next) | |
2199 || (IS_SPECULATIVE_INSN (next) | |
2200 && ((recog_memoized (next) >= 0 | |
2201 && min_insn_conflict_delay (curr_state, next, next) | |
2202 > PARAM_VALUE (PARAM_MAX_SCHED_INSN_CONFLICT_DELAY)) | |
2203 || IS_SPECULATION_CHECK_P (next) | |
2204 || !check_live (next, INSN_BB (next)) | |
2205 || (not_ex_free = !is_exception_free (next, INSN_BB (next), | |
2206 target_bb))))) | |
2207 { | |
2208 if (not_ex_free | |
2209 /* We are here because is_exception_free () == false. | |
2210 But we possibly can handle that with control speculation. */ | |
2211 && sched_deps_info->generate_spec_deps | |
2212 && spec_info->mask & BEGIN_CONTROL) | |
2213 { | |
2214 ds_t new_ds; | |
2215 | |
2216 /* Add control speculation to NEXT's dependency type. */ | |
2217 new_ds = set_dep_weak (ts, BEGIN_CONTROL, MAX_DEP_WEAK); | |
2218 | |
2219 /* Check if NEXT can be speculated with new dependency type. */ | |
2220 if (sched_insn_is_legitimate_for_speculation_p (next, new_ds)) | |
2221 /* Here we got new control-speculative instruction. */ | |
2222 ts = new_ds; | |
2223 else | |
2224 /* NEXT isn't ready yet. */ | |
2225 ts = (ts & ~SPECULATIVE) | HARD_DEP; | |
2226 } | |
2227 else | |
2228 /* NEXT isn't ready yet. */ | |
2229 ts = (ts & ~SPECULATIVE) | HARD_DEP; | |
2230 } | |
2231 } | |
2232 | |
2233 return ts; | |
2234 } | |
2235 | |
2236 /* Return a string that contains the insn uid and optionally anything else | |
2237 necessary to identify this insn in an output. It's valid to use a | |
2238 static buffer for this. The ALIGNED parameter should cause the string | |
2239 to be formatted so that multiple output lines will line up nicely. */ | |
2240 | |
2241 static const char * | |
2242 rgn_print_insn (const_rtx insn, int aligned) | |
2243 { | |
2244 static char tmp[80]; | |
2245 | |
2246 if (aligned) | |
2247 sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn)); | |
2248 else | |
2249 { | |
2250 if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb) | |
2251 sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn)); | |
2252 else | |
2253 sprintf (tmp, "%d", INSN_UID (insn)); | |
2254 } | |
2255 return tmp; | |
2256 } | |
2257 | |
2258 /* Compare priority of two insns. Return a positive number if the second | |
2259 insn is to be preferred for scheduling, and a negative one if the first | |
2260 is to be preferred. Zero if they are equally good. */ | |
2261 | |
2262 static int | |
2263 rgn_rank (rtx insn1, rtx insn2) | |
2264 { | |
/* Some comparisons make sense only in interblock scheduling. */
2266 if (INSN_BB (insn1) != INSN_BB (insn2)) | |
2267 { | |
2268 int spec_val, prob_val; | |
2269 | |
/* Prefer an inblock motion over an interblock motion. */
2271 if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb)) | |
2272 return 1; | |
2273 if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb)) | |
2274 return -1; | |
2275 | |
/* Prefer a useful motion over a speculative one. */
2277 spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2); | |
2278 if (spec_val) | |
2279 return spec_val; | |
2280 | |
2281 /* Prefer a more probable (speculative) insn. */ | |
2282 prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1); | |
2283 if (prob_val) | |
2284 return prob_val; | |
2285 } | |
2286 return 0; | |
2287 } | |
2288 | |
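/* For example (hypothetical): with target_bb == B0, an insn from B0 is
   always preferred over one from B2; between two speculative insns from
   source blocks, the one with the higher INSN_PROBABILITY wins.  */
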
2289 /* NEXT is an instruction that depends on INSN (a backward dependence); | |
2290 return nonzero if we should include this dependence in priority | |
2291 calculations. */ | |
2292 | |
2293 int | |
2294 contributes_to_priority (rtx next, rtx insn) | |
2295 { | |
2296 /* NEXT and INSN reside in one ebb. */ | |
2297 return BLOCK_TO_BB (BLOCK_NUM (next)) == BLOCK_TO_BB (BLOCK_NUM (insn)); | |
2298 } | |
2299 | |
2300 /* INSN is a JUMP_INSN, COND_SET is the set of registers that are | |
2301 conditionally set before INSN. Store the set of registers that | |
2302 must be considered as used by this jump in USED and that of | |
2303 registers that must be considered as set in SET. */ | |
2304 | |
2305 static void | |
2306 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED, | |
2307 regset cond_exec ATTRIBUTE_UNUSED, | |
2308 regset used ATTRIBUTE_UNUSED, | |
2309 regset set ATTRIBUTE_UNUSED) | |
2310 { | |
2311 /* Nothing to do here, since we postprocess jumps in | |
2312 add_branch_dependences. */ | |
2313 } | |
2314 | |
2315 /* This variable holds common_sched_info hooks and data relevant to | |
2316 the interblock scheduler. */ | |
2317 static struct common_sched_info_def rgn_common_sched_info; | |
2318 | |
2319 | |
2320 /* This holds data for the dependence analysis relevant to | |
2321 the interblock scheduler. */ | |
2322 static struct sched_deps_info_def rgn_sched_deps_info; | |
2323 | |
2324 /* This holds constant data used for initializing the above structure | |
2325 for the Haifa scheduler. */ | |
2326 static const struct sched_deps_info_def rgn_const_sched_deps_info = | |
2327 { | |
2328 compute_jump_reg_dependencies, | |
2329 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | |
2330 0, 0, 0 | |
2331 }; | |
2332 | |
2333 /* Same as above, but for the selective scheduler. */ | |
2334 static const struct sched_deps_info_def rgn_const_sel_sched_deps_info = | |
2335 { | |
2336 compute_jump_reg_dependencies, | |
2337 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | |
2338 0, 0, 0 | |
2339 }; | |
2340 | |
2341 /* Used in schedule_insns to initialize current_sched_info for scheduling | |
2342 regions (or single basic blocks). */ | |
2343 | |
2344 static const struct haifa_sched_info rgn_const_sched_info = | |
2345 { | |
2346 init_ready_list, | |
2347 can_schedule_ready_p, | |
2348 schedule_more_p, | |
2349 new_ready, | |
2350 rgn_rank, | |
2351 rgn_print_insn, | |
2352 contributes_to_priority, | |
2353 | |
2354 NULL, NULL, | |
2355 NULL, NULL, | |
2356 0, 0, | |
2357 | |
2358 rgn_add_remove_insn, | |
2359 begin_schedule_ready, | |
2360 advance_target_bb, | |
2361 SCHED_RGN | |
2362 }; | |
2363 | |
/* This variable holds the data and hooks needed by the Haifa scheduler backend
for the interblock scheduler frontend. */
2366 static struct haifa_sched_info rgn_sched_info; | |
2367 | |
/* Return the maximum priority that any insn was assigned. */
2369 | |
2370 int | |
2371 get_rgn_sched_max_insns_priority (void) | |
2372 { | |
2373 return rgn_sched_info.sched_max_insns_priority; | |
2374 } | |
2375 | |
2376 /* Determine if PAT sets a CLASS_LIKELY_SPILLED_P register. */ | |
2377 | |
2378 static bool | |
2379 sets_likely_spilled (rtx pat) | |
2380 { | |
2381 bool ret = false; | |
2382 note_stores (pat, sets_likely_spilled_1, &ret); | |
2383 return ret; | |
2384 } | |
2385 | |
2386 static void | |
2387 sets_likely_spilled_1 (rtx x, const_rtx pat, void *data) | |
2388 { | |
2389 bool *ret = (bool *) data; | |
2390 | |
2391 if (GET_CODE (pat) == SET | |
2392 && REG_P (x) | |
2393 && REGNO (x) < FIRST_PSEUDO_REGISTER | |
2394 && CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (REGNO (x)))) | |
2395 *ret = true; | |
2396 } | |
2397 | |
2398 /* A bitmap to note insns that participate in any dependency. Used in | |
2399 add_branch_dependences. */ | |
2400 static sbitmap insn_referenced; | |
2401 | |
2402 /* Add dependences so that branches are scheduled to run last in their | |
2403 block. */ | |
2404 static void | |
2405 add_branch_dependences (rtx head, rtx tail) | |
2406 { | |
2407 rtx insn, last; | |
2408 | |
2409 /* For all branches, calls, uses, clobbers, cc0 setters, and instructions | |
2410 that can throw exceptions, force them to remain in order at the end of | |
2411 the block by adding dependencies and giving the last a high priority. | |
2412 There may be notes present, and prev_head may also be a note. | |
2413 | |
2414 Branches must obviously remain at the end. Calls should remain at the | |
2415 end since moving them results in worse register allocation. Uses remain | |
2416 at the end to ensure proper register allocation. | |
2417 | |
2418 cc0 setters remain at the end because they can't be moved away from | |
2419 their cc0 user. | |
2420 | |
2421 COND_EXEC insns cannot be moved past a branch (see e.g. PR17808). | |
2422 | |
2423 Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values) | |
2424 are not moved before reload because we can wind up with register | |
2425 allocation failures. */ | |
2426 | |
2427 insn = tail; | |
2428 last = 0; | |
2429 while (CALL_P (insn) | |
2430 || JUMP_P (insn) | |
2431 || (NONJUMP_INSN_P (insn) | |
2432 && (GET_CODE (PATTERN (insn)) == USE | |
2433 || GET_CODE (PATTERN (insn)) == CLOBBER | |
2434 || can_throw_internal (insn) | |
2435 #ifdef HAVE_cc0 | |
2436 || sets_cc0_p (PATTERN (insn)) | |
2437 #endif | |
2438 || (!reload_completed | |
2439 && sets_likely_spilled (PATTERN (insn))))) | |
2440 || NOTE_P (insn)) | |
2441 { | |
2442 if (!NOTE_P (insn)) | |
2443 { | |
2444 if (last != 0 | |
2445 && sd_find_dep_between (insn, last, false) == NULL) | |
2446 { | |
2447 if (! sched_insns_conditions_mutex_p (last, insn)) | |
2448 add_dependence (last, insn, REG_DEP_ANTI); | |
2449 SET_BIT (insn_referenced, INSN_LUID (insn)); | |
2450 } | |
2451 | |
2452 CANT_MOVE (insn) = 1; | |
2453 | |
2454 last = insn; | |
2455 } | |
2456 | |
2457 /* Don't overrun the bounds of the basic block. */ | |
2458 if (insn == head) | |
2459 break; | |
2460 | |
2461 insn = PREV_INSN (insn); | |
2462 } | |
2463 | |
2464 /* Make sure these insns are scheduled last in their block. */ | |
2465 insn = last; | |
2466 if (insn != 0) | |
2467 while (insn != head) | |
2468 { | |
2469 insn = prev_nonnote_insn (insn); | |
2470 | |
2471 if (TEST_BIT (insn_referenced, INSN_LUID (insn))) | |
2472 continue; | |
2473 | |
2474 if (! sched_insns_conditions_mutex_p (last, insn)) | |
2475 add_dependence (last, insn, REG_DEP_ANTI); | |
2476 } | |
2477 | |
2478 #ifdef HAVE_conditional_execution | |
2479 /* Finally, if the block ends in a jump, and we are doing intra-block | |
2480 scheduling, make sure that the branch depends on any COND_EXEC insns | |
2481 inside the block to avoid moving the COND_EXECs past the branch insn. | |
2482 | |
2483 We only have to do this after reload, because (1) before reload there | |
2484 are no COND_EXEC insns, and (2) the region scheduler is an intra-block | |
2485 scheduler after reload. | |
2486 | |
2487 FIXME: We could in some cases move COND_EXEC insns past the branch if | |
2488 this scheduler would be a little smarter. Consider this code: | |
2489 | |
2490 T = [addr] | |
2491 C ? addr += 4 | |
2492 !C ? X += 12 | |
2493 C ? T += 1 | |
2494 C ? jump foo | |
2495 | |
2496 On a target with a one cycle stall on a memory access the optimal | |
2497 sequence would be: | |
2498 | |
2499 T = [addr] | |
2500 C ? addr += 4 | |
2501 C ? T += 1 | |
2502 C ? jump foo | |
2503 !C ? X += 12 | |
2504 | |
2505 We don't want to put the 'X += 12' before the branch because it just | |
2506 wastes a cycle of execution time when the branch is taken. | |
2507 | |
2508 Note that in the example "!C" will always be true. That is another | |
2509 possible improvement for handling COND_EXECs in this scheduler: it | |
2510 could remove always-true predicates. */ | |
2511 | |
2512 if (!reload_completed || ! JUMP_P (tail)) | |
2513 return; | |
2514 | |
2515 insn = tail; | |
2516 while (insn != head) | |
2517 { | |
2518 insn = PREV_INSN (insn); | |
2519 | |
2520 /* Note that we want to add this dependency even when | |
2521 sched_insns_conditions_mutex_p returns true. The whole point | |
2522 is that we _want_ this dependency, even if these insns really | |
2523 are independent. */ | |
2524 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == COND_EXEC) | |
2525 add_dependence (tail, insn, REG_DEP_ANTI); | |
2526 } | |
2527 #endif | |
2528 } | |
2529 | |
/* Data structures for the computation of data dependences in a region. We
2531 keep one `deps' structure for every basic block. Before analyzing the | |
2532 data dependences for a bb, its variables are initialized as a function of | |
2533 the variables of its predecessors. When the analysis for a bb completes, | |
2534 we save the contents to the corresponding bb_deps[bb] variable. */ | |
2535 | |
2536 static struct deps *bb_deps; | |
2537 | |
2538 /* Duplicate the INSN_LIST elements of COPY and prepend them to OLD. */ | |
2539 | |
2540 static rtx | |
2541 concat_INSN_LIST (rtx copy, rtx old) | |
2542 { | |
2543 rtx new_rtx = old; | |
2544 for (; copy ; copy = XEXP (copy, 1)) | |
2545 new_rtx = alloc_INSN_LIST (XEXP (copy, 0), new_rtx); | |
2546 return new_rtx; | |
2547 } | |
2548 | |
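/* Note the resulting order (easily checked by hand): with COPY = (a b)
   and OLD = (c), the result is (b a c) -- COPY's elements come out
   reversed.  That appears harmless here, as these lists are used as
   unordered sets by the dependence code.  */
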
2549 static void | |
2550 concat_insn_mem_list (rtx copy_insns, rtx copy_mems, rtx *old_insns_p, | |
2551 rtx *old_mems_p) | |
2552 { | |
2553 rtx new_insns = *old_insns_p; | |
2554 rtx new_mems = *old_mems_p; | |
2555 | |
2556 while (copy_insns) | |
2557 { | |
2558 new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns); | |
2559 new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems); | |
2560 copy_insns = XEXP (copy_insns, 1); | |
2561 copy_mems = XEXP (copy_mems, 1); | |
2562 } | |
2563 | |
2564 *old_insns_p = new_insns; | |
2565 *old_mems_p = new_mems; | |
2566 } | |
2567 | |
2568 /* Join PRED_DEPS to the SUCC_DEPS. */ | |
2569 void | |
2570 deps_join (struct deps *succ_deps, struct deps *pred_deps) | |
2571 { | |
2572 unsigned reg; | |
2573 reg_set_iterator rsi; | |
2574 | |
2575 /* The reg_last lists are inherited by successor. */ | |
2576 EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg, rsi) | |
2577 { | |
2578 struct deps_reg *pred_rl = &pred_deps->reg_last[reg]; | |
2579 struct deps_reg *succ_rl = &succ_deps->reg_last[reg]; | |
2580 | |
2581 succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses); | |
2582 succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets); | |
2583 succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers, | |
2584 succ_rl->clobbers); | |
2585 succ_rl->uses_length += pred_rl->uses_length; | |
2586 succ_rl->clobbers_length += pred_rl->clobbers_length; | |
2587 } | |
2588 IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use); | |
2589 | |
2590 /* Mem read/write lists are inherited by successor. */ | |
2591 concat_insn_mem_list (pred_deps->pending_read_insns, | |
2592 pred_deps->pending_read_mems, | |
2593 &succ_deps->pending_read_insns, | |
2594 &succ_deps->pending_read_mems); | |
2595 concat_insn_mem_list (pred_deps->pending_write_insns, | |
2596 pred_deps->pending_write_mems, | |
2597 &succ_deps->pending_write_insns, | |
2598 &succ_deps->pending_write_mems); | |
2599 | |
2600 succ_deps->last_pending_memory_flush | |
2601 = concat_INSN_LIST (pred_deps->last_pending_memory_flush, | |
2602 succ_deps->last_pending_memory_flush); | |
2603 | |
2604 succ_deps->pending_read_list_length += pred_deps->pending_read_list_length; | |
2605 succ_deps->pending_write_list_length += pred_deps->pending_write_list_length; | |
2606 succ_deps->pending_flush_length += pred_deps->pending_flush_length; | |
2607 | |
2608 /* last_function_call is inherited by successor. */ | |
2609 succ_deps->last_function_call | |
2610 = concat_INSN_LIST (pred_deps->last_function_call, | |
2611 succ_deps->last_function_call); | |
2612 | |
2613 /* sched_before_next_call is inherited by successor. */ | |
2614 succ_deps->sched_before_next_call | |
2615 = concat_INSN_LIST (pred_deps->sched_before_next_call, | |
2616 succ_deps->sched_before_next_call); | |
2617 } | |
2618 | |
/* After computing the dependencies for block BB, propagate the dependencies
found in PRED_DEPS to the successors of the block. */
2621 static void | |
2622 propagate_deps (int bb, struct deps *pred_deps) | |
2623 { | |
2624 basic_block block = BASIC_BLOCK (BB_TO_BLOCK (bb)); | |
2625 edge_iterator ei; | |
2626 edge e; | |
2627 | |
2628 /* bb's structures are inherited by its successors. */ | |
2629 FOR_EACH_EDGE (e, ei, block->succs) | |
2630 { | |
2631 /* Only bbs "below" bb, in the same region, are interesting. */ | |
2632 if (e->dest == EXIT_BLOCK_PTR | |
2633 || CONTAINING_RGN (block->index) != CONTAINING_RGN (e->dest->index) | |
2634 || BLOCK_TO_BB (e->dest->index) <= bb) | |
2635 continue; | |
2636 | |
2637 deps_join (bb_deps + BLOCK_TO_BB (e->dest->index), pred_deps); | |
2638 } | |
2639 | |
2640 /* These lists should point to the right place, for correct | |
2641 freeing later. */ | |
2642 bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns; | |
2643 bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems; | |
2644 bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns; | |
2645 bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems; | |
2646 | |
2647 /* Can't allow these to be freed twice. */ | |
2648 pred_deps->pending_read_insns = 0; | |
2649 pred_deps->pending_read_mems = 0; | |
2650 pred_deps->pending_write_insns = 0; | |
2651 pred_deps->pending_write_mems = 0; | |
2652 } | |
2653 | |
/* Compute dependences inside BB. In a multi-block region:
2655 (1) a bb is analyzed after its predecessors, and (2) the lists in | |
2656 effect at the end of bb (after analyzing for bb) are inherited by | |
2657 bb's successors. | |
2658 | |
2659 Specifically for reg-reg data dependences, the block insns are | |
2660 scanned by sched_analyze () top-to-bottom. Two lists are | |
2661 maintained by sched_analyze (): reg_last[].sets for register DEFs, | |
2662 and reg_last[].uses for register USEs. | |
2663 | |
2664 When analysis is completed for bb, we update for its successors: | |
- DEFS[succ] = Union (DEFS [succ], DEFS [bb])
- USES[succ] = Union (USES [succ], USES [bb])
2667 | |
2668 The mechanism for computing mem-mem data dependence is very | |
2669 similar, and the result is interblock dependences in the region. */ | |
2670 | |
2671 static void | |
2672 compute_block_dependences (int bb) | |
2673 { | |
2674 rtx head, tail; | |
2675 struct deps tmp_deps; | |
2676 | |
2677 tmp_deps = bb_deps[bb]; | |
2678 | |
2679 /* Do the analysis for this block. */ | |
2680 gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb)); | |
2681 get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail); | |
2682 | |
2683 sched_analyze (&tmp_deps, head, tail); | |
2684 | |
2685 /* Selective scheduling handles control dependencies by itself. */ | |
2686 if (!sel_sched_p ()) | |
2687 add_branch_dependences (head, tail); | |
2688 | |
2689 if (current_nr_blocks > 1) | |
2690 propagate_deps (bb, &tmp_deps); | |
2691 | |
2692 /* Free up the INSN_LISTs. */ | |
2693 free_deps (&tmp_deps); | |
2694 | |
2695 if (targetm.sched.dependencies_evaluation_hook) | |
2696 targetm.sched.dependencies_evaluation_hook (head, tail); | |
2697 } | |
2698 | |
2699 /* Free dependencies of instructions inside BB. */ | |
2700 static void | |
2701 free_block_dependencies (int bb) | |
2702 { | |
2703 rtx head; | |
2704 rtx tail; | |
2705 | |
2706 get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail); | |
2707 | |
2708 sched_free_deps (head, tail, true); | |
2709 } | |
2710 | |
2711 /* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add | |
2712 them to the unused_*_list variables, so that they can be reused. */ | |
2713 | |
2714 static void | |
2715 free_pending_lists (void) | |
2716 { | |
2717 int bb; | |
2718 | |
2719 for (bb = 0; bb < current_nr_blocks; bb++) | |
2720 { | |
2721 free_INSN_LIST_list (&bb_deps[bb].pending_read_insns); | |
2722 free_INSN_LIST_list (&bb_deps[bb].pending_write_insns); | |
2723 free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems); | |
2724 free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems); | |
2725 } | |
2726 } | |
2727 | |
2728 /* Print dependences for debugging starting from FROM_BB. | |
2729 Callable from debugger. */ | |
2732 void | |
2733 debug_rgn_dependencies (int from_bb) | |
2734 { | |
2735 int bb; | |
2736 | |
2737 fprintf (sched_dump, | |
2738 ";; --------------- forward dependences: ------------ \n"); | |
2739 | |
2740 for (bb = from_bb; bb < current_nr_blocks; bb++) | |
2741 { | |
2742 rtx head, tail; | |
2743 | |
2744 get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail); | |
2745 fprintf (sched_dump, "\n;; --- Region Dependences --- b %d bb %d \n", | |
2746 BB_TO_BLOCK (bb), bb); | |
2747 | |
2748 debug_dependencies (head, tail); | |
2749 } | |
2750 } | |
2751 | |
2752 /* Print dependencies information for instructions between HEAD and TAIL. | |
2753 ??? This function would probably fit best in haifa-sched.c. */ | |
void
debug_dependencies (rtx head, rtx tail)
2755 { | |
2756 rtx insn; | |
2757 rtx next_tail = NEXT_INSN (tail); | |
2758 | |
2759 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n", | |
2760 "insn", "code", "bb", "dep", "prio", "cost", | |
2761 "reservation"); | |
2762 fprintf (sched_dump, ";; %7s%6s%6s%6s%6s%6s%14s\n", | |
2763 "----", "----", "--", "---", "----", "----", | |
2764 "-----------"); | |
2765 | |
2766 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn)) | |
2767 { | |
2768 if (! INSN_P (insn)) | |
2769 { | |
2770 int n; | |
2771 fprintf (sched_dump, ";; %6d ", INSN_UID (insn)); | |
2772 if (NOTE_P (insn)) | |
2773 { | |
2774 n = NOTE_KIND (insn); | |
2775 fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n)); | |
2776 } | |
2777 else | |
2778 fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn))); | |
2779 continue; | |
2780 } | |
2781 | |
2782 fprintf (sched_dump, | |
2783 ";; %s%5d%6d%6d%6d%6d%6d ", | |
2784 (SCHED_GROUP_P (insn) ? "+" : " "), | |
2785 INSN_UID (insn), | |
2786 INSN_CODE (insn), | |
2787 BLOCK_NUM (insn), | |
2788 sched_emulate_haifa_p ? -1 : sd_lists_size (insn, SD_LIST_BACK), | |
2789 (sel_sched_p () ? (sched_emulate_haifa_p ? -1 | |
2790 : INSN_PRIORITY (insn)) | |
2791 : INSN_PRIORITY (insn)), | |
2792 (sel_sched_p () ? (sched_emulate_haifa_p ? -1 | |
2793 : insn_cost (insn)) | |
2794 : insn_cost (insn))); | |
2795 | |
2796 if (recog_memoized (insn) < 0) | |
2797 fprintf (sched_dump, "nothing"); | |
2798 else | |
2799 print_reservation (sched_dump, insn); | |
2800 | |
2801 fprintf (sched_dump, "\t: "); | |
2802 { | |
2803 sd_iterator_def sd_it; | |
2804 dep_t dep; | |
2805 | |
2806 FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep) | |
2807 fprintf (sched_dump, "%d ", INSN_UID (DEP_CON (dep))); | |
2808 } | |
2809 fprintf (sched_dump, "\n"); | |
2810 } | |
2811 | |
2812 fprintf (sched_dump, "\n"); | |
2813 } | |
2814 | |
/* Return true if all the basic blocks of the current region have the
BB_DISABLE_SCHEDULE flag set, which means the region should not be scheduled. */
2817 bool | |
2818 sched_is_disabled_for_current_region_p (void) | |
2819 { | |
2820 int bb; | |
2821 | |
2822 for (bb = 0; bb < current_nr_blocks; bb++) | |
2823 if (!(BASIC_BLOCK (BB_TO_BLOCK (bb))->flags & BB_DISABLE_SCHEDULE)) | |
2824 return false; | |
2825 | |
2826 return true; | |
2827 } | |
2828 | |
2829 /* Free all region dependencies saved in INSN_BACK_DEPS and | |
2830 INSN_RESOLVED_BACK_DEPS. The Haifa scheduler does this on the fly | |
2831 when scheduling, so this function is supposed to be called from | |
2832 the selective scheduling only. */ | |
2833 void | |
2834 free_rgn_deps (void) | |
2835 { | |
2836 int bb; | |
2837 | |
2838 for (bb = 0; bb < current_nr_blocks; bb++) | |
2839 { | |
2840 rtx head, tail; | |
2841 | |
2842 gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb)); | |
2843 get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail); | |
2844 | |
2845 sched_free_deps (head, tail, false); | |
2846 } | |
2847 } | |
2848 | |
2849 static int rgn_n_insns; | |
2850 | |
/* Compute insn priorities for the current region. */
2852 void | |
2853 compute_priorities (void) | |
2854 { | |
2855 int bb; | |
2856 | |
2857 current_sched_info->sched_max_insns_priority = 0; | |
2858 for (bb = 0; bb < current_nr_blocks; bb++) | |
2859 { | |
2860 rtx head, tail; | |
2861 | |
2862 gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb)); | |
2863 get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail); | |
2864 | |
2865 rgn_n_insns += set_priorities (head, tail); | |
2866 } | |
2867 current_sched_info->sched_max_insns_priority++; | |
2868 } | |
2869 | |
2870 /* Schedule a region. A region is either an inner loop, a loop-free | |
2871 subroutine, or a single basic block. Each bb in the region is | |
2872 scheduled after its flow predecessors. */ | |
2873 | |
2874 static void | |
2875 schedule_region (int rgn) | |
2876 { | |
2877 int bb; | |
2878 int sched_rgn_n_insns = 0; | |
2879 | |
2880 rgn_n_insns = 0; | |
2881 | |
2882 rgn_setup_region (rgn); | |
2883 | |
/* Don't schedule a region in which every block is marked with
BB_DISABLE_SCHEDULE. */
2886 if (sched_is_disabled_for_current_region_p ()) | |
2887 return; | |
2888 | |
2889 sched_rgn_compute_dependencies (rgn); | |
2890 | |
2891 sched_rgn_local_init (rgn); | |
2892 | |
2893 /* Set priorities. */ | |
2894 compute_priorities (); | |
2895 | |
2896 sched_extend_ready_list (rgn_n_insns); | |
2897 | |
2898 /* Now we can schedule all blocks. */ | |
2899 for (bb = 0; bb < current_nr_blocks; bb++) | |
2900 { | |
2901 basic_block first_bb, last_bb, curr_bb; | |
2902 rtx head, tail; | |
2903 | |
2904 first_bb = EBB_FIRST_BB (bb); | |
2905 last_bb = EBB_LAST_BB (bb); | |
2906 | |
2907 get_ebb_head_tail (first_bb, last_bb, &head, &tail); | |
2908 | |
2909 if (no_real_insns_p (head, tail)) | |
2910 { | |
2911 gcc_assert (first_bb == last_bb); | |
2912 continue; | |
2913 } | |
2914 | |
2915 current_sched_info->prev_head = PREV_INSN (head); | |
2916 current_sched_info->next_tail = NEXT_INSN (tail); | |
2917 | |
2918 remove_notes (head, tail); | |
2919 | |
2920 unlink_bb_notes (first_bb, last_bb); | |
2921 | |
2922 target_bb = bb; | |
2923 | |
2924 gcc_assert (flag_schedule_interblock || current_nr_blocks == 1); | |
2925 current_sched_info->queue_must_finish_empty = current_nr_blocks == 1; | |
2926 | |
2927 curr_bb = first_bb; | |
2928 if (dbg_cnt (sched_block)) | |
2929 { | |
2930 schedule_block (&curr_bb); | |
2931 gcc_assert (EBB_FIRST_BB (bb) == first_bb); | |
2932 sched_rgn_n_insns += sched_n_insns; | |
2933 } | |
2934 else | |
2935 { | |
2936 sched_rgn_n_insns += rgn_n_insns; | |
2937 } | |
2938 | |
2939 /* Clean up. */ | |
2940 if (current_nr_blocks > 1) | |
2941 free_trg_info (); | |
2942 } | |
2943 | |
2944 /* Sanity check: verify that all region insns were scheduled. */ | |
2945 gcc_assert (sched_rgn_n_insns == rgn_n_insns); | |
2946 | |
2947 sched_finish_ready_list (); | |
2948 | |
2949 /* Done with this region. */ | |
2950 sched_rgn_local_finish (); | |
2951 | |
2952 /* Free dependencies. */ | |
2953 for (bb = 0; bb < current_nr_blocks; ++bb) | |
2954 free_block_dependencies (bb); | |
2955 | |
2956 gcc_assert (haifa_recovery_bb_ever_added_p | |
2957 || deps_pools_are_empty_p ()); | |
2958 } | |
2959 | |
2960 /* Initialize data structures for region scheduling. */ | |
2961 | |
2962 void | |
2963 sched_rgn_init (bool single_blocks_p) | |
2964 { | |
2965 min_spec_prob = ((PARAM_VALUE (PARAM_MIN_SPEC_PROB) * REG_BR_PROB_BASE) | |
2966 / 100); | |
2967 | |
2968 nr_inter = 0; | |
2969 nr_spec = 0; | |
2970 | |
2971 extend_regions (); | |
2972 | |
2973 CONTAINING_RGN (ENTRY_BLOCK) = -1; | |
2974 CONTAINING_RGN (EXIT_BLOCK) = -1; | |
2975 | |
2976 /* Compute regions for scheduling. */ | |
2977 if (single_blocks_p | |
2978 || n_basic_blocks == NUM_FIXED_BLOCKS + 1 | |
2979 || !flag_schedule_interblock | |
2980 || is_cfg_nonregular ()) | |
2981 { | |
2982 find_single_block_region (sel_sched_p ()); | |
2983 } | |
2984 else | |
2985 { | |
2986 /* Compute the dominators and post dominators. */ | |
2987 if (!sel_sched_p ()) | |
2988 calculate_dominance_info (CDI_DOMINATORS); | |
2989 | |
2990 /* Find regions. */ | |
2991 find_rgns (); | |
2992 | |
2993 if (sched_verbose >= 3) | |
2994 debug_regions (); | |
2995 | |
2996 /* For now. This will move as more and more of haifa is converted | |
2997 to using the cfg code. */ | |
2998 if (!sel_sched_p ()) | |
2999 free_dominance_info (CDI_DOMINATORS); | |
3000 } | |
3001 | |
3002 gcc_assert (0 < nr_regions && nr_regions <= n_basic_blocks); | |
3003 | |
3004 RGN_BLOCKS (nr_regions) = (RGN_BLOCKS (nr_regions - 1) + | |
3005 RGN_NR_BLOCKS (nr_regions - 1)); | |
3006 } | |
3007 | |
3008 /* Free data structures for region scheduling. */ | |
3009 void | |
3010 sched_rgn_finish (void) | |
3011 { | |
3012 /* Reposition the prologue and epilogue notes in case we moved the | |
3013 prologue/epilogue insns. */ | |
3014 if (reload_completed) | |
3015 reposition_prologue_and_epilogue_notes (); | |
3016 | |
3017 if (sched_verbose) | |
3018 { | |
3019 if (reload_completed == 0 | |
3020 && flag_schedule_interblock) | |
3021 { | |
3022 fprintf (sched_dump, | |
3023 "\n;; Procedure interblock/speculative motions == %d/%d \n", | |
3024 nr_inter, nr_spec); | |
3025 } | |
3026 else | |
3027 gcc_assert (nr_inter <= 0); | |
3028 fprintf (sched_dump, "\n\n"); | |
3029 } | |
3030 | |
3031 nr_regions = 0; | |
3032 | |
3033 free (rgn_table); | |
3034 rgn_table = NULL; | |
3035 | |
3036 free (rgn_bb_table); | |
3037 rgn_bb_table = NULL; | |
3038 | |
3039 free (block_to_bb); | |
3040 block_to_bb = NULL; | |
3041 | |
3042 free (containing_rgn); | |
3043 containing_rgn = NULL; | |
3044 | |
3045 free (ebb_head); | |
3046 ebb_head = NULL; | |
3047 } | |
3048 | |
/* Set up global variables like CURRENT_BLOCKS and CURRENT_NR_BLOCKS to
point to the region RGN. */
3051 void | |
3052 rgn_setup_region (int rgn) | |
3053 { | |
3054 int bb; | |
3055 | |
3056 /* Set variables for the current region. */ | |
3057 current_nr_blocks = RGN_NR_BLOCKS (rgn); | |
3058 current_blocks = RGN_BLOCKS (rgn); | |
3059 | |
/* EBB_HEAD is a region-scope structure, but we realloc it for
each region to save time/memory.
See comments in add_block1 for why we allocate one extra element. */
3063 ebb_head = XRESIZEVEC (int, ebb_head, current_nr_blocks + 1); | |
3064 for (bb = 0; bb <= current_nr_blocks; bb++) | |
3065 ebb_head[bb] = current_blocks + bb; | |
3066 } | |
3067 | |
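/* E.g. (made-up numbers): for a three-block region whose blocks start at
   index 5 in rgn_bb_table, this leaves current_blocks == 5,
   current_nr_blocks == 3 and ebb_head == {5, 6, 7, 8}; the extra
   element 8 lets ebb_head[bb + 1] be read even for the last ebb.  */
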
3068 /* Compute instruction dependencies in region RGN. */ | |
3069 void | |
3070 sched_rgn_compute_dependencies (int rgn) | |
3071 { | |
3072 if (!RGN_DONT_CALC_DEPS (rgn)) | |
3073 { | |
3074 int bb; | |
3075 | |
3076 if (sel_sched_p ()) | |
3077 sched_emulate_haifa_p = 1; | |
3078 | |
3079 init_deps_global (); | |
3080 | |
3081 /* Initializations for region data dependence analysis. */ | |
3082 bb_deps = XNEWVEC (struct deps, current_nr_blocks); | |
3083 for (bb = 0; bb < current_nr_blocks; bb++) | |
3084 init_deps (bb_deps + bb); | |
3085 | |
3086 /* Initialize bitmap used in add_branch_dependences. */ | |
3087 insn_referenced = sbitmap_alloc (sched_max_luid); | |
3088 sbitmap_zero (insn_referenced); | |
3089 | |
3090 /* Compute backward dependencies. */ | |
3091 for (bb = 0; bb < current_nr_blocks; bb++) | |
3092 compute_block_dependences (bb); | |
3093 | |
3094 sbitmap_free (insn_referenced); | |
3095 free_pending_lists (); | |
3096 finish_deps_global (); | |
3097 free (bb_deps); | |
3098 | |
3099 /* We don't want to recalculate this twice. */ | |
3100 RGN_DONT_CALC_DEPS (rgn) = 1; | |
3101 | |
3102 if (sel_sched_p ()) | |
3103 sched_emulate_haifa_p = 0; | |
3104 } | |
3105 else | |
/* Either this is a recovery block (which is always a single-block
region), or we are using selective scheduling. */
3108 gcc_assert (current_nr_blocks == 1 || sel_sched_p ()); | |
3109 } | |
3110 | |
/* Initialize data structures local to region RGN: interblock
probabilities, dominators, and split/ancestor edges. */
3113 void | |
3114 sched_rgn_local_init (int rgn) | |
3115 { | |
3116 int bb; | |
3117 | |
3118 /* Compute interblock info: probabilities, split-edges, dominators, etc. */ | |
3119 if (current_nr_blocks > 1) | |
3120 { | |
3121 basic_block block; | |
3122 edge e; | |
3123 edge_iterator ei; | |
3124 | |
3125 prob = XNEWVEC (int, current_nr_blocks); | |
3126 | |
3127 dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks); | |
3128 sbitmap_vector_zero (dom, current_nr_blocks); | |
3129 | |
3130 /* Use ->aux to implement EDGE_TO_BIT mapping. */ | |
3131 rgn_nr_edges = 0; | |
3132 FOR_EACH_BB (block) | |
3133 { | |
3134 if (CONTAINING_RGN (block->index) != rgn) | |
3135 continue; | |
3136 FOR_EACH_EDGE (e, ei, block->succs) | |
3137 SET_EDGE_TO_BIT (e, rgn_nr_edges++); | |
3138 } | |
3139 | |
3140 rgn_edges = XNEWVEC (edge, rgn_nr_edges); | |
3141 rgn_nr_edges = 0; | |
3142 FOR_EACH_BB (block) | |
3143 { | |
3144 if (CONTAINING_RGN (block->index) != rgn) | |
3145 continue; | |
3146 FOR_EACH_EDGE (e, ei, block->succs) | |
3147 rgn_edges[rgn_nr_edges++] = e; | |
3148 } | |
3149 | |
3150 /* Split edges. */ | |
3151 pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges); | |
3152 sbitmap_vector_zero (pot_split, current_nr_blocks); | |
3153 ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges); | |
3154 sbitmap_vector_zero (ancestor_edges, current_nr_blocks); | |
3155 | |
3156 /* Compute probabilities, dominators, split_edges. */ | |
3157 for (bb = 0; bb < current_nr_blocks; bb++) | |
3158 compute_dom_prob_ps (bb); | |
3159 | |
3160 /* Cleanup ->aux used for EDGE_TO_BIT mapping. */ | |
3161 /* We don't need them anymore. But we want to avoid duplication of | |
3162 aux fields in the newly created edges. */ | |
3163 FOR_EACH_BB (block) | |
3164 { | |
3165 if (CONTAINING_RGN (block->index) != rgn) | |
3166 continue; | |
3167 FOR_EACH_EDGE (e, ei, block->succs) | |
3168 e->aux = NULL; | |
3169 } | |
3170 } | |
3171 } | |
3172 | |
3173 /* Free data computed for the finished region. */ | |
3174 void | |
3175 sched_rgn_local_free (void) | |
3176 { | |
3177 free (prob); | |
3178 sbitmap_vector_free (dom); | |
3179 sbitmap_vector_free (pot_split); | |
3180 sbitmap_vector_free (ancestor_edges); | |
3181 free (rgn_edges); | |
3182 } | |
3183 | |
/* Free the per-region data if it was actually allocated, i.e. for a
multi-block region scheduled by the Haifa (not selective) scheduler. */
3185 void | |
3186 sched_rgn_local_finish (void) | |
3187 { | |
3188 if (current_nr_blocks > 1 && !sel_sched_p ()) | |
3189 { | |
3190 sched_rgn_local_free (); | |
3191 } | |
3192 } | |
3193 | |
3194 /* Setup scheduler infos. */ | |
3195 void | |
3196 rgn_setup_common_sched_info (void) | |
3197 { | |
3198 memcpy (&rgn_common_sched_info, &haifa_common_sched_info, | |
3199 sizeof (rgn_common_sched_info)); | |
3200 | |
3201 rgn_common_sched_info.fix_recovery_cfg = rgn_fix_recovery_cfg; | |
3202 rgn_common_sched_info.add_block = rgn_add_block; | |
3203 rgn_common_sched_info.estimate_number_of_insns | |
3204 = rgn_estimate_number_of_insns; | |
3205 rgn_common_sched_info.sched_pass_id = SCHED_RGN_PASS; | |
3206 | |
3207 common_sched_info = &rgn_common_sched_info; | |
3208 } | |
3209 | |
3210 /* Setup all *_sched_info structures (for the Haifa frontend | |
3211 and for the dependence analysis) in the interblock scheduler. */ | |
3212 void | |
3213 rgn_setup_sched_infos (void) | |
3214 { | |
3215 if (!sel_sched_p ()) | |
3216 memcpy (&rgn_sched_deps_info, &rgn_const_sched_deps_info, | |
3217 sizeof (rgn_sched_deps_info)); | |
3218 else | |
3219 memcpy (&rgn_sched_deps_info, &rgn_const_sel_sched_deps_info, | |
3220 sizeof (rgn_sched_deps_info)); | |
3221 | |
3222 sched_deps_info = &rgn_sched_deps_info; | |
3223 | |
3224 memcpy (&rgn_sched_info, &rgn_const_sched_info, sizeof (rgn_sched_info)); | |
3225 current_sched_info = &rgn_sched_info; | |
3226 } | |
3227 | |
3228 /* The one entry point in this file. */ | |
3229 void | |
3230 schedule_insns (void) | |
3231 { | |
3232 int rgn; | |
3233 | |
3234 /* Taking care of this degenerate case makes the rest of | |
3235 this code simpler. */ | |
3236 if (n_basic_blocks == NUM_FIXED_BLOCKS) | |
3237 return; | |
3238 | |
3239 rgn_setup_common_sched_info (); | |
3240 rgn_setup_sched_infos (); | |
3241 | |
3242 haifa_sched_init (); | |
3243 sched_rgn_init (reload_completed); | |
3244 | |
3245 bitmap_initialize (¬_in_df, 0); | |
3246 bitmap_clear (¬_in_df); | |
3247 | |
3248 /* Schedule every region in the subroutine. */ | |
3249 for (rgn = 0; rgn < nr_regions; rgn++) | |
3250 if (dbg_cnt (sched_region)) | |
3251 schedule_region (rgn); | |
3252 | |
3253 /* Clean up. */ | |
3254 sched_rgn_finish (); | |
3255 bitmap_clear (¬_in_df); | |
3256 | |
3257 haifa_sched_finish (); | |
3258 } | |
3259 | |
3260 /* INSN has been added to/removed from current region. */ | |
3261 static void | |
3262 rgn_add_remove_insn (rtx insn, int remove_p) | |
3263 { | |
3264 if (!remove_p) | |
3265 rgn_n_insns++; | |
3266 else | |
3267 rgn_n_insns--; | |
3268 | |
3269 if (INSN_BB (insn) == target_bb) | |
3270 { | |
3271 if (!remove_p) | |
3272 target_n_insns++; | |
3273 else | |
3274 target_n_insns--; | |
3275 } | |
3276 } | |
3277 | |
3278 /* Extend internal data structures. */ | |
3279 void | |
3280 extend_regions (void) | |
3281 { | |
3282 rgn_table = XRESIZEVEC (region, rgn_table, n_basic_blocks); | |
3283 rgn_bb_table = XRESIZEVEC (int, rgn_bb_table, n_basic_blocks); | |
3284 block_to_bb = XRESIZEVEC (int, block_to_bb, last_basic_block); | |
3285 containing_rgn = XRESIZEVEC (int, containing_rgn, last_basic_block); | |
3286 } | |
3287 | |
3288 void | |
3289 rgn_make_new_region_out_of_new_block (basic_block bb) | |
3290 { | |
3291 int i; | |
3292 | |
3293 i = RGN_BLOCKS (nr_regions); | |
3294 /* I - first free position in rgn_bb_table. */ | |
3295 | |
3296 rgn_bb_table[i] = bb->index; | |
3297 RGN_NR_BLOCKS (nr_regions) = 1; | |
3298 RGN_HAS_REAL_EBB (nr_regions) = 0; | |
3299 RGN_DONT_CALC_DEPS (nr_regions) = 0; | |
3300 CONTAINING_RGN (bb->index) = nr_regions; | |
3301 BLOCK_TO_BB (bb->index) = 0; | |
3302 | |
3303 nr_regions++; | |
3304 | |
3305 RGN_BLOCKS (nr_regions) = i + 1; | |
3306 } | |
3307 | |
/* BB was added to ebb after AFTER.  */
static void
rgn_add_block (basic_block bb, basic_block after)
{
  extend_regions ();
  bitmap_set_bit (&not_in_df, bb->index);

  if (after == 0 || after == EXIT_BLOCK_PTR)
    {
      rgn_make_new_region_out_of_new_block (bb);
      RGN_DONT_CALC_DEPS (nr_regions - 1) = (after == EXIT_BLOCK_PTR);
    }
  else
    {
      int i, pos;

      /* We need to fix rgn_table, block_to_bb, containing_rgn
         and ebb_head.  */

      BLOCK_TO_BB (bb->index) = BLOCK_TO_BB (after->index);

      /* We extend ebb_head to one more position to
         easily find the last position of the last ebb in
         the current region.  Thus, ebb_head[BLOCK_TO_BB (after) + 1]
         is _always_ valid for access.  */

      i = BLOCK_TO_BB (after->index) + 1;
      pos = ebb_head[i] - 1;
      /* Now POS is the index of the last block in AFTER's ebb.  */

      /* Find the index of basic block AFTER.  */
      for (; rgn_bb_table[pos] != after->index; pos--)
        ;

      pos++;
      gcc_assert (pos > ebb_head[i - 1]);

      /* i - ebb right after "AFTER".  */
      /* ebb_head[i] - VALID.  */

      /* Source position: POS
         Destination position: POS + 1
         Last position:
           RGN_BLOCKS (nr_regions) - 1
         Number of elements to copy: (last_position) - (source_position) + 1.  */

      memmove (rgn_bb_table + pos + 1,
               rgn_bb_table + pos,
               ((RGN_BLOCKS (nr_regions) - 1) - (pos) + 1)
               * sizeof (*rgn_bb_table));

      rgn_bb_table[pos] = bb->index;

      for (; i <= current_nr_blocks; i++)
        ebb_head[i]++;

      i = CONTAINING_RGN (after->index);
      CONTAINING_RGN (bb->index) = i;

      RGN_HAS_REAL_EBB (i) = 1;

      for (++i; i <= nr_regions; i++)
        RGN_BLOCKS (i)++;
    }
}

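/* Worked example (editorial): suppose AFTER = B1 and its ebb is stored
   as ..., B0, B1, B2, ... in rgn_bb_table.  The backward scan leaves
   POS at the slot just past B1; the memmove shifts everything from POS
   up to the last occupied slot one position to the right, and BB's
   index is written into the gap:

     before: ..., B0, B1, B2, ...
     after:  ..., B0, B1, BB, B2, ...

   All later ebb_head[] entries and the RGN_BLOCKS () start indices of
   all later regions are then bumped by one.  */
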
/* Fix internal data after interblock movement of a jump instruction.
   For the meaning of the parameters, see
   sched-int.h: struct sched_info: fix_recovery_cfg.  */
static void
rgn_fix_recovery_cfg (int bbi, int check_bbi, int check_bb_nexti)
{
  int old_pos, new_pos, i;

  BLOCK_TO_BB (check_bb_nexti) = BLOCK_TO_BB (bbi);

  for (old_pos = ebb_head[BLOCK_TO_BB (check_bbi) + 1] - 1;
       rgn_bb_table[old_pos] != check_bb_nexti;
       old_pos--)
    ;
  gcc_assert (old_pos > ebb_head[BLOCK_TO_BB (check_bbi)]);

  for (new_pos = ebb_head[BLOCK_TO_BB (bbi) + 1] - 1;
       rgn_bb_table[new_pos] != bbi;
       new_pos--)
    ;
  new_pos++;
  gcc_assert (new_pos > ebb_head[BLOCK_TO_BB (bbi)]);

  gcc_assert (new_pos < old_pos);

  memmove (rgn_bb_table + new_pos + 1,
           rgn_bb_table + new_pos,
           (old_pos - new_pos) * sizeof (*rgn_bb_table));

  rgn_bb_table[new_pos] = check_bb_nexti;

  for (i = BLOCK_TO_BB (bbi) + 1; i <= BLOCK_TO_BB (check_bbi); i++)
    ebb_head[i]++;
}

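/* In effect (editorial summary): CHECK_BB_NEXTI is moved from its old
   slot, found near CHECK_BBI, to the slot immediately after BBI; the
   blocks in between shift one position to the right, and the
   ebb_head[] entries of the ebbs lying between the two positions are
   bumped by one.  */
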
/* Return the next block in the ebb chain.  For the meaning of the
   parameters, see sched-int.h: struct sched_info: advance_target_bb.  */
static basic_block
advance_target_bb (basic_block bb, rtx insn)
{
  if (insn)
    return 0;

  gcc_assert (BLOCK_TO_BB (bb->index) == target_bb
              && BLOCK_TO_BB (bb->next_bb->index) == target_bb);
  return bb->next_bb;
}

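/* Note (editorial): when INSN is non-null the hook returns 0, i.e. the
   region scheduler never switches the target block in the middle of
   scheduling an insn; the target only advances to BB->next_bb when the
   hook is called with a null INSN.  */
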
#endif

static bool
gate_handle_sched (void)
{
#ifdef INSN_SCHEDULING
  return flag_schedule_insns && dbg_cnt (sched_func);
#else
  return 0;
#endif
}

/* Run the instruction scheduler.  */
static unsigned int
rest_of_handle_sched (void)
{
#ifdef INSN_SCHEDULING
  if (flag_selective_scheduling
      && ! maybe_skip_selective_scheduling ())
    run_selective_scheduling ();
  else
    schedule_insns ();
#endif
  return 0;
}

static bool
gate_handle_sched2 (void)
{
#ifdef INSN_SCHEDULING
  return optimize > 0 && flag_schedule_insns_after_reload
    && dbg_cnt (sched2_func);
#else
  return 0;
#endif
}

/* Run the second scheduling pass, after reload.  */
static unsigned int
rest_of_handle_sched2 (void)
{
#ifdef INSN_SCHEDULING
  if (flag_selective_scheduling2
      && ! maybe_skip_selective_scheduling ())
    run_selective_scheduling ();
  else
    {
      /* Do control and data sched analysis again,
         and write some more of the results to the dump file.  */
      if (flag_sched2_use_superblocks || flag_sched2_use_traces)
        schedule_ebbs ();
      else
        schedule_insns ();
    }
#endif
  return 0;
}

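/* Editorial summary of the dispatch above, in terms of the user-level
   flags (assuming the usual option-to-variable mapping):

     -fselective-scheduling2, not skipped  -> run_selective_scheduling ()
     -fsched2-use-superblocks or
       -fsched2-use-traces                 -> schedule_ebbs ()
     otherwise                             -> schedule_insns ()  */
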
struct rtl_opt_pass pass_sched =
{
 {
  RTL_PASS,
  "sched1",                             /* name */
  gate_handle_sched,                    /* gate */
  rest_of_handle_sched,                 /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_SCHED,                             /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_dump_func |
  TODO_verify_flow |
  TODO_ggc_collect                      /* todo_flags_finish */
 }
};

struct rtl_opt_pass pass_sched2 =
{
 {
  RTL_PASS,
  "sched2",                             /* name */
  gate_handle_sched2,                   /* gate */
  rest_of_handle_sched2,                /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_SCHED2,                            /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_dump_func |
  TODO_verify_flow |
  TODO_ggc_collect                      /* todo_flags_finish */
 }
};