/* Instruction scheduling pass.
   Copyright (C) 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000,
   2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
   Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com).  Enhanced by,
   and currently maintained by, Jim Wilson (wilson@cygnus.com)

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass implements list scheduling within basic blocks.  It is
   run twice: (1) after flow analysis, but before register allocation,
   and (2) after register allocation.

   The first run performs interblock scheduling, moving insns between
   different blocks in the same "region", and the second runs only
   basic block scheduling.

   Interblock motions performed are useful motions and speculative
   motions, including speculative loads.  Motions requiring code
   duplication are not supported.  The identification of motion type
   and the check for validity of speculative motions requires
   construction and analysis of the function's control flow graph.

   The main entry point for this pass is schedule_insns(), called for
   each function.  The work of the scheduler is organized in three
   levels: (1) function level: insns are subject to splitting, the
   control-flow-graph is constructed, regions are computed (after
   reload, each region is of one block), (2) region level: control
   flow graph attributes required for interblock scheduling are
   computed (dominators, reachability, etc.), data dependences and
   priorities are computed, and (3) block level: insns in the block
   are actually scheduled.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "toplev.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "regs.h"
#include "function.h"
#include "flags.h"
#include "insn-config.h"
#include "insn-attr.h"
#include "except.h"
#include "recog.h"
#include "cfglayout.h"
#include "params.h"
#include "sched-int.h"
#include "sel-sched.h"
#include "target.h"
#include "timevar.h"
#include "tree-pass.h"
#include "dbgcnt.h"

#ifdef INSN_SCHEDULING

/* Some accessor macros for h_i_d members only used within this file.  */
#define FED_BY_SPEC_LOAD(INSN) (HID (INSN)->fed_by_spec_load)
#define IS_LOAD_INSN(INSN) (HID (INSN)->is_load_insn)

/* nr_inter/spec counts interblock/speculative motion for the function.  */
static int nr_inter, nr_spec;

static int is_cfg_nonregular (void);

/* Number of regions in the procedure.  */
int nr_regions = 0;

/* Table of region descriptions.  */
region *rgn_table = NULL;

/* Array of lists of regions' blocks.  */
int *rgn_bb_table = NULL;

/* Topological order of blocks in the region (if b2 is reachable from
   b1, block_to_bb[b2] > block_to_bb[b1]).  Note: A basic block is
   always referred to by either block or b, while its topological
   order name (in the region) is referred to by bb.  */
int *block_to_bb = NULL;
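/* For example (hypothetical block indices): if a region consists of
   blocks 4, 7 and 9, with block 4 the region entry and block 9
   reachable from block 7, then block_to_bb[4] == 0, block_to_bb[7] == 1
   and block_to_bb[9] == 2.  */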

/* The number of the region containing a block.  */
int *containing_rgn = NULL;

/* ebb_head[i] is the index in rgn_bb_table of the head basic block of
   the i'th ebb.  Currently we can get an ebb only through splitting of
   the currently scheduled block, therefore we don't need an ebb_head
   array for every region; it's sufficient to hold it for the current
   one only.  */
int *ebb_head = NULL;

/* The minimum probability of reaching a source block so that it will be
   considered for speculative scheduling.  */
static int min_spec_prob;
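/* Presumably initialized once during scheduler setup from the
   min-spec-prob parameter, scaled to REG_BR_PROB_BASE; within this
   section it is only read (in compute_trg_info below).  */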

static void find_single_block_region (bool);
static void find_rgns (void);
static bool too_large (int, int *, int *);

/* Blocks of the current region being scheduled.  */
int current_nr_blocks;
int current_blocks;

/* A speculative motion requires checking live information on the path
   from 'source' to 'target'.  The split blocks are those to be checked.
   After a speculative motion, live information should be modified in
   the 'update' blocks.

   Lists of split and update blocks for each candidate of the current
   target are in array bblst_table.  */
static basic_block *bblst_table;
static int bblst_size, bblst_last;

/* Target info declarations.

   The block currently being scheduled is referred to as the "target" block,
   while other blocks in the region from which insns can be moved to the
   target are called "source" blocks.  The candidate structure holds info
   about such sources: are they valid?  Speculative?  Etc.  */
typedef struct
{
  basic_block *first_member;
  int nr_members;
}
bblst;

typedef struct
{
  char is_valid;
  char is_speculative;
  int src_prob;
  bblst split_bbs;
  bblst update_bbs;
}
candidate;

static candidate *candidate_table;
#define IS_VALID(src) (candidate_table[src].is_valid)
#define IS_SPECULATIVE(src) (candidate_table[src].is_speculative)
#define IS_SPECULATIVE_INSN(INSN) \
  (IS_SPECULATIVE (BLOCK_TO_BB (BLOCK_NUM (INSN))))
#define SRC_PROB(src) ( candidate_table[src].src_prob )

/* The bb being currently scheduled.  */
int target_bb;

/* List of edges.  */
typedef struct
{
  edge *first_member;
  int nr_members;
}
edgelst;

static edge *edgelst_table;
static int edgelst_last;

static void extract_edgelst (sbitmap, edgelst *);

/* Target info functions.  */
static void split_edges (int, int, edgelst *);
static void compute_trg_info (int);
void debug_candidate (int);
void debug_candidates (int);

/* Dominators array: dom[i] contains the sbitmap of dominators of
   bb i in the region.  */
static sbitmap *dom;

/* bb 0 is the only region entry.  */
#define IS_RGN_ENTRY(bb) (!bb)

/* Is bb_src dominated by bb_trg.  */
#define IS_DOMINATED(bb_src, bb_trg) \
  ( TEST_BIT (dom[bb_src], bb_trg) )

/* Probability: Prob[i] is an int in [0, REG_BR_PROB_BASE] which is
   the probability of bb i relative to the region entry.  */
static int *prob;

/* Bit-set of edges, where bit i stands for edge i.  */
typedef sbitmap edgeset;

/* Number of edges in the region.  */
static int rgn_nr_edges;

/* Array of size rgn_nr_edges.  */
static edge *rgn_edges;

/* Mapping from each edge in the graph to its number in the rgn.  */
#define EDGE_TO_BIT(edge) ((int)(size_t)(edge)->aux)
#define SET_EDGE_TO_BIT(edge,nr) ((edge)->aux = (void *)(size_t)(nr))
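/* A minimal usage sketch (hypothetical edge E and index I): after
   SET_EDGE_TO_BIT (E, I), EDGE_TO_BIT (E) yields I again.  The index is
   stashed in the edge's otherwise unused aux field, so aux must not be
   used for anything else while the region's edge numbering is live.  */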

/* The set of split edges of a source bb differs for each target
   bb.  In order to compute this efficiently, the 'potential-split edges'
   are computed for each bb prior to scheduling a region.  This is actually
   the split edges of each bb relative to the region entry.

   pot_split[bb] is the set of potential split edges of bb.  */
static edgeset *pot_split;

/* For every bb, a set of its ancestor edges.  */
static edgeset *ancestor_edges;

#define INSN_PROBABILITY(INSN) (SRC_PROB (BLOCK_TO_BB (BLOCK_NUM (INSN))))

/* Speculative scheduling functions.  */
static int check_live_1 (int, rtx);
static void update_live_1 (int, rtx);
static int is_pfree (rtx, int, int);
static int find_conditional_protection (rtx, int);
static int is_conditionally_protected (rtx, int, int);
static int is_prisky (rtx, int, int);
static int is_exception_free (rtx, int, int);

static bool sets_likely_spilled (rtx);
static void sets_likely_spilled_1 (rtx, const_rtx, void *);
static void add_branch_dependences (rtx, rtx);
static void compute_block_dependences (int);

static void schedule_region (int);
static rtx concat_INSN_LIST (rtx, rtx);
static void concat_insn_mem_list (rtx, rtx, rtx *, rtx *);
static void propagate_deps (int, struct deps *);
static void free_pending_lists (void);

/* Functions for construction of the control flow graph.  */

/* Return 1 if the control flow graph should not be constructed,
   0 otherwise.

   We decide not to build the control flow graph if there is possibly more
   than one entry to the function, if computed branches exist, if we
   have nonlocal gotos, or if we have an unreachable loop.  */

static int
is_cfg_nonregular (void)
{
  basic_block b;
  rtx insn;

  /* If we have a label that could be the target of a nonlocal goto, then
     the cfg is not well structured.  */
  if (nonlocal_goto_handler_labels)
    return 1;

  /* If we have any forced labels, then the cfg is not well structured.  */
  if (forced_labels)
    return 1;

  /* If we have exception handlers, then we consider the cfg not well
     structured.  ?!?  We should be able to handle this now that we
     compute an accurate cfg for EH.  */
  if (current_function_has_exception_handlers ())
    return 1;

  /* If we have insns which refer to labels as non-jumped-to operands,
     then we consider the cfg not well structured.  */
  FOR_EACH_BB (b)
    FOR_BB_INSNS (b, insn)
      {
        rtx note, next, set, dest;

        /* If this function has a computed jump, then we consider the cfg
           not well structured.  */
        if (JUMP_P (insn) && computed_jump_p (insn))
          return 1;

        if (!INSN_P (insn))
          continue;

        note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX);
        if (note == NULL_RTX)
          continue;

        /* For that label not to be seen as a referred-to label, this
           must be a single-set which is feeding a jump *only*.  This
           could be a conditional jump with the label split off for
           machine-specific reasons or a casesi/tablejump.  */
        next = next_nonnote_insn (insn);
        if (next == NULL_RTX
            || !JUMP_P (next)
            || (JUMP_LABEL (next) != XEXP (note, 0)
                && find_reg_note (next, REG_LABEL_TARGET,
                                  XEXP (note, 0)) == NULL_RTX)
            || BLOCK_FOR_INSN (insn) != BLOCK_FOR_INSN (next))
          return 1;

        set = single_set (insn);
        if (set == NULL_RTX)
          return 1;

        dest = SET_DEST (set);
        if (!REG_P (dest) || !dead_or_set_p (next, dest))
          return 1;
      }

  /* Unreachable loops with more than one basic block are detected
     during the DFS traversal in find_rgns.

     Unreachable loops with a single block are detected here.  This
     test is redundant with the one in find_rgns, but it's much
     cheaper to go ahead and catch the trivial case here.  */
  FOR_EACH_BB (b)
    {
      if (EDGE_COUNT (b->preds) == 0
          || (single_pred_p (b)
              && single_pred (b) == b))
        return 1;
    }

  /* All the tests passed.  Consider the cfg well structured.  */
  return 0;
}

/* Extract list of edges from a bitmap containing EDGE_TO_BIT bits.  */

static void
extract_edgelst (sbitmap set, edgelst *el)
{
  unsigned int i = 0;
  sbitmap_iterator sbi;

  /* edgelst table space is reused in each call to extract_edgelst.  */
  edgelst_last = 0;

  el->first_member = &edgelst_table[edgelst_last];
  el->nr_members = 0;

  /* Iterate over each set bit in the bitmap.  */
  EXECUTE_IF_SET_IN_SBITMAP (set, 0, i, sbi)
    {
      edgelst_table[edgelst_last++] = rgn_edges[i];
      el->nr_members++;
    }
}

/* Functions for the construction of regions.  */

/* Print the regions, for debugging purposes.  Callable from debugger.  */

void
debug_regions (void)
{
  int rgn, bb;

  fprintf (sched_dump, "\n;; ------------ REGIONS ----------\n\n");
  for (rgn = 0; rgn < nr_regions; rgn++)
    {
      fprintf (sched_dump, ";;\trgn %d nr_blocks %d:\n", rgn,
               rgn_table[rgn].rgn_nr_blocks);
      fprintf (sched_dump, ";;\tbb/block: ");

      /* We don't have ebb_head initialized yet, so we can't use
         BB_TO_BLOCK ().  */
      current_blocks = RGN_BLOCKS (rgn);

      for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
        fprintf (sched_dump, " %d/%d ", bb, rgn_bb_table[current_blocks + bb]);

      fprintf (sched_dump, "\n\n");
    }
}

/* Print the region's basic blocks.  */

void
debug_region (int rgn)
{
  int bb;

  fprintf (stderr, "\n;; ------------ REGION %d ----------\n\n", rgn);
  fprintf (stderr, ";;\trgn %d nr_blocks %d:\n", rgn,
           rgn_table[rgn].rgn_nr_blocks);
  fprintf (stderr, ";;\tbb/block: ");

  /* We don't have ebb_head initialized yet, so we can't use
     BB_TO_BLOCK ().  */
  current_blocks = RGN_BLOCKS (rgn);

  for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
    fprintf (stderr, " %d/%d ", bb, rgn_bb_table[current_blocks + bb]);

  fprintf (stderr, "\n\n");

  for (bb = 0; bb < rgn_table[rgn].rgn_nr_blocks; bb++)
    {
      debug_bb_n_slim (rgn_bb_table[current_blocks + bb]);
      fprintf (stderr, "\n");
    }

  fprintf (stderr, "\n");

}

/* True when a bb with index BB_INDEX is contained in region RGN.  */
static bool
bb_in_region_p (int bb_index, int rgn)
{
  int i;

  for (i = 0; i < rgn_table[rgn].rgn_nr_blocks; i++)
    if (rgn_bb_table[current_blocks + i] == bb_index)
      return true;

  return false;
}

/* Dump region RGN to file F using dot syntax.  */
void
dump_region_dot (FILE *f, int rgn)
{
  int i;

  fprintf (f, "digraph Region_%d {\n", rgn);

  /* We don't have ebb_head initialized yet, so we can't use
     BB_TO_BLOCK ().  */
  current_blocks = RGN_BLOCKS (rgn);

  for (i = 0; i < rgn_table[rgn].rgn_nr_blocks; i++)
    {
      edge e;
      edge_iterator ei;
      int src_bb_num = rgn_bb_table[current_blocks + i];
      struct basic_block_def *bb = BASIC_BLOCK (src_bb_num);

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (bb_in_region_p (e->dest->index, rgn))
          fprintf (f, "\t%d -> %d\n", src_bb_num, e->dest->index);
    }
  fprintf (f, "}\n");
}

/* The same, but first open a file specified by FNAME.  */
void
dump_region_dot_file (const char *fname, int rgn)
{
  FILE *f = fopen (fname, "wt");
  dump_region_dot (f, rgn);
  fclose (f);
}

/* Build a single block region for each basic block in the function.
   This allows for using the same code for interblock and basic block
   scheduling.  */

static void
find_single_block_region (bool ebbs_p)
{
  basic_block bb, ebb_start;
  int i = 0;

  nr_regions = 0;

  if (ebbs_p) {
    int probability_cutoff;
    if (profile_info && flag_branch_probabilities)
      probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY_FEEDBACK);
    else
      probability_cutoff = PARAM_VALUE (TRACER_MIN_BRANCH_PROBABILITY);
    probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;

    FOR_EACH_BB (ebb_start)
      {
        RGN_NR_BLOCKS (nr_regions) = 0;
        RGN_BLOCKS (nr_regions) = i;
        RGN_DONT_CALC_DEPS (nr_regions) = 0;
        RGN_HAS_REAL_EBB (nr_regions) = 0;

        for (bb = ebb_start; ; bb = bb->next_bb)
          {
            edge e;
            edge_iterator ei;

            rgn_bb_table[i] = bb->index;
            RGN_NR_BLOCKS (nr_regions)++;
            CONTAINING_RGN (bb->index) = nr_regions;
            BLOCK_TO_BB (bb->index) = i - RGN_BLOCKS (nr_regions);
            i++;

            if (bb->next_bb == EXIT_BLOCK_PTR
                || LABEL_P (BB_HEAD (bb->next_bb)))
              break;

            FOR_EACH_EDGE (e, ei, bb->succs)
              if ((e->flags & EDGE_FALLTHRU) != 0)
                break;
            if (! e)
              break;
            if (e->probability <= probability_cutoff)
              break;
          }

        ebb_start = bb;
        nr_regions++;
      }
  }
  else
    FOR_EACH_BB (bb)
      {
        rgn_bb_table[nr_regions] = bb->index;
        RGN_NR_BLOCKS (nr_regions) = 1;
        RGN_BLOCKS (nr_regions) = nr_regions;
        RGN_DONT_CALC_DEPS (nr_regions) = 0;
        RGN_HAS_REAL_EBB (nr_regions) = 0;

        CONTAINING_RGN (bb->index) = nr_regions;
        BLOCK_TO_BB (bb->index) = 0;
        nr_regions++;
      }
}

/* Estimate the number of insns in BB.  */
static int
rgn_estimate_number_of_insns (basic_block bb)
{
  return INSN_LUID (BB_END (bb)) - INSN_LUID (BB_HEAD (bb));
}

/* Update the number of blocks and the estimate for the number of insns
   in the region.  Return true if the region is "too large" for interblock
   scheduling (compile time considerations).  */

static bool
too_large (int block, int *num_bbs, int *num_insns)
{
  (*num_bbs)++;
  (*num_insns) += (common_sched_info->estimate_number_of_insns
                   (BASIC_BLOCK (block)));

  return ((*num_bbs > PARAM_VALUE (PARAM_MAX_SCHED_REGION_BLOCKS))
          || (*num_insns > PARAM_VALUE (PARAM_MAX_SCHED_REGION_INSNS)));
}
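/* E.g. with the default parameter values (10 blocks and 100 insns at
   the time of writing; see params.def for the authoritative defaults),
   a region stops growing as soon as either count is exceeded.  */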

/* Update_loop_relations(blk, hdr): Check if the loop headed by max_hdr[blk]
   is still an inner loop.  Put in max_hdr[blk] the header of the innermost
   loop containing blk.  */
#define UPDATE_LOOP_RELATIONS(blk, hdr)                 \
{                                                       \
  if (max_hdr[blk] == -1)                               \
    max_hdr[blk] = hdr;                                 \
  else if (dfs_nr[max_hdr[blk]] > dfs_nr[hdr])          \
    RESET_BIT (inner, hdr);                             \
  else if (dfs_nr[max_hdr[blk]] < dfs_nr[hdr])          \
    {                                                   \
      RESET_BIT (inner, max_hdr[blk]);                  \
      max_hdr[blk] = hdr;                               \
    }                                                   \
}
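/* An informal reading: a header with a larger DFS number was entered
   later in the traversal and is therefore deeper.  If BLK already has a
   deeper header than HDR, HDR's loop cannot be innermost; if HDR is
   deeper, the previously recorded header loses its inner status and HDR
   becomes BLK's header.  */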

/* Find regions for interblock scheduling.

   A region for scheduling can be:

     * A loop-free procedure, or

     * A reducible inner loop, or

     * A basic block not contained in any other region.

   ?!? In theory we could build other regions based on extended basic
   blocks or reverse extended basic blocks.  Is it worth the trouble?

   Loop blocks that form a region are put into the region's block list
   in topological order.

   This procedure stores its results into the following global (ick) variables

     * rgn_nr
     * rgn_table
     * rgn_bb_table
     * block_to_bb
     * containing_rgn

   We use dominator relationships to avoid making regions out of non-reducible
   loops.

   This procedure needs to be converted to work on pred/succ lists instead
   of edge tables.  That would simplify it somewhat.  */

static void
haifa_find_rgns (void)
{
  int *max_hdr, *dfs_nr, *degree;
  char no_loops = 1;
  int node, child, loop_head, i, head, tail;
  int count = 0, sp, idx = 0;
  edge_iterator current_edge;
  edge_iterator *stack;
  int num_bbs, num_insns, unreachable;
  int too_large_failure;
  basic_block bb;

  /* Note if a block is a natural loop header.  */
  sbitmap header;

  /* Note if a block is a natural inner loop header.  */
  sbitmap inner;

  /* Note if a block is in the block queue.  */
  sbitmap in_queue;

  /* Note if a block is on the DFS stack.  */
  sbitmap in_stack;

  /* Perform a DFS traversal of the cfg.  Identify loop headers, inner loops
     and a mapping from block to its loop header (if the block is contained
     in a loop, else -1).

     Store results in HEADER, INNER, and MAX_HDR respectively, these will
     be used as inputs to the second traversal.

     STACK, SP and DFS_NR are only used during the first traversal.  */

  /* Allocate and initialize variables for the first traversal.  */
  max_hdr = XNEWVEC (int, last_basic_block);
  dfs_nr = XCNEWVEC (int, last_basic_block);
  stack = XNEWVEC (edge_iterator, n_edges);

  inner = sbitmap_alloc (last_basic_block);
  sbitmap_ones (inner);

  header = sbitmap_alloc (last_basic_block);
  sbitmap_zero (header);

  in_queue = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_queue);

  in_stack = sbitmap_alloc (last_basic_block);
  sbitmap_zero (in_stack);

  for (i = 0; i < last_basic_block; i++)
    max_hdr[i] = -1;

#define EDGE_PASSED(E) (ei_end_p ((E)) || ei_edge ((E))->aux)
#define SET_EDGE_PASSED(E) (ei_edge ((E))->aux = ei_edge ((E)))
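/* An edge iterator counts as "passed" when it has run off the end of
   its edge vector, or when the edge it points at has been marked by
   storing a non-NULL value (the edge itself) in the edge's aux field.
   The aux fields are cleared again once the traversal is done.  */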

  /* DFS traversal to find inner loops in the cfg.  */

  current_edge = ei_start (single_succ (ENTRY_BLOCK_PTR)->succs);
  sp = -1;

  while (1)
    {
      if (EDGE_PASSED (current_edge))
        {
          /* We have reached a leaf node or a node that was already
             processed.  Pop edges off the stack until we find
             an edge that has not yet been processed.  */
          while (sp >= 0 && EDGE_PASSED (current_edge))
            {
              /* Pop entry off the stack.  */
              current_edge = stack[sp--];
              node = ei_edge (current_edge)->src->index;
              gcc_assert (node != ENTRY_BLOCK);
              child = ei_edge (current_edge)->dest->index;
              gcc_assert (child != EXIT_BLOCK);
              RESET_BIT (in_stack, child);
              if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
                UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
              ei_next (&current_edge);
            }

          /* See if we have finished the DFS tree traversal.  */
          if (sp < 0 && EDGE_PASSED (current_edge))
            break;

          /* Nope, continue the traversal with the popped node.  */
          continue;
        }

      /* Process a node.  */
      node = ei_edge (current_edge)->src->index;
      gcc_assert (node != ENTRY_BLOCK);
      SET_BIT (in_stack, node);
      dfs_nr[node] = ++count;

      /* We don't traverse to the exit block.  */
      child = ei_edge (current_edge)->dest->index;
      if (child == EXIT_BLOCK)
        {
          SET_EDGE_PASSED (current_edge);
          ei_next (&current_edge);
          continue;
        }

      /* If the successor is in the stack, then we've found a loop.
         Mark the loop; if it is not a natural loop, it will be
         rejected during the second traversal.  */
      if (TEST_BIT (in_stack, child))
        {
          no_loops = 0;
          SET_BIT (header, child);
          UPDATE_LOOP_RELATIONS (node, child);
          SET_EDGE_PASSED (current_edge);
          ei_next (&current_edge);
          continue;
        }

      /* If the child was already visited, then there is no need to visit
         it again.  Just update the loop relationships and restart
         with a new edge.  */
      if (dfs_nr[child])
        {
          if (max_hdr[child] >= 0 && TEST_BIT (in_stack, max_hdr[child]))
            UPDATE_LOOP_RELATIONS (node, max_hdr[child]);
          SET_EDGE_PASSED (current_edge);
          ei_next (&current_edge);
          continue;
        }

      /* Push an entry on the stack and continue DFS traversal.  */
      stack[++sp] = current_edge;
      SET_EDGE_PASSED (current_edge);
      current_edge = ei_start (ei_edge (current_edge)->dest->succs);
    }

  /* Reset ->aux field used by EDGE_PASSED.  */
  FOR_ALL_BB (bb)
    {
      edge_iterator ei;
      edge e;
      FOR_EACH_EDGE (e, ei, bb->succs)
        e->aux = NULL;
    }


  /* Another check for unreachable blocks.  The earlier test in
     is_cfg_nonregular only finds unreachable blocks that do not
     form a loop.

     The DFS traversal will mark every block that is reachable from
     the entry node by placing a nonzero value in dfs_nr.  Thus if
     dfs_nr is zero for any block, then it must be unreachable.  */
  unreachable = 0;
  FOR_EACH_BB (bb)
    if (dfs_nr[bb->index] == 0)
      {
        unreachable = 1;
        break;
      }

  /* Gross.  To avoid wasting memory, the second pass uses the dfs_nr array
     to hold degree counts.  */
  degree = dfs_nr;

  FOR_EACH_BB (bb)
    degree[bb->index] = EDGE_COUNT (bb->preds);

  /* Do not perform region scheduling if there are any unreachable
     blocks.  */
  if (!unreachable)
    {
      int *queue, *degree1 = NULL;
      /* We use EXTENDED_RGN_HEADER as an addition to HEADER and put
         there the basic blocks that are forced to be region heads.
         This is done to try to assemble several smaller regions
         from a too_large region.  */
      sbitmap extended_rgn_header = NULL;
      bool extend_regions_p;

      if (no_loops)
        SET_BIT (header, 0);

      /* Second traversal: find reducible inner loops and topologically
         sort the blocks of each region.  */

      queue = XNEWVEC (int, n_basic_blocks);

      extend_regions_p = PARAM_VALUE (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS) > 0;
      if (extend_regions_p)
        {
          degree1 = XNEWVEC (int, last_basic_block);
          extended_rgn_header = sbitmap_alloc (last_basic_block);
          sbitmap_zero (extended_rgn_header);
        }

      /* Find blocks which are inner loop headers.  We still have non-reducible
         loops to consider at this point.  */
      FOR_EACH_BB (bb)
        {
          if (TEST_BIT (header, bb->index) && TEST_BIT (inner, bb->index))
            {
              edge e;
              edge_iterator ei;
              basic_block jbb;

              /* Now check that the loop is reducible.  We do this separate
                 from finding inner loops so that we do not find a reducible
                 loop which contains an inner non-reducible loop.

                 A simple way to find reducible/natural loops is to verify
                 that each block in the loop is dominated by the loop
                 header.

                 If there exists a block that is not dominated by the loop
                 header, then the block is reachable from outside the loop
                 and thus the loop is not a natural loop.  */
              FOR_EACH_BB (jbb)
                {
                  /* First identify blocks in the loop, except for the loop
                     entry block.  */
                  if (bb->index == max_hdr[jbb->index] && bb != jbb)
                    {
                      /* Now verify that the block is dominated by the loop
                         header.  */
                      if (!dominated_by_p (CDI_DOMINATORS, jbb, bb))
                        break;
                    }
                }

              /* If we exited the loop early, then BB is the header of
                 a non-reducible loop and we should quit processing it
                 now.  */
              if (jbb != EXIT_BLOCK_PTR)
                continue;

              /* BB is a header of an inner loop, or block 0 in a subroutine
                 with no loops at all.  */
              head = tail = -1;
              too_large_failure = 0;
              loop_head = max_hdr[bb->index];

              if (extend_regions_p)
                /* We save degree in case we meet a too_large region
                   and cancel it.  We need a correct degree later when
                   calling extend_rgns.  */
                memcpy (degree1, degree, last_basic_block * sizeof (int));

              /* Decrease degree of all BB's successors for topological
                 ordering.  */
              FOR_EACH_EDGE (e, ei, bb->succs)
                if (e->dest != EXIT_BLOCK_PTR)
                  --degree[e->dest->index];

              /* Estimate # insns, and count # blocks in the region.  */
              num_bbs = 1;
              num_insns = common_sched_info->estimate_number_of_insns (bb);

              /* Find all loop latches (blocks with back edges to the loop
                 header), or, if the cfg has no loops, all the leaf blocks.

                 Place those blocks into the queue.  */
              if (no_loops)
                {
                  FOR_EACH_BB (jbb)
                    /* Leaf nodes have only a single successor which must
                       be EXIT_BLOCK.  */
                    if (single_succ_p (jbb)
                        && single_succ (jbb) == EXIT_BLOCK_PTR)
                      {
                        queue[++tail] = jbb->index;
                        SET_BIT (in_queue, jbb->index);

                        if (too_large (jbb->index, &num_bbs, &num_insns))
                          {
                            too_large_failure = 1;
                            break;
                          }
                      }
                }
              else
                {
                  edge e;

                  FOR_EACH_EDGE (e, ei, bb->preds)
                    {
                      if (e->src == ENTRY_BLOCK_PTR)
                        continue;

                      node = e->src->index;

                      if (max_hdr[node] == loop_head && node != bb->index)
                        {
                          /* This is a loop latch.  */
                          queue[++tail] = node;
                          SET_BIT (in_queue, node);

                          if (too_large (node, &num_bbs, &num_insns))
                            {
                              too_large_failure = 1;
                              break;
                            }
                        }
                    }
                }

              /* Now add all the blocks in the loop to the queue.

                 We know the loop is a natural loop; however the algorithm
                 above will not always mark certain blocks as being in the
                 loop.  Consider:
                    node    children
                     a        b,c
                     b        c
                     c        a,d
                     d        b

                 The algorithm in the DFS traversal may not mark B & D as part
                 of the loop (i.e. they will not have max_hdr set to A).

                 We know they cannot be loop latches (else they would have
                 had max_hdr set since they'd have a backedge to a dominator
                 block).  So we don't need them on the initial queue.

                 We know they are part of the loop because they are dominated
                 by the loop header and can be reached by a backwards walk of
                 the edges starting with nodes on the initial queue.

                 It is safe and desirable to include those nodes in the
                 loop/scheduling region.  To do so we would need to decrease
                 the degree of a node if it is the target of a backedge
                 within the loop itself as the node is placed in the queue.

                 We do not do this because I'm not sure that the actual
                 scheduling code will properly handle this case. ?!? */

              while (head < tail && !too_large_failure)
                {
                  edge e;
                  child = queue[++head];

                  FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->preds)
                    {
                      node = e->src->index;

                      /* See discussion above about nodes not marked as in
                         this loop during the initial DFS traversal.  */
                      if (e->src == ENTRY_BLOCK_PTR
                          || max_hdr[node] != loop_head)
                        {
                          tail = -1;
                          break;
                        }
                      else if (!TEST_BIT (in_queue, node) && node != bb->index)
                        {
                          queue[++tail] = node;
                          SET_BIT (in_queue, node);

                          if (too_large (node, &num_bbs, &num_insns))
                            {
                              too_large_failure = 1;
                              break;
                            }
                        }
                    }
                }

              if (tail >= 0 && !too_large_failure)
                {
                  /* Place the loop header into list of region blocks.  */
                  degree[bb->index] = -1;
                  rgn_bb_table[idx] = bb->index;
                  RGN_NR_BLOCKS (nr_regions) = num_bbs;
                  RGN_BLOCKS (nr_regions) = idx++;
                  RGN_DONT_CALC_DEPS (nr_regions) = 0;
                  RGN_HAS_REAL_EBB (nr_regions) = 0;
                  CONTAINING_RGN (bb->index) = nr_regions;
                  BLOCK_TO_BB (bb->index) = count = 0;

                  /* Remove blocks from queue[] when their in degree
                     becomes zero.  Repeat until no blocks are left on the
                     list.  This produces a topological list of blocks in
                     the region.  */
                  while (tail >= 0)
                    {
                      if (head < 0)
                        head = tail;
                      child = queue[head];
                      if (degree[child] == 0)
                        {
                          edge e;

                          degree[child] = -1;
                          rgn_bb_table[idx++] = child;
                          BLOCK_TO_BB (child) = ++count;
                          CONTAINING_RGN (child) = nr_regions;
                          queue[head] = queue[tail--];

                          FOR_EACH_EDGE (e, ei, BASIC_BLOCK (child)->succs)
                            if (e->dest != EXIT_BLOCK_PTR)
                              --degree[e->dest->index];
                        }
                      else
                        --head;
                    }
                  ++nr_regions;
                }
              else if (extend_regions_p)
                {
                  /* Restore DEGREE.  */
                  int *t = degree;

                  degree = degree1;
                  degree1 = t;

                  /* And force successors of BB to be region heads.
                     This may provide several smaller regions instead
                     of one too_large region.  */
                  FOR_EACH_EDGE (e, ei, bb->succs)
                    if (e->dest != EXIT_BLOCK_PTR)
                      SET_BIT (extended_rgn_header, e->dest->index);
                }
            }
        }
      free (queue);

      if (extend_regions_p)
        {
          free (degree1);

          sbitmap_a_or_b (header, header, extended_rgn_header);
          sbitmap_free (extended_rgn_header);

          extend_rgns (degree, &idx, header, max_hdr);
        }
    }

  /* Any block that did not end up in a region is placed into a region
     by itself.  */
  FOR_EACH_BB (bb)
    if (degree[bb->index] >= 0)
      {
        rgn_bb_table[idx] = bb->index;
        RGN_NR_BLOCKS (nr_regions) = 1;
        RGN_BLOCKS (nr_regions) = idx++;
        RGN_DONT_CALC_DEPS (nr_regions) = 0;
        RGN_HAS_REAL_EBB (nr_regions) = 0;
        CONTAINING_RGN (bb->index) = nr_regions++;
        BLOCK_TO_BB (bb->index) = 0;
      }

  free (max_hdr);
  free (degree);
  free (stack);
  sbitmap_free (header);
  sbitmap_free (inner);
  sbitmap_free (in_queue);
  sbitmap_free (in_stack);
}


/* Wrapper function.
   If FLAG_SEL_SCHED_PIPELINING is set, then use a custom function to form
   regions.  Otherwise just call haifa_find_rgns.  */
static void
find_rgns (void)
{
  if (sel_sched_p () && flag_sel_sched_pipelining)
    sel_find_rgns ();
  else
    haifa_find_rgns ();
}

static int gather_region_statistics (int **);
static void print_region_statistics (int *, int, int *, int);

/* Calculate the histogram that shows the number of regions having the
   given number of basic blocks, and store it in the RSP array.  Return
   the size of this array.  */
static int
gather_region_statistics (int **rsp)
{
  int i, *a = 0, a_sz = 0;

  /* a[i] is the number of regions that have (i + 1) basic blocks.  */
  for (i = 0; i < nr_regions; i++)
    {
      int nr_blocks = RGN_NR_BLOCKS (i);

      gcc_assert (nr_blocks >= 1);

      if (nr_blocks > a_sz)
        {
          a = XRESIZEVEC (int, a, nr_blocks);
          do
            a[a_sz++] = 0;
          while (a_sz != nr_blocks);
        }

      a[nr_blocks - 1]++;
    }

  *rsp = a;
  return a_sz;
}

/* Print region statistics.  S1 and S2 denote the data before and after
   calling extend_rgns, respectively.  */
static void
print_region_statistics (int *s1, int s1_sz, int *s2, int s2_sz)
{
  int i;

  /* We iterate until s2_sz because extend_rgns does not decrease
     the maximal region size.  */
  for (i = 1; i < s2_sz; i++)
    {
      int n1, n2;

      n2 = s2[i];

      if (n2 == 0)
        continue;

      if (i >= s1_sz)
        n1 = 0;
      else
        n1 = s1[i];

      fprintf (sched_dump, ";; Region extension statistics: size %d: " \
               "was %d + %d more\n", i + 1, n1, n2 - n1);
    }
}

/* Extend regions.
   DEGREE - Array of incoming edge counts, considering only
   the edges whose sources are not yet in formed regions.
   IDXP - pointer to the next available index in rgn_bb_table.
   HEADER - set of all region heads.
   LOOP_HDR - mapping from block to the containing loop
   (two blocks can reside within one region if they have
   the same loop header).  */
void
extend_rgns (int *degree, int *idxp, sbitmap header, int *loop_hdr)
{
  int *order, i, rescan = 0, idx = *idxp, iter = 0, max_iter, *max_hdr;
  int nblocks = n_basic_blocks - NUM_FIXED_BLOCKS;

  max_iter = PARAM_VALUE (PARAM_MAX_SCHED_EXTEND_REGIONS_ITERS);

  max_hdr = XNEWVEC (int, last_basic_block);

  order = XNEWVEC (int, last_basic_block);
  post_order_compute (order, false, false);

  for (i = nblocks - 1; i >= 0; i--)
    {
      int bbn = order[i];
      if (degree[bbn] >= 0)
        {
          max_hdr[bbn] = bbn;
          rescan = 1;
        }
      else
        /* This block was already processed in find_rgns.  */
        max_hdr[bbn] = -1;
    }

  /* The idea is to topologically walk through the CFG in top-down order.
     During the traversal, if all the predecessors of a node are
     marked to be in the same region (they all have the same max_hdr),
     then the current node is also marked to be a part of that region.
     Otherwise the node starts its own region.
     The CFG should be traversed until no further changes are made.  On each
     iteration the set of the region heads is extended (the set of those
     blocks that have max_hdr[bbi] == bbi).  This set is upper bounded by the
     set of all basic blocks, thus the algorithm is guaranteed to
     terminate.  */
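/* A small worked example (hypothetical diamond CFG): for A -> B, A -> C,
   B -> D, C -> D, suppose A is a head.  B and C then inherit
   max_hdr == A from their single predecessor, and D joins as well since
   both of its predecessors agree on A, so one region covers the whole
   diamond.  Had B and C ended up in different regions, D would have
   started its own region instead.  */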

  while (rescan && iter < max_iter)
    {
      rescan = 0;

      for (i = nblocks - 1; i >= 0; i--)
        {
          edge e;
          edge_iterator ei;
          int bbn = order[i];

          if (max_hdr[bbn] != -1 && !TEST_BIT (header, bbn))
            {
              int hdr = -1;

              FOR_EACH_EDGE (e, ei, BASIC_BLOCK (bbn)->preds)
                {
                  int predn = e->src->index;

                  if (predn != ENTRY_BLOCK
                      /* If pred wasn't processed in find_rgns.  */
                      && max_hdr[predn] != -1
                      /* And pred and bb reside in the same loop.
                         (Or out of any loop).  */
                      && loop_hdr[bbn] == loop_hdr[predn])
                    {
                      if (hdr == -1)
                        /* Then bb extends the containing region of pred.  */
                        hdr = max_hdr[predn];
                      else if (hdr != max_hdr[predn])
                        /* Too bad, there are at least two predecessors
                           that reside in different regions.  Thus, BB should
                           begin its own region.  */
                        {
                          hdr = bbn;
                          break;
                        }
                    }
                  else
                    /* BB starts its own region.  */
                    {
                      hdr = bbn;
                      break;
                    }
                }

              if (hdr == bbn)
                {
                  /* If BB starts its own region,
                     update the set of headers with BB.  */
                  SET_BIT (header, bbn);
                  rescan = 1;
                }
              else
                gcc_assert (hdr != -1);

              max_hdr[bbn] = hdr;
            }
        }

      iter++;
    }

  /* Statistics were gathered on the SPEC2000 package of tests with
     mainline weekly snapshot gcc-4.1-20051015 on ia64.

     Statistics for SPECint:
     1 iteration : 1751 cases (38.7%)
     2 iterations: 2770 cases (61.3%)
     Blocks wrapped in regions by find_rgns without extension: 18295 blocks
     Blocks wrapped in regions by 2 iterations in extend_rgns: 23821 blocks
     (We don't count single block regions here).

     Statistics for SPECfp:
     1 iteration : 621 cases (35.9%)
     2 iterations: 1110 cases (64.1%)
     Blocks wrapped in regions by find_rgns without extension: 6476 blocks
     Blocks wrapped in regions by 2 iterations in extend_rgns: 11155 blocks
     (We don't count single block regions here).

     By default we do at most 2 iterations.
     This can be overridden with the max-sched-extend-regions-iters parameter:
     0 - disable region extension,
     N > 0 - do at most N iterations.  */

  if (sched_verbose && iter != 0)
    fprintf (sched_dump, ";; Region extension iterations: %d%s\n", iter,
             rescan ? "... failed" : "");

  if (!rescan && iter != 0)
    {
      int *s1 = NULL, s1_sz = 0;

      /* Save the old statistics for later printout.  */
      if (sched_verbose >= 6)
        s1_sz = gather_region_statistics (&s1);

      /* We have succeeded.  Now assemble the regions.  */
      for (i = nblocks - 1; i >= 0; i--)
        {
          int bbn = order[i];

          if (max_hdr[bbn] == bbn)
            /* BBN is a region head.  */
            {
              edge e;
              edge_iterator ei;
              int num_bbs = 0, j, num_insns = 0, large;

              large = too_large (bbn, &num_bbs, &num_insns);

              degree[bbn] = -1;
              rgn_bb_table[idx] = bbn;
              RGN_BLOCKS (nr_regions) = idx++;
              RGN_DONT_CALC_DEPS (nr_regions) = 0;
              RGN_HAS_REAL_EBB (nr_regions) = 0;
              CONTAINING_RGN (bbn) = nr_regions;
              BLOCK_TO_BB (bbn) = 0;

              FOR_EACH_EDGE (e, ei, BASIC_BLOCK (bbn)->succs)
                if (e->dest != EXIT_BLOCK_PTR)
                  degree[e->dest->index]--;

              if (!large)
                /* Here we check whether the region is too_large.  */
                for (j = i - 1; j >= 0; j--)
                  {
                    int succn = order[j];
                    if (max_hdr[succn] == bbn)
                      {
                        if ((large = too_large (succn, &num_bbs, &num_insns)))
                          break;
                      }
                  }

              if (large)
                /* If the region is too_large, then wrap every block of
                   the region into a single-block region.
                   Here we wrap only the region head; the other blocks
                   are processed in the loop below.  */
                {
                  RGN_NR_BLOCKS (nr_regions) = 1;
                  nr_regions++;
                }

              num_bbs = 1;

              for (j = i - 1; j >= 0; j--)
                {
                  int succn = order[j];

                  if (max_hdr[succn] == bbn)
                    /* This loop iterates over all basic blocks that
                       are supposed to be in the region with head BBN,
                       and wraps them into that region (or into a
                       single-block region).  */
                    {
                      gcc_assert (degree[succn] == 0);

                      degree[succn] = -1;
                      rgn_bb_table[idx] = succn;
                      BLOCK_TO_BB (succn) = large ? 0 : num_bbs++;
                      CONTAINING_RGN (succn) = nr_regions;

                      if (large)
                        /* Wrap SUCCN into a single-block region.  */
                        {
                          RGN_BLOCKS (nr_regions) = idx;
                          RGN_NR_BLOCKS (nr_regions) = 1;
                          RGN_DONT_CALC_DEPS (nr_regions) = 0;
                          RGN_HAS_REAL_EBB (nr_regions) = 0;
                          nr_regions++;
                        }

                      idx++;

                      FOR_EACH_EDGE (e, ei, BASIC_BLOCK (succn)->succs)
                        if (e->dest != EXIT_BLOCK_PTR)
                          degree[e->dest->index]--;
                    }
                }

              if (!large)
                {
                  RGN_NR_BLOCKS (nr_regions) = num_bbs;
                  nr_regions++;
                }
            }
        }

      if (sched_verbose >= 6)
        {
          int *s2, s2_sz;

          /* Get the new statistics and print the comparison with the
             one before calling this function.  */
          s2_sz = gather_region_statistics (&s2);
          print_region_statistics (s1, s1_sz, s2, s2_sz);
          free (s1);
          free (s2);
        }
    }

  free (order);
  free (max_hdr);

  *idxp = idx;
}

/* Functions for regions scheduling information.  */

/* Compute dominators, probability, and potential-split-edges of bb.
   Assume that these values were already computed for bb's predecessors.  */
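/* As a worked example of the probability accumulation below (with
   REG_BR_PROB_BASE == 10000): a predecessor reached with prob 8000
   (80%), through an edge taken with probability 5000 (50%), contributes
   8000 * 5000 / 10000 == 4000 (40%) to prob[bb].  */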

static void
compute_dom_prob_ps (int bb)
{
  edge_iterator in_ei;
  edge in_edge;

  /* We shouldn't have any real ebbs yet.  */
  gcc_assert (ebb_head [bb] == bb + current_blocks);

  if (IS_RGN_ENTRY (bb))
    {
      SET_BIT (dom[bb], 0);
      prob[bb] = REG_BR_PROB_BASE;
      return;
    }

  prob[bb] = 0;

  /* Initialize dom[bb] to '111..1'.  */
  sbitmap_ones (dom[bb]);

  FOR_EACH_EDGE (in_edge, in_ei, BASIC_BLOCK (BB_TO_BLOCK (bb))->preds)
    {
      int pred_bb;
      edge out_edge;
      edge_iterator out_ei;

      if (in_edge->src == ENTRY_BLOCK_PTR)
        continue;

      pred_bb = BLOCK_TO_BB (in_edge->src->index);
      sbitmap_a_and_b (dom[bb], dom[bb], dom[pred_bb]);
      sbitmap_a_or_b (ancestor_edges[bb],
                      ancestor_edges[bb], ancestor_edges[pred_bb]);

      SET_BIT (ancestor_edges[bb], EDGE_TO_BIT (in_edge));

      sbitmap_a_or_b (pot_split[bb], pot_split[bb], pot_split[pred_bb]);

      FOR_EACH_EDGE (out_edge, out_ei, in_edge->src->succs)
        SET_BIT (pot_split[bb], EDGE_TO_BIT (out_edge));

      prob[bb] += ((prob[pred_bb] * in_edge->probability) / REG_BR_PROB_BASE);
    }

  SET_BIT (dom[bb], bb);
  sbitmap_difference (pot_split[bb], pot_split[bb], ancestor_edges[bb]);

  if (sched_verbose >= 2)
    fprintf (sched_dump, ";; bb_prob(%d, %d) = %3d\n", bb, BB_TO_BLOCK (bb),
             (100 * prob[bb]) / REG_BR_PROB_BASE);
}

/* Functions for target info.  */

/* Compute in BL the list of split-edges of bb_src relative to bb_trg.
   Note that bb_trg dominates bb_src.  */

static void
split_edges (int bb_src, int bb_trg, edgelst *bl)
{
  sbitmap src = sbitmap_alloc (pot_split[bb_src]->n_bits);
  sbitmap_copy (src, pot_split[bb_src]);

  sbitmap_difference (src, src, pot_split[bb_trg]);
  extract_edgelst (src, bl);
  sbitmap_free (src);
}

/* Find the valid candidate-source-blocks for the target block TRG, compute
   their probability, and check if they are speculative or not.
   For speculative sources, compute their update-blocks and split-blocks.  */
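/* The relative probability computed below is
   src_prob = prob[i] * REG_BR_PROB_BASE / prob[trg]; e.g. (hypothetical
   values) prob[trg] == 8000 and prob[i] == 2000 give src_prob == 2500,
   i.e. a 25% chance that the source block is reached once the target
   has been.  */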

static void
compute_trg_info (int trg)
{
  candidate *sp;
  edgelst el = { NULL, 0 };
  int i, j, k, update_idx;
  basic_block block;
  sbitmap visited;
  edge_iterator ei;
  edge e;

  candidate_table = XNEWVEC (candidate, current_nr_blocks);

  bblst_last = 0;
  /* bblst_table holds split blocks and update blocks for each block after
     the current one in the region.  Split blocks and update blocks are
     the TO blocks of region edges, so there can be at most rgn_nr_edges
     of them.  */
  bblst_size = (current_nr_blocks - target_bb) * rgn_nr_edges;
  bblst_table = XNEWVEC (basic_block, bblst_size);

  edgelst_last = 0;
  edgelst_table = XNEWVEC (edge, rgn_nr_edges);

  /* Define some of the fields for the target bb as well.  */
  sp = candidate_table + trg;
  sp->is_valid = 1;
  sp->is_speculative = 0;
  sp->src_prob = REG_BR_PROB_BASE;

  visited = sbitmap_alloc (last_basic_block);

  for (i = trg + 1; i < current_nr_blocks; i++)
    {
      sp = candidate_table + i;

      sp->is_valid = IS_DOMINATED (i, trg);
      if (sp->is_valid)
        {
          int tf = prob[trg], cf = prob[i];

          /* In CFGs with low probability edges TF can possibly be zero.  */
          sp->src_prob = (tf ? ((cf * REG_BR_PROB_BASE) / tf) : 0);
          sp->is_valid = (sp->src_prob >= min_spec_prob);
        }

      if (sp->is_valid)
        {
          split_edges (i, trg, &el);
          sp->is_speculative = (el.nr_members) ? 1 : 0;
          if (sp->is_speculative && !flag_schedule_speculative)
            sp->is_valid = 0;
        }

      if (sp->is_valid)
        {
          /* Compute split blocks and store them in bblst_table.
             The TO block of every split edge is a split block.  */
          sp->split_bbs.first_member = &bblst_table[bblst_last];
          sp->split_bbs.nr_members = el.nr_members;
          for (j = 0; j < el.nr_members; bblst_last++, j++)
            bblst_table[bblst_last] = el.first_member[j]->dest;
          sp->update_bbs.first_member = &bblst_table[bblst_last];

          /* Compute update blocks and store them in bblst_table.
             For every split edge, look at the FROM block, and check
             all out edges.  For each out edge that is not a split edge,
             add the TO block to the update block list.  This list can end
             up with a lot of duplicates.  We need to weed them out to avoid
             overrunning the end of the bblst_table.  */

          update_idx = 0;
          sbitmap_zero (visited);
          for (j = 0; j < el.nr_members; j++)
            {
              block = el.first_member[j]->src;
              FOR_EACH_EDGE (e, ei, block->succs)
                {
                  if (!TEST_BIT (visited, e->dest->index))
                    {
                      for (k = 0; k < el.nr_members; k++)
                        if (e == el.first_member[k])
                          break;

                      if (k >= el.nr_members)
                        {
                          bblst_table[bblst_last++] = e->dest;
                          SET_BIT (visited, e->dest->index);
                          update_idx++;
                        }
                    }
                }
            }
          sp->update_bbs.nr_members = update_idx;

          /* Make sure we didn't overrun the end of bblst_table.  */
          gcc_assert (bblst_last <= bblst_size);
        }
      else
        {
          sp->split_bbs.nr_members = sp->update_bbs.nr_members = 0;

          sp->is_speculative = 0;
          sp->src_prob = 0;
        }
    }

  sbitmap_free (visited);
}

/* Free the computed target info.  */
static void
free_trg_info (void)
{
  free (candidate_table);
  free (bblst_table);
  free (edgelst_table);
}

/* Print candidates info, for debugging purposes.  Callable from debugger.  */

void
debug_candidate (int i)
{
  if (!candidate_table[i].is_valid)
    return;

  if (candidate_table[i].is_speculative)
    {
      int j;
      fprintf (sched_dump, "src b %d bb %d speculative \n", BB_TO_BLOCK (i), i);

      fprintf (sched_dump, "split path: ");
      for (j = 0; j < candidate_table[i].split_bbs.nr_members; j++)
        {
          int b = candidate_table[i].split_bbs.first_member[j]->index;

          fprintf (sched_dump, " %d ", b);
        }
      fprintf (sched_dump, "\n");

      fprintf (sched_dump, "update path: ");
      for (j = 0; j < candidate_table[i].update_bbs.nr_members; j++)
        {
          int b = candidate_table[i].update_bbs.first_member[j]->index;

          fprintf (sched_dump, " %d ", b);
        }
      fprintf (sched_dump, "\n");
    }
  else
    {
      fprintf (sched_dump, " src %d equivalent\n", BB_TO_BLOCK (i));
    }
}

/* Print candidates info, for debugging purposes.  Callable from debugger.  */

void
debug_candidates (int trg)
{
  int i;

  fprintf (sched_dump, "----------- candidate table: target: b=%d bb=%d ---\n",
           BB_TO_BLOCK (trg), trg);
  for (i = trg + 1; i < current_nr_blocks; i++)
    debug_candidate (i);
}

/* Functions for speculative scheduling.  */

static bitmap_head not_in_df;

/* Return 0 if x is a set of a register alive in the beginning of one
   of the split-blocks of src, otherwise return 1.  */
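/* For instance, speculatively hoisting a (set (reg R) ...) from SRC up
   into the target block would be wrong if R is live on entry to some
   split block, i.e. on a path that bypasses SRC: the move would clobber
   a value that that path still needs.  */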
|
|
1638
|
|
1639 static int
|
|
1640 check_live_1 (int src, rtx x)
|
|
1641 {
|
|
1642 int i;
|
|
1643 int regno;
|
|
1644 rtx reg = SET_DEST (x);
|
|
1645
|
|
1646 if (reg == 0)
|
|
1647 return 1;
|
|
1648
|
|
1649 while (GET_CODE (reg) == SUBREG
|
|
1650 || GET_CODE (reg) == ZERO_EXTRACT
|
|
1651 || GET_CODE (reg) == STRICT_LOW_PART)
|
|
1652 reg = XEXP (reg, 0);
|
|
1653
|
|
1654 if (GET_CODE (reg) == PARALLEL)
|
|
1655 {
|
|
1656 int i;
|
|
1657
|
|
1658 for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
|
|
1659 if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
|
|
1660 if (check_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0)))
|
|
1661 return 1;
|
|
1662
|
|
1663 return 0;
|
|
1664 }
|
|
1665
|
|
1666 if (!REG_P (reg))
|
|
1667 return 1;
|
|
1668
|
|
1669 regno = REGNO (reg);
|
|
1670
|
|
1671 if (regno < FIRST_PSEUDO_REGISTER && global_regs[regno])
|
|
1672 {
|
|
1673 /* Global registers are assumed live. */
|
|
1674 return 0;
|
|
1675 }
|
|
1676 else
|
|
1677 {
|
|
1678 if (regno < FIRST_PSEUDO_REGISTER)
|
|
1679 {
|
|
1680 /* Check for hard registers. */
|
|
1681 int j = hard_regno_nregs[regno][GET_MODE (reg)];
|
|
1682 while (--j >= 0)
|
|
1683 {
|
|
1684 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
|
|
1685 {
|
|
1686 basic_block b = candidate_table[src].split_bbs.first_member[i];
|
|
1687 int t = bitmap_bit_p (¬_in_df, b->index);
|
|
1688
|
|
1689 /* We can have split blocks, that were recently generated.
|
|
1690 Such blocks are always outside current region. */
|
|
1691 gcc_assert (!t || (CONTAINING_RGN (b->index)
|
|
1692 != CONTAINING_RGN (BB_TO_BLOCK (src))));
|
|
1693
|
|
1694 if (t || REGNO_REG_SET_P (df_get_live_in (b), regno + j))
|
|
1695 return 0;
|
|
1696 }
|
|
1697 }
|
|
1698 }
|
|
1699 else
|
|
1700 {
|
|
1701 /* Check for pseudo registers. */
|
|
1702 for (i = 0; i < candidate_table[src].split_bbs.nr_members; i++)
|
|
1703 {
|
|
1704 basic_block b = candidate_table[src].split_bbs.first_member[i];
|
|
1705 int t = bitmap_bit_p (¬_in_df, b->index);
|
|
1706
|
|
1707 gcc_assert (!t || (CONTAINING_RGN (b->index)
|
|
1708 != CONTAINING_RGN (BB_TO_BLOCK (src))));
|
|
1709
|
|
1710 if (t || REGNO_REG_SET_P (df_get_live_in (b), regno))
|
|
1711 return 0;
|
|
1712 }
|
|
1713 }
|
|
1714 }
|
|
1715
|
|
1716 return 1;
|
|
1717 }
|
|
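
/* A worked example (block and register numbers made up): suppose the
   source block's only split block is B7 and we consider moving
   "r100 = ..." up speculatively.  If r100 is live at the start of B7,
   some path that does not execute the source block still needs the old
   value, so check_live_1 returns 0 and the motion is rejected.  */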

/* If x is a set of a register R, mark that R is alive in the beginning
   of every update-block of src.  */

static void
update_live_1 (int src, rtx x)
{
  int i;
  int regno;
  rtx reg = SET_DEST (x);

  if (reg == 0)
    return;

  while (GET_CODE (reg) == SUBREG
         || GET_CODE (reg) == ZERO_EXTRACT
         || GET_CODE (reg) == STRICT_LOW_PART)
    reg = XEXP (reg, 0);

  if (GET_CODE (reg) == PARALLEL)
    {
      int i;

      for (i = XVECLEN (reg, 0) - 1; i >= 0; i--)
        if (XEXP (XVECEXP (reg, 0, i), 0) != 0)
          update_live_1 (src, XEXP (XVECEXP (reg, 0, i), 0));

      return;
    }

  if (!REG_P (reg))
    return;

  /* Global registers are always live, so the code below does not apply
     to them.  */

  regno = REGNO (reg);

  if (regno >= FIRST_PSEUDO_REGISTER || !global_regs[regno])
    {
      if (regno < FIRST_PSEUDO_REGISTER)
        {
          int j = hard_regno_nregs[regno][GET_MODE (reg)];
          while (--j >= 0)
            {
              for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
                {
                  basic_block b = candidate_table[src].update_bbs.first_member[i];

                  SET_REGNO_REG_SET (df_get_live_in (b), regno + j);
                }
            }
        }
      else
        {
          for (i = 0; i < candidate_table[src].update_bbs.nr_members; i++)
            {
              basic_block b = candidate_table[src].update_bbs.first_member[i];

              SET_REGNO_REG_SET (df_get_live_in (b), regno);
            }
        }
    }
}

/* Return 1 if insn can be speculatively moved from block src to trg,
   otherwise return 0.  Called before the first insertion of insn to
   the ready list or before scheduling.  */

static int
check_live (rtx insn, int src)
{
  /* Find the registers set by instruction.  */
  if (GET_CODE (PATTERN (insn)) == SET
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    return check_live_1 (src, PATTERN (insn));
  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      int j;
      for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
        if ((GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
             || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
            && !check_live_1 (src, XVECEXP (PATTERN (insn), 0, j)))
          return 0;

      return 1;
    }

  return 1;
}

/* Update the live registers info after insn was moved speculatively from
   block src to trg.  */

static void
update_live (rtx insn, int src)
{
  /* Find the registers set by instruction.  */
  if (GET_CODE (PATTERN (insn)) == SET
      || GET_CODE (PATTERN (insn)) == CLOBBER)
    update_live_1 (src, PATTERN (insn));
  else if (GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      int j;
      for (j = XVECLEN (PATTERN (insn), 0) - 1; j >= 0; j--)
        if (GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == SET
            || GET_CODE (XVECEXP (PATTERN (insn), 0, j)) == CLOBBER)
          update_live_1 (src, XVECEXP (PATTERN (insn), 0, j));
    }
}

/* Nonzero if block bb_to is equal to, or reachable from block bb_from.  */
#define IS_REACHABLE(bb_from, bb_to)                                    \
  (bb_from == bb_to                                                     \
   || IS_RGN_ENTRY (bb_from)                                            \
   || (TEST_BIT (ancestor_edges[bb_to],                                 \
         EDGE_TO_BIT (single_pred_edge (BASIC_BLOCK (BB_TO_BLOCK (bb_from)))))))

/* Turns on the fed_by_spec_load flag for insns fed by load_insn.  */

static void
set_spec_fed (rtx load_insn)
{
  sd_iterator_def sd_it;
  dep_t dep;

  FOR_EACH_DEP (load_insn, SD_LIST_FORW, sd_it, dep)
    if (DEP_TYPE (dep) == REG_DEP_TRUE)
      FED_BY_SPEC_LOAD (DEP_CON (dep)) = 1;
}
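
/* An illustrative reading of IS_REACHABLE (bb indices made up): in a
   diamond-shaped region bb0 -> {bb1, bb2} -> bb3, ancestor_edges[3]
   records the edges entering bb1 and bb2, so IS_REACHABLE (1, 3) holds
   (bb3 is reached from bb1), while IS_REACHABLE (1, 2) does not.  */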

/* On the path from the insn to load_insn_bb, find a conditional
   branch depending on insn, that guards the speculative load.  */

static int
find_conditional_protection (rtx insn, int load_insn_bb)
{
  sd_iterator_def sd_it;
  dep_t dep;

  /* Iterate through DEF-USE forward dependences.  */
  FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
    {
      rtx next = DEP_CON (dep);

      if ((CONTAINING_RGN (BLOCK_NUM (next)) ==
           CONTAINING_RGN (BB_TO_BLOCK (load_insn_bb)))
          && IS_REACHABLE (INSN_BB (next), load_insn_bb)
          && load_insn_bb != INSN_BB (next)
          && DEP_TYPE (dep) == REG_DEP_TRUE
          && (JUMP_P (next)
              || find_conditional_protection (next, load_insn_bb)))
        return 1;
    }
  return 0;
}  /* find_conditional_protection */

/* Returns 1 if the same insn1 that participates in the computation
   of load_insn's address is also feeding a conditional branch that
   guards load_insn.  This is true if we find two DEF-USE
   chains:
   insn1 -> ... -> conditional-branch
   insn1 -> ... -> load_insn,
   and if a flow path exists:
   insn1 -> ... -> conditional-branch -> ... -> load_insn,
   and if insn1 is on the path
   region-entry -> ... -> bb_trg -> ... load_insn.

   Locate insn1 by climbing on INSN_BACK_DEPS from load_insn.
   Locate the branch by following INSN_FORW_DEPS from insn1.  */

static int
is_conditionally_protected (rtx load_insn, int bb_src, int bb_trg)
{
  sd_iterator_def sd_it;
  dep_t dep;

  FOR_EACH_DEP (load_insn, SD_LIST_BACK, sd_it, dep)
    {
      rtx insn1 = DEP_PRO (dep);

      /* Must be a DEF-USE dependence upon non-branch.  */
      if (DEP_TYPE (dep) != REG_DEP_TRUE
          || JUMP_P (insn1))
        continue;

      /* A path must exist: region-entry -> ... -> bb_trg -> ... load_insn.  */
      if (INSN_BB (insn1) == bb_src
          || (CONTAINING_RGN (BLOCK_NUM (insn1))
              != CONTAINING_RGN (BB_TO_BLOCK (bb_src)))
          || (!IS_REACHABLE (bb_trg, INSN_BB (insn1))
              && !IS_REACHABLE (INSN_BB (insn1), bb_trg)))
        continue;

      /* Now search for the conditional-branch.  */
      if (find_conditional_protection (insn1, bb_src))
        return 1;

      /* Recursive step: search another insn1, "above" current insn1.  */
      return is_conditionally_protected (insn1, bb_src, bb_trg);
    }

  /* The chain does not exist.  */
  return 0;
}  /* is_conditionally_protected */
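
/* The matched pattern, in pseudo-RTL (illustrative):

     insn1:   r1 = r2 + r3
     branch:  if (r1 == 0) jump past the load
     load:    r4 = [r1]

   insn1 feeds both the branch and the load address, and the branch guards
   the load, so the load is conditionally protected.  */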

/* Returns 1 if a clue for "similar load" 'insn2' is found, and hence
   load_insn can move speculatively from bb_src to bb_trg.  All the
   following must hold:

   (1) both loads have 1 base register (PFREE_CANDIDATEs).
   (2) load_insn and insn2 have a def-use dependence upon
       the same insn 'insn1'.
   (3) either insn2 is in bb_trg, or:
       - there's only one split-block, and
       - insn2 is in that split-block.

   From all these we can conclude that the two loads access memory
   addresses that differ at most by a constant, and hence if moving
   load_insn would cause an exception, it would have been caused by
   insn2 anyhow.  */

static int
is_pfree (rtx load_insn, int bb_src, int bb_trg)
{
  sd_iterator_def back_sd_it;
  dep_t back_dep;
  candidate *candp = candidate_table + bb_src;

  if (candp->split_bbs.nr_members != 1)
    /* Must have exactly one escape block.  */
    return 0;

  FOR_EACH_DEP (load_insn, SD_LIST_BACK, back_sd_it, back_dep)
    {
      rtx insn1 = DEP_PRO (back_dep);

      if (DEP_TYPE (back_dep) == REG_DEP_TRUE)
        /* Found a DEF-USE dependence (insn1, load_insn).  */
        {
          sd_iterator_def fore_sd_it;
          dep_t fore_dep;

          FOR_EACH_DEP (insn1, SD_LIST_FORW, fore_sd_it, fore_dep)
            {
              rtx insn2 = DEP_CON (fore_dep);

              if (DEP_TYPE (fore_dep) == REG_DEP_TRUE)
                {
                  /* Found a DEF-USE dependence (insn1, insn2).  */
                  if (haifa_classify_insn (insn2) != PFREE_CANDIDATE)
                    /* insn2 not guaranteed to be a 1 base reg load.  */
                    continue;

                  if (INSN_BB (insn2) == bb_trg)
                    /* insn2 is the similar load, in the target block.  */
                    return 1;

                  if (*(candp->split_bbs.first_member) == BLOCK_FOR_INSN (insn2))
                    /* insn2 is a similar load, in a split-block.  */
                    return 1;
                }
            }
        }
    }

  /* Couldn't find a similar load.  */
  return 0;
}  /* is_pfree */
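
/* Illustrative "similar load" case: if the target block unconditionally
   executes "r5 = [r1+4]" and the candidate is "r6 = [r1+8]", both fed by
   the same definition of r1, the two addresses differ by a constant, so
   any fault the moved load could raise would already be raised by the
   existing one.  */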

/* Return 1 if load_insn is prisky (i.e. if load_insn is fed by
   a load moved speculatively, or if load_insn is protected by
   a compare on load_insn's address).  */

static int
is_prisky (rtx load_insn, int bb_src, int bb_trg)
{
  if (FED_BY_SPEC_LOAD (load_insn))
    return 1;

  if (sd_lists_empty_p (load_insn, SD_LIST_BACK))
    /* Dependence may 'hide' out of the region.  */
    return 1;

  if (is_conditionally_protected (load_insn, bb_src, bb_trg))
    return 1;

  return 0;
}

/* Insn is a candidate to be moved speculatively from bb_src to bb_trg.
   Return 1 if insn is exception-free (and the motion is valid)
   and 0 otherwise.  */

static int
is_exception_free (rtx insn, int bb_src, int bb_trg)
{
  int insn_class = haifa_classify_insn (insn);

  /* Handle non-load insns.  */
  switch (insn_class)
    {
    case TRAP_FREE:
      return 1;
    case TRAP_RISKY:
      return 0;
    default:;
    }

  /* Handle loads.  */
  if (!flag_schedule_speculative_load)
    return 0;
  IS_LOAD_INSN (insn) = 1;
  switch (insn_class)
    {
    case IFREE:
      return 1;
    case IRISKY:
      return 0;
    case PFREE_CANDIDATE:
      if (is_pfree (insn, bb_src, bb_trg))
        return 1;
      /* Don't 'break' here: PFREE-candidate is also PRISKY-candidate.  */
    case PRISKY_CANDIDATE:
      if (!flag_schedule_speculative_load_dangerous
          || is_prisky (insn, bb_src, bb_trg))
        return 0;
      break;
    default:;
    }

  return flag_schedule_speculative_load_dangerous;
}
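
/* Classification sketch (examples illustrative): a register-register add
   is TRAP_FREE and always movable; a possibly trapping division is
   TRAP_RISKY and never moved.  A single-base-register load such as
   "r1 = [r2+8]" is typically a PFREE_CANDIDATE: it moves if is_pfree ()
   finds a similar load, and otherwise only under
   -fsched-spec-load-dangerous when is_prisky () finds no hazard.  */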

/* The number of insns from the current block scheduled so far.  */
static int sched_target_n_insns;
/* The number of insns from the current block to be scheduled in total.  */
static int target_n_insns;
/* The number of insns from the entire region scheduled so far.  */
static int sched_n_insns;

/* Implementations of the sched_info functions for region scheduling.  */
static void init_ready_list (void);
static int can_schedule_ready_p (rtx);
static void begin_schedule_ready (rtx, rtx);
static ds_t new_ready (rtx, ds_t);
static int schedule_more_p (void);
static const char *rgn_print_insn (const_rtx, int);
static int rgn_rank (rtx, rtx);
static void compute_jump_reg_dependencies (rtx, regset, regset, regset);

/* Functions for speculative scheduling.  */
static void rgn_add_remove_insn (rtx, int);
static void rgn_add_block (basic_block, basic_block);
static void rgn_fix_recovery_cfg (int, int, int);
static basic_block advance_target_bb (basic_block, rtx);

/* Return nonzero if there are more insns that should be scheduled.  */

static int
schedule_more_p (void)
{
  return sched_target_n_insns < target_n_insns;
}

/* Add all insns that are initially ready to the ready list.  Called once
   before scheduling a set of insns.  */

static void
init_ready_list (void)
{
  rtx prev_head = current_sched_info->prev_head;
  rtx next_tail = current_sched_info->next_tail;
  int bb_src;
  rtx insn;

  target_n_insns = 0;
  sched_target_n_insns = 0;
  sched_n_insns = 0;

  /* Print debugging information.  */
  if (sched_verbose >= 5)
    debug_rgn_dependencies (target_bb);

  /* Prepare current target block info.  */
  if (current_nr_blocks > 1)
    compute_trg_info (target_bb);

  /* Initialize ready list with all 'ready' insns in target block.
     Count number of insns in the target block being scheduled.  */
  for (insn = NEXT_INSN (prev_head); insn != next_tail; insn = NEXT_INSN (insn))
    {
      try_ready (insn);
      target_n_insns++;

      gcc_assert (!(TODO_SPEC (insn) & BEGIN_CONTROL));
    }

  /* Add to ready list all 'ready' insns in valid source blocks.
     For speculative insns, check liveness, exception-freedom, and
     issue delay.  */
  for (bb_src = target_bb + 1; bb_src < current_nr_blocks; bb_src++)
    if (IS_VALID (bb_src))
      {
        rtx src_head;
        rtx src_next_tail;
        rtx tail, head;

        get_ebb_head_tail (EBB_FIRST_BB (bb_src), EBB_LAST_BB (bb_src),
                           &head, &tail);
        src_next_tail = NEXT_INSN (tail);
        src_head = head;

        for (insn = src_head; insn != src_next_tail; insn = NEXT_INSN (insn))
          if (INSN_P (insn))
            try_ready (insn);
      }
}

/* Called after taking INSN from the ready list.  Returns nonzero if this
   insn can be scheduled, zero if we should silently discard it.  */

static int
can_schedule_ready_p (rtx insn)
{
  /* An interblock motion?  */
  if (INSN_BB (insn) != target_bb
      && IS_SPECULATIVE_INSN (insn)
      && !check_live (insn, INSN_BB (insn)))
    return 0;
  else
    return 1;
}

/* Updates counters and other information.  Split from can_schedule_ready_p ()
   because when we schedule insn speculatively then the insn passed to
   can_schedule_ready_p () differs from the one passed to
   begin_schedule_ready ().  */
static void
begin_schedule_ready (rtx insn, rtx last ATTRIBUTE_UNUSED)
{
  /* An interblock motion?  */
  if (INSN_BB (insn) != target_bb)
    {
      if (IS_SPECULATIVE_INSN (insn))
        {
          gcc_assert (check_live (insn, INSN_BB (insn)));

          update_live (insn, INSN_BB (insn));

          /* For speculative load, mark insns fed by it.  */
          if (IS_LOAD_INSN (insn) || FED_BY_SPEC_LOAD (insn))
            set_spec_fed (insn);

          nr_spec++;
        }
      nr_inter++;
    }
  else
    {
      /* In block motion.  */
      sched_target_n_insns++;
    }
  sched_n_insns++;
}

/* Called after INSN has all its hard dependencies resolved and the speculation
   of type TS is enough to overcome them all.
   Return nonzero if it should be moved to the ready list or the queue, or zero
   if we should silently discard it.  */
static ds_t
new_ready (rtx next, ds_t ts)
{
  if (INSN_BB (next) != target_bb)
    {
      int not_ex_free = 0;

      /* For speculative insns, before inserting to ready/queue,
         check live, exception-free, and issue-delay.  */
      if (!IS_VALID (INSN_BB (next))
          || CANT_MOVE (next)
          || (IS_SPECULATIVE_INSN (next)
              && ((recog_memoized (next) >= 0
                   && min_insn_conflict_delay (curr_state, next, next)
                      > PARAM_VALUE (PARAM_MAX_SCHED_INSN_CONFLICT_DELAY))
                  || IS_SPECULATION_CHECK_P (next)
                  || !check_live (next, INSN_BB (next))
                  || (not_ex_free = !is_exception_free (next, INSN_BB (next),
                                                        target_bb)))))
        {
          if (not_ex_free
              /* We are here because is_exception_free () == false.
                 But we possibly can handle that with control speculation.  */
              && sched_deps_info->generate_spec_deps
              && spec_info->mask & BEGIN_CONTROL)
            {
              ds_t new_ds;

              /* Add control speculation to NEXT's dependency type.  */
              new_ds = set_dep_weak (ts, BEGIN_CONTROL, MAX_DEP_WEAK);

              /* Check if NEXT can be speculated with new dependency type.  */
              if (sched_insn_is_legitimate_for_speculation_p (next, new_ds))
                /* Here we got new control-speculative instruction.  */
                ts = new_ds;
              else
                /* NEXT isn't ready yet.  */
                ts = (ts & ~SPECULATIVE) | HARD_DEP;
            }
          else
            /* NEXT isn't ready yet.  */
            ts = (ts & ~SPECULATIVE) | HARD_DEP;
        }
    }

  return ts;
}
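
/* Control-speculation fallback above, sketched (scenario made up): a load
   from a source block that is not exception-free would normally be
   rejected; when the target provides control speculation
   (spec_info->mask & BEGIN_CONTROL), the load may instead become
   control-speculative with weakness MAX_DEP_WEAK, to be paired later with
   a speculation check insn.  */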

/* Return a string that contains the insn uid and optionally anything else
   necessary to identify this insn in an output.  It's valid to use a
   static buffer for this.  The ALIGNED parameter should cause the string
   to be formatted so that multiple output lines will line up nicely.  */

static const char *
rgn_print_insn (const_rtx insn, int aligned)
{
  static char tmp[80];

  if (aligned)
    sprintf (tmp, "b%3d: i%4d", INSN_BB (insn), INSN_UID (insn));
  else
    {
      if (current_nr_blocks > 1 && INSN_BB (insn) != target_bb)
        sprintf (tmp, "%d/b%d", INSN_UID (insn), INSN_BB (insn));
      else
        sprintf (tmp, "%d", INSN_UID (insn));
    }
  return tmp;
}

/* Compare priority of two insns.  Return a positive number if the second
   insn is to be preferred for scheduling, and a negative one if the first
   is to be preferred.  Zero if they are equally good.  */

static int
rgn_rank (rtx insn1, rtx insn2)
{
  /* Some comparisons make sense in interblock scheduling only.  */
  if (INSN_BB (insn1) != INSN_BB (insn2))
    {
      int spec_val, prob_val;

      /* Prefer an inblock motion over an interblock motion.  */
      if ((INSN_BB (insn2) == target_bb) && (INSN_BB (insn1) != target_bb))
        return 1;
      if ((INSN_BB (insn1) == target_bb) && (INSN_BB (insn2) != target_bb))
        return -1;

      /* Prefer a useful motion over a speculative one.  */
      spec_val = IS_SPECULATIVE_INSN (insn1) - IS_SPECULATIVE_INSN (insn2);
      if (spec_val)
        return spec_val;

      /* Prefer a more probable (speculative) insn.  */
      prob_val = INSN_PROBABILITY (insn2) - INSN_PROBABILITY (insn1);
      if (prob_val)
        return prob_val;
    }
  return 0;
}
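
/* Ranking example (blocks illustrative): given insn1 speculative from
   bb 2 and insn2 from the target block, rgn_rank returns 1 and insn2 is
   preferred; between two speculative insns, the one from the more
   probable source block wins via INSN_PROBABILITY.  */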

/* NEXT is an instruction that depends on INSN (a backward dependence);
   return nonzero if we should include this dependence in priority
   calculations.  */

int
contributes_to_priority (rtx next, rtx insn)
{
  /* NEXT and INSN reside in one ebb.  */
  return BLOCK_TO_BB (BLOCK_NUM (next)) == BLOCK_TO_BB (BLOCK_NUM (insn));
}

/* INSN is a JUMP_INSN, COND_EXEC is the set of registers that are
   conditionally set before INSN.  Store the set of registers that
   must be considered as used by this jump in USED and that of
   registers that must be considered as set in SET.  */

static void
compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
                               regset cond_exec ATTRIBUTE_UNUSED,
                               regset used ATTRIBUTE_UNUSED,
                               regset set ATTRIBUTE_UNUSED)
{
  /* Nothing to do here, since we postprocess jumps in
     add_branch_dependences.  */
}

/* This variable holds common_sched_info hooks and data relevant to
   the interblock scheduler.  */
static struct common_sched_info_def rgn_common_sched_info;


/* This holds data for the dependence analysis relevant to
   the interblock scheduler.  */
static struct sched_deps_info_def rgn_sched_deps_info;

/* This holds constant data used for initializing the above structure
   for the Haifa scheduler.  */
static const struct sched_deps_info_def rgn_const_sched_deps_info =
  {
    compute_jump_reg_dependencies,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    0, 0, 0
  };

/* Same as above, but for the selective scheduler.  */
static const struct sched_deps_info_def rgn_const_sel_sched_deps_info =
  {
    compute_jump_reg_dependencies,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    0, 0, 0
  };

/* Used in schedule_insns to initialize current_sched_info for scheduling
   regions (or single basic blocks).  */

static const struct haifa_sched_info rgn_const_sched_info =
  {
    init_ready_list,
    can_schedule_ready_p,
    schedule_more_p,
    new_ready,
    rgn_rank,
    rgn_print_insn,
    contributes_to_priority,

    NULL, NULL,
    NULL, NULL,
    0, 0,

    rgn_add_remove_insn,
    begin_schedule_ready,
    advance_target_bb,
    SCHED_RGN
  };

/* This variable holds the data and hooks needed by the Haifa scheduler
   backend for the interblock scheduler frontend.  */
static struct haifa_sched_info rgn_sched_info;

/* Returns maximum priority that an insn was assigned to.  */

int
get_rgn_sched_max_insns_priority (void)
{
  return rgn_sched_info.sched_max_insns_priority;
}

/* Determine if PAT sets a CLASS_LIKELY_SPILLED_P register.  */

static void sets_likely_spilled_1 (rtx, const_rtx, void *);

static bool
sets_likely_spilled (rtx pat)
{
  bool ret = false;
  note_stores (pat, sets_likely_spilled_1, &ret);
  return ret;
}

/* Helper for sets_likely_spilled; called by note_stores for each
   store in PAT.  */
static void
sets_likely_spilled_1 (rtx x, const_rtx pat, void *data)
{
  bool *ret = (bool *) data;

  if (GET_CODE (pat) == SET
      && REG_P (x)
      && REGNO (x) < FIRST_PSEUDO_REGISTER
      && CLASS_LIKELY_SPILLED_P (REGNO_REG_CLASS (REGNO (x))))
    *ret = true;
}

/* A bitmap to note insns that participate in any dependency.  Used in
   add_branch_dependences.  */
static sbitmap insn_referenced;

/* Add dependences so that branches are scheduled to run last in their
   block.  */
static void
add_branch_dependences (rtx head, rtx tail)
{
  rtx insn, last;

  /* For all branches, calls, uses, clobbers, cc0 setters, and instructions
     that can throw exceptions, force them to remain in order at the end of
     the block by adding dependencies and giving the last a high priority.
     There may be notes present, and prev_head may also be a note.

     Branches must obviously remain at the end.  Calls should remain at the
     end since moving them results in worse register allocation.  Uses remain
     at the end to ensure proper register allocation.

     cc0 setters remain at the end because they can't be moved away from
     their cc0 user.

     COND_EXEC insns cannot be moved past a branch (see e.g. PR17808).

     Insns setting CLASS_LIKELY_SPILLED_P registers (usually return values)
     are not moved before reload because we can wind up with register
     allocation failures.  */

  insn = tail;
  last = 0;
  while (CALL_P (insn)
         || JUMP_P (insn)
         || (NONJUMP_INSN_P (insn)
             && (GET_CODE (PATTERN (insn)) == USE
                 || GET_CODE (PATTERN (insn)) == CLOBBER
                 || can_throw_internal (insn)
#ifdef HAVE_cc0
                 || sets_cc0_p (PATTERN (insn))
#endif
                 || (!reload_completed
                     && sets_likely_spilled (PATTERN (insn)))))
         || NOTE_P (insn))
    {
      if (!NOTE_P (insn))
        {
          if (last != 0
              && sd_find_dep_between (insn, last, false) == NULL)
            {
              if (! sched_insns_conditions_mutex_p (last, insn))
                add_dependence (last, insn, REG_DEP_ANTI);
              SET_BIT (insn_referenced, INSN_LUID (insn));
            }

          CANT_MOVE (insn) = 1;

          last = insn;
        }

      /* Don't overrun the bounds of the basic block.  */
      if (insn == head)
        break;

      insn = PREV_INSN (insn);
    }

  /* Make sure these insns are scheduled last in their block.  */
  insn = last;
  if (insn != 0)
    while (insn != head)
      {
        insn = prev_nonnote_insn (insn);

        if (TEST_BIT (insn_referenced, INSN_LUID (insn)))
          continue;

        if (! sched_insns_conditions_mutex_p (last, insn))
          add_dependence (last, insn, REG_DEP_ANTI);
      }

#ifdef HAVE_conditional_execution
  /* Finally, if the block ends in a jump, and we are doing intra-block
     scheduling, make sure that the branch depends on any COND_EXEC insns
     inside the block to avoid moving the COND_EXECs past the branch insn.

     We only have to do this after reload, because (1) before reload there
     are no COND_EXEC insns, and (2) the region scheduler is an intra-block
     scheduler after reload.

     FIXME: We could in some cases move COND_EXEC insns past the branch if
     this scheduler would be a little smarter.  Consider this code:

                T = [addr]
        C  ?    addr += 4
        !C ?    X += 12
        C  ?    T += 1
        C  ?    jump foo

     On a target with a one cycle stall on a memory access the optimal
     sequence would be:

                T = [addr]
        C  ?    addr += 4
        C  ?    T += 1
        C  ?    jump foo
        !C ?    X += 12

     We don't want to put the 'X += 12' before the branch because it just
     wastes a cycle of execution time when the branch is taken.

     Note that in the example "!C" will always be true.  That is another
     possible improvement for handling COND_EXECs in this scheduler: it
     could remove always-true predicates.  */

  if (!reload_completed || ! JUMP_P (tail))
    return;

  insn = tail;
  while (insn != head)
    {
      insn = PREV_INSN (insn);

      /* Note that we want to add this dependency even when
         sched_insns_conditions_mutex_p returns true.  The whole point
         is that we _want_ this dependency, even if these insns really
         are independent.  */
      if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == COND_EXEC)
        add_dependence (tail, insn, REG_DEP_ANTI);
    }
#endif
}

/* Data structures for the computation of data dependences in a region.  We
   keep one `deps' structure for every basic block.  Before analyzing the
   data dependences for a bb, its variables are initialized as a function of
   the variables of its predecessors.  When the analysis for a bb completes,
   we save the contents to the corresponding bb_deps[bb] variable.  */

static struct deps *bb_deps;
/* Duplicate the INSN_LIST elements of COPY and prepend them to OLD.  */

static rtx
concat_INSN_LIST (rtx copy, rtx old)
{
  rtx new_rtx = old;
  for (; copy ; copy = XEXP (copy, 1))
    new_rtx = alloc_INSN_LIST (XEXP (copy, 0), new_rtx);
  return new_rtx;
}

/* Duplicate the parallel insn/mem list pair COPY_INSNS/COPY_MEMS and
   prepend the copies to *OLD_INSNS_P and *OLD_MEMS_P.  */
static void
concat_insn_mem_list (rtx copy_insns, rtx copy_mems, rtx *old_insns_p,
                      rtx *old_mems_p)
{
  rtx new_insns = *old_insns_p;
  rtx new_mems = *old_mems_p;

  while (copy_insns)
    {
      new_insns = alloc_INSN_LIST (XEXP (copy_insns, 0), new_insns);
      new_mems = alloc_EXPR_LIST (VOIDmode, XEXP (copy_mems, 0), new_mems);
      copy_insns = XEXP (copy_insns, 1);
      copy_mems = XEXP (copy_mems, 1);
    }

  *old_insns_p = new_insns;
  *old_mems_p = new_mems;
}

/* Join PRED_DEPS to the SUCC_DEPS.  */
void
deps_join (struct deps *succ_deps, struct deps *pred_deps)
{
  unsigned reg;
  reg_set_iterator rsi;

  /* The reg_last lists are inherited by successor.  */
  EXECUTE_IF_SET_IN_REG_SET (&pred_deps->reg_last_in_use, 0, reg, rsi)
    {
      struct deps_reg *pred_rl = &pred_deps->reg_last[reg];
      struct deps_reg *succ_rl = &succ_deps->reg_last[reg];

      succ_rl->uses = concat_INSN_LIST (pred_rl->uses, succ_rl->uses);
      succ_rl->sets = concat_INSN_LIST (pred_rl->sets, succ_rl->sets);
      succ_rl->clobbers = concat_INSN_LIST (pred_rl->clobbers,
                                            succ_rl->clobbers);
      succ_rl->uses_length += pred_rl->uses_length;
      succ_rl->clobbers_length += pred_rl->clobbers_length;
    }
  IOR_REG_SET (&succ_deps->reg_last_in_use, &pred_deps->reg_last_in_use);

  /* Mem read/write lists are inherited by successor.  */
  concat_insn_mem_list (pred_deps->pending_read_insns,
                        pred_deps->pending_read_mems,
                        &succ_deps->pending_read_insns,
                        &succ_deps->pending_read_mems);
  concat_insn_mem_list (pred_deps->pending_write_insns,
                        pred_deps->pending_write_mems,
                        &succ_deps->pending_write_insns,
                        &succ_deps->pending_write_mems);

  succ_deps->last_pending_memory_flush
    = concat_INSN_LIST (pred_deps->last_pending_memory_flush,
                        succ_deps->last_pending_memory_flush);

  succ_deps->pending_read_list_length += pred_deps->pending_read_list_length;
  succ_deps->pending_write_list_length += pred_deps->pending_write_list_length;
  succ_deps->pending_flush_length += pred_deps->pending_flush_length;

  /* last_function_call is inherited by successor.  */
  succ_deps->last_function_call
    = concat_INSN_LIST (pred_deps->last_function_call,
                        succ_deps->last_function_call);

  /* sched_before_next_call is inherited by successor.  */
  succ_deps->sched_before_next_call
    = concat_INSN_LIST (pred_deps->sched_before_next_call,
                        succ_deps->sched_before_next_call);
}
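
/* Informally, per register R, deps_join computes
     succ.sets     = copy (pred.sets)     ++ succ.sets
     succ.uses     = copy (pred.uses)     ++ succ.uses
     succ.clobbers = copy (pred.clobbers) ++ succ.clobbers
   (notation informal): the predecessor's INSN_LIST nodes are duplicated,
   so its own lists stay intact and can be freed independently.  */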

/* After computing the dependencies for block BB, propagate the dependencies
   found in PRED_DEPS to the successors of the block.  */
static void
propagate_deps (int bb, struct deps *pred_deps)
{
  basic_block block = BASIC_BLOCK (BB_TO_BLOCK (bb));
  edge_iterator ei;
  edge e;

  /* bb's structures are inherited by its successors.  */
  FOR_EACH_EDGE (e, ei, block->succs)
    {
      /* Only bbs "below" bb, in the same region, are interesting.  */
      if (e->dest == EXIT_BLOCK_PTR
          || CONTAINING_RGN (block->index) != CONTAINING_RGN (e->dest->index)
          || BLOCK_TO_BB (e->dest->index) <= bb)
        continue;

      deps_join (bb_deps + BLOCK_TO_BB (e->dest->index), pred_deps);
    }

  /* These lists should point to the right place, for correct
     freeing later.  */
  bb_deps[bb].pending_read_insns = pred_deps->pending_read_insns;
  bb_deps[bb].pending_read_mems = pred_deps->pending_read_mems;
  bb_deps[bb].pending_write_insns = pred_deps->pending_write_insns;
  bb_deps[bb].pending_write_mems = pred_deps->pending_write_mems;

  /* Can't allow these to be freed twice.  */
  pred_deps->pending_read_insns = 0;
  pred_deps->pending_read_mems = 0;
  pred_deps->pending_write_insns = 0;
  pred_deps->pending_write_mems = 0;
}

/* Compute dependences inside bb.  In a multiple blocks region:
   (1) a bb is analyzed after its predecessors, and (2) the lists in
   effect at the end of bb (after analyzing for bb) are inherited by
   bb's successors.

   Specifically for reg-reg data dependences, the block insns are
   scanned by sched_analyze () top-to-bottom.  Two lists are
   maintained by sched_analyze (): reg_last[].sets for register DEFs,
   and reg_last[].uses for register USEs.

   When analysis is completed for bb, we update for its successors:
   ;  - DEFS[succ] = Union (DEFS [succ], DEFS [bb])
   ;  - USES[succ] = Union (USES [succ], USES [bb])

   The mechanism for computing mem-mem data dependence is very
   similar, and the result is interblock dependences in the region.  */

static void
compute_block_dependences (int bb)
{
  rtx head, tail;
  struct deps tmp_deps;

  tmp_deps = bb_deps[bb];

  /* Do the analysis for this block.  */
  gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
  get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);

  sched_analyze (&tmp_deps, head, tail);

  /* Selective scheduling handles control dependencies by itself.  */
  if (!sel_sched_p ())
    add_branch_dependences (head, tail);

  if (current_nr_blocks > 1)
    propagate_deps (bb, &tmp_deps);

  /* Free up the INSN_LISTs.  */
  free_deps (&tmp_deps);

  if (targetm.sched.dependencies_evaluation_hook)
    targetm.sched.dependencies_evaluation_hook (head, tail);
}

/* Free dependencies of instructions inside BB.  */
static void
free_block_dependencies (int bb)
{
  rtx head;
  rtx tail;

  get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);

  sched_free_deps (head, tail, true);
}

/* Remove all INSN_LISTs and EXPR_LISTs from the pending lists and add
   them to the unused_*_list variables, so that they can be reused.  */

static void
free_pending_lists (void)
{
  int bb;

  for (bb = 0; bb < current_nr_blocks; bb++)
    {
      free_INSN_LIST_list (&bb_deps[bb].pending_read_insns);
      free_INSN_LIST_list (&bb_deps[bb].pending_write_insns);
      free_EXPR_LIST_list (&bb_deps[bb].pending_read_mems);
      free_EXPR_LIST_list (&bb_deps[bb].pending_write_mems);
    }
}

/* Print dependences for debugging starting from FROM_BB.
   Callable from debugger.  */
void
debug_rgn_dependencies (int from_bb)
{
  int bb;

  fprintf (sched_dump,
           ";;   --------------- forward dependences: ------------ \n");

  for (bb = from_bb; bb < current_nr_blocks; bb++)
    {
      rtx head, tail;

      get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);
      fprintf (sched_dump, "\n;;   --- Region Dependences --- b %d bb %d \n",
               BB_TO_BLOCK (bb), bb);

      debug_dependencies (head, tail);
    }
}

/* Print dependencies information for instructions between HEAD and TAIL.
   ??? This function would probably fit best in haifa-sched.c.  */
void
debug_dependencies (rtx head, rtx tail)
{
  rtx insn;
  rtx next_tail = NEXT_INSN (tail);

  fprintf (sched_dump, ";;   %7s%6s%6s%6s%6s%6s%14s\n",
           "insn", "code", "bb", "dep", "prio", "cost",
           "reservation");
  fprintf (sched_dump, ";;   %7s%6s%6s%6s%6s%6s%14s\n",
           "----", "----", "--", "---", "----", "----",
           "-----------");

  for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
    {
      if (! INSN_P (insn))
        {
          int n;
          fprintf (sched_dump, ";;   %6d ", INSN_UID (insn));
          if (NOTE_P (insn))
            {
              n = NOTE_KIND (insn);
              fprintf (sched_dump, "%s\n", GET_NOTE_INSN_NAME (n));
            }
          else
            fprintf (sched_dump, " {%s}\n", GET_RTX_NAME (GET_CODE (insn)));
          continue;
        }

      fprintf (sched_dump,
               ";;   %s%5d%6d%6d%6d%6d%6d   ",
               (SCHED_GROUP_P (insn) ? "+" : " "),
               INSN_UID (insn),
               INSN_CODE (insn),
               BLOCK_NUM (insn),
               sched_emulate_haifa_p ? -1 : sd_lists_size (insn, SD_LIST_BACK),
               (sel_sched_p () ? (sched_emulate_haifa_p ? -1
                                  : INSN_PRIORITY (insn))
                : INSN_PRIORITY (insn)),
               (sel_sched_p () ? (sched_emulate_haifa_p ? -1
                                  : insn_cost (insn))
                : insn_cost (insn)));

      if (recog_memoized (insn) < 0)
        fprintf (sched_dump, "nothing");
      else
        print_reservation (sched_dump, insn);

      fprintf (sched_dump, "\t: ");
      {
        sd_iterator_def sd_it;
        dep_t dep;

        FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
          fprintf (sched_dump, "%d ", INSN_UID (DEP_CON (dep)));
      }
      fprintf (sched_dump, "\n");
    }

  fprintf (sched_dump, "\n");
}
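
/* A dump produced by the above looks roughly like (numbers illustrative):

   ;;      insn  code    bb   dep  prio  cost   reservation
   ;;      ----  ----    --   ---  ----  ----   -----------
   ;;        14    52     3     1     4     2   nothing  : 18 21

   i.e. uid 14, insn code 52, block 3, one backward dependence, priority 4,
   cost 2, then the uids of the insns that depend on it.  */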

/* Returns true if all the basic blocks of the current region have
   NOTE_DISABLE_SCHED_OF_BLOCK, which means that this region should not
   be scheduled.  */
bool
sched_is_disabled_for_current_region_p (void)
{
  int bb;

  for (bb = 0; bb < current_nr_blocks; bb++)
    if (!(BASIC_BLOCK (BB_TO_BLOCK (bb))->flags & BB_DISABLE_SCHEDULE))
      return false;

  return true;
}

/* Free all region dependencies saved in INSN_BACK_DEPS and
   INSN_RESOLVED_BACK_DEPS.  The Haifa scheduler does this on the fly
   when scheduling, so this function is supposed to be called from
   the selective scheduling only.  */
void
free_rgn_deps (void)
{
  int bb;

  for (bb = 0; bb < current_nr_blocks; bb++)
    {
      rtx head, tail;

      gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
      get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);

      sched_free_deps (head, tail, false);
    }
}

/* The number of insns in the current region.  */
static int rgn_n_insns;

/* Compute insn priorities for the current region.  */
void
compute_priorities (void)
{
  int bb;

  current_sched_info->sched_max_insns_priority = 0;
  for (bb = 0; bb < current_nr_blocks; bb++)
    {
      rtx head, tail;

      gcc_assert (EBB_FIRST_BB (bb) == EBB_LAST_BB (bb));
      get_ebb_head_tail (EBB_FIRST_BB (bb), EBB_LAST_BB (bb), &head, &tail);

      rgn_n_insns += set_priorities (head, tail);
    }
  current_sched_info->sched_max_insns_priority++;
}

/* Schedule a region.  A region is either an inner loop, a loop-free
   subroutine, or a single basic block.  Each bb in the region is
   scheduled after its flow predecessors.  */

static void
schedule_region (int rgn)
{
  int bb;
  int sched_rgn_n_insns = 0;

  rgn_n_insns = 0;

  rgn_setup_region (rgn);

  /* Don't schedule a region that is marked by
     NOTE_DISABLE_SCHED_OF_BLOCK.  */
  if (sched_is_disabled_for_current_region_p ())
    return;

  sched_rgn_compute_dependencies (rgn);

  sched_rgn_local_init (rgn);

  /* Set priorities.  */
  compute_priorities ();

  sched_extend_ready_list (rgn_n_insns);

  /* Now we can schedule all blocks.  */
  for (bb = 0; bb < current_nr_blocks; bb++)
    {
      basic_block first_bb, last_bb, curr_bb;
      rtx head, tail;

      first_bb = EBB_FIRST_BB (bb);
      last_bb = EBB_LAST_BB (bb);

      get_ebb_head_tail (first_bb, last_bb, &head, &tail);

      if (no_real_insns_p (head, tail))
        {
          gcc_assert (first_bb == last_bb);
          continue;
        }

      current_sched_info->prev_head = PREV_INSN (head);
      current_sched_info->next_tail = NEXT_INSN (tail);

      remove_notes (head, tail);

      unlink_bb_notes (first_bb, last_bb);

      target_bb = bb;

      gcc_assert (flag_schedule_interblock || current_nr_blocks == 1);
      current_sched_info->queue_must_finish_empty = current_nr_blocks == 1;

      curr_bb = first_bb;
      if (dbg_cnt (sched_block))
        {
          schedule_block (&curr_bb);
          gcc_assert (EBB_FIRST_BB (bb) == first_bb);
          sched_rgn_n_insns += sched_n_insns;
        }
      else
        {
          sched_rgn_n_insns += rgn_n_insns;
        }

      /* Clean up.  */
      if (current_nr_blocks > 1)
        free_trg_info ();
    }

  /* Sanity check: verify that all region insns were scheduled.  */
  gcc_assert (sched_rgn_n_insns == rgn_n_insns);

  sched_finish_ready_list ();

  /* Done with this region.  */
  sched_rgn_local_finish ();

  /* Free dependencies.  */
  for (bb = 0; bb < current_nr_blocks; ++bb)
    free_block_dependencies (bb);

  gcc_assert (haifa_recovery_bb_ever_added_p
              || deps_pools_are_empty_p ());
}

/* Initialize data structures for region scheduling.  */

void
sched_rgn_init (bool single_blocks_p)
{
  min_spec_prob = ((PARAM_VALUE (PARAM_MIN_SPEC_PROB) * REG_BR_PROB_BASE)
                   / 100);

  nr_inter = 0;
  nr_spec = 0;

  extend_regions ();

  CONTAINING_RGN (ENTRY_BLOCK) = -1;
  CONTAINING_RGN (EXIT_BLOCK) = -1;

  /* Compute regions for scheduling.  */
  if (single_blocks_p
      || n_basic_blocks == NUM_FIXED_BLOCKS + 1
      || !flag_schedule_interblock
      || is_cfg_nonregular ())
    {
      find_single_block_region (sel_sched_p ());
    }
  else
    {
      /* Compute the dominators and post dominators.  */
      if (!sel_sched_p ())
        calculate_dominance_info (CDI_DOMINATORS);

      /* Find regions.  */
      find_rgns ();

      if (sched_verbose >= 3)
        debug_regions ();

      /* For now.  This will move as more and more of haifa is converted
         to using the cfg code.  */
      if (!sel_sched_p ())
        free_dominance_info (CDI_DOMINATORS);
    }

  gcc_assert (0 < nr_regions && nr_regions <= n_basic_blocks);

  RGN_BLOCKS (nr_regions) = (RGN_BLOCKS (nr_regions - 1) +
                             RGN_NR_BLOCKS (nr_regions - 1));
}

/* Free data structures for region scheduling.  */
void
sched_rgn_finish (void)
{
  /* Reposition the prologue and epilogue notes in case we moved the
     prologue/epilogue insns.  */
  if (reload_completed)
    reposition_prologue_and_epilogue_notes ();

  if (sched_verbose)
    {
      if (reload_completed == 0
          && flag_schedule_interblock)
        {
          fprintf (sched_dump,
                   "\n;; Procedure interblock/speculative motions == %d/%d \n",
                   nr_inter, nr_spec);
        }
      else
        gcc_assert (nr_inter <= 0);
      fprintf (sched_dump, "\n\n");
    }

  nr_regions = 0;

  free (rgn_table);
  rgn_table = NULL;

  free (rgn_bb_table);
  rgn_bb_table = NULL;

  free (block_to_bb);
  block_to_bb = NULL;

  free (containing_rgn);
  containing_rgn = NULL;

  free (ebb_head);
  ebb_head = NULL;
}

/* Setup global variables like CURRENT_BLOCKS and CURRENT_NR_BLOCKS to
   point to the region RGN.  */
void
rgn_setup_region (int rgn)
{
  int bb;

  /* Set variables for the current region.  */
  current_nr_blocks = RGN_NR_BLOCKS (rgn);
  current_blocks = RGN_BLOCKS (rgn);

  /* EBB_HEAD is a region-scope structure.  But we realloc it for
     each region to save time/memory/something else.
     See comments in add_block1, for what reasons we allocate +1 element.  */
  ebb_head = XRESIZEVEC (int, ebb_head, current_nr_blocks + 1);
  for (bb = 0; bb <= current_nr_blocks; bb++)
    ebb_head[bb] = current_blocks + bb;
}
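
/* For a three-block region whose blocks start at position 10 of
   rgn_bb_table (numbers illustrative), this leaves current_blocks == 10
   and ebb_head[] == {10, 11, 12, 13}: ebb_head[i + 1] - ebb_head[i] is
   the size of ebb i, and the extra trailing element keeps that formula
   valid for the last ebb.  */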

/* Compute instruction dependencies in region RGN.  */
void
sched_rgn_compute_dependencies (int rgn)
{
  if (!RGN_DONT_CALC_DEPS (rgn))
    {
      int bb;

      if (sel_sched_p ())
        sched_emulate_haifa_p = 1;

      init_deps_global ();

      /* Initializations for region data dependence analysis.  */
      bb_deps = XNEWVEC (struct deps, current_nr_blocks);
      for (bb = 0; bb < current_nr_blocks; bb++)
        init_deps (bb_deps + bb);

      /* Initialize bitmap used in add_branch_dependences.  */
      insn_referenced = sbitmap_alloc (sched_max_luid);
      sbitmap_zero (insn_referenced);

      /* Compute backward dependencies.  */
      for (bb = 0; bb < current_nr_blocks; bb++)
        compute_block_dependences (bb);

      sbitmap_free (insn_referenced);
      free_pending_lists ();
      finish_deps_global ();
      free (bb_deps);

      /* We don't want to recalculate this twice.  */
      RGN_DONT_CALC_DEPS (rgn) = 1;

      if (sel_sched_p ())
        sched_emulate_haifa_p = 0;
    }
  else
    /* (This is a recovery block.  It is always a single block region.)
       OR (We use selective scheduling.)  */
    gcc_assert (current_nr_blocks == 1 || sel_sched_p ());
}

/* Initialize data structures for region RGN.  */
void
sched_rgn_local_init (int rgn)
{
  int bb;

  /* Compute interblock info: probabilities, split-edges, dominators, etc.  */
  if (current_nr_blocks > 1)
    {
      basic_block block;
      edge e;
      edge_iterator ei;

      prob = XNEWVEC (int, current_nr_blocks);

      dom = sbitmap_vector_alloc (current_nr_blocks, current_nr_blocks);
      sbitmap_vector_zero (dom, current_nr_blocks);

      /* Use ->aux to implement EDGE_TO_BIT mapping.  */
      rgn_nr_edges = 0;
      FOR_EACH_BB (block)
        {
          if (CONTAINING_RGN (block->index) != rgn)
            continue;
          FOR_EACH_EDGE (e, ei, block->succs)
            SET_EDGE_TO_BIT (e, rgn_nr_edges++);
        }

      rgn_edges = XNEWVEC (edge, rgn_nr_edges);
      rgn_nr_edges = 0;
      FOR_EACH_BB (block)
        {
          if (CONTAINING_RGN (block->index) != rgn)
            continue;
          FOR_EACH_EDGE (e, ei, block->succs)
            rgn_edges[rgn_nr_edges++] = e;
        }

      /* Split edges.  */
      pot_split = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
      sbitmap_vector_zero (pot_split, current_nr_blocks);
      ancestor_edges = sbitmap_vector_alloc (current_nr_blocks, rgn_nr_edges);
      sbitmap_vector_zero (ancestor_edges, current_nr_blocks);

      /* Compute probabilities, dominators, split_edges.  */
      for (bb = 0; bb < current_nr_blocks; bb++)
        compute_dom_prob_ps (bb);

      /* Cleanup ->aux used for EDGE_TO_BIT mapping.  */
      /* We don't need them anymore.  But we want to avoid duplication of
         aux fields in the newly created edges.  */
      FOR_EACH_BB (block)
        {
          if (CONTAINING_RGN (block->index) != rgn)
            continue;
          FOR_EACH_EDGE (e, ei, block->succs)
            e->aux = NULL;
        }
    }
}

/* Free data computed for the finished region.  */
void
sched_rgn_local_free (void)
{
  free (prob);
  sbitmap_vector_free (dom);
  sbitmap_vector_free (pot_split);
  sbitmap_vector_free (ancestor_edges);
  free (rgn_edges);
}

/* Free data computed for the finished region.  */
void
sched_rgn_local_finish (void)
{
  if (current_nr_blocks > 1 && !sel_sched_p ())
    {
      sched_rgn_local_free ();
    }
}

/* Setup scheduler infos.  */
void
rgn_setup_common_sched_info (void)
{
  memcpy (&rgn_common_sched_info, &haifa_common_sched_info,
          sizeof (rgn_common_sched_info));

  rgn_common_sched_info.fix_recovery_cfg = rgn_fix_recovery_cfg;
  rgn_common_sched_info.add_block = rgn_add_block;
  rgn_common_sched_info.estimate_number_of_insns
    = rgn_estimate_number_of_insns;
  rgn_common_sched_info.sched_pass_id = SCHED_RGN_PASS;

  common_sched_info = &rgn_common_sched_info;
}

/* Setup all *_sched_info structures (for the Haifa frontend
   and for the dependence analysis) in the interblock scheduler.  */
void
rgn_setup_sched_infos (void)
{
  if (!sel_sched_p ())
    memcpy (&rgn_sched_deps_info, &rgn_const_sched_deps_info,
            sizeof (rgn_sched_deps_info));
  else
    memcpy (&rgn_sched_deps_info, &rgn_const_sel_sched_deps_info,
            sizeof (rgn_sched_deps_info));

  sched_deps_info = &rgn_sched_deps_info;

  memcpy (&rgn_sched_info, &rgn_const_sched_info, sizeof (rgn_sched_info));
  current_sched_info = &rgn_sched_info;
}

/* The one entry point in this file.  */
void
schedule_insns (void)
{
  int rgn;

  /* Taking care of this degenerate case makes the rest of
     this code simpler.  */
  if (n_basic_blocks == NUM_FIXED_BLOCKS)
    return;

  rgn_setup_common_sched_info ();
  rgn_setup_sched_infos ();

  haifa_sched_init ();
  sched_rgn_init (reload_completed);

  bitmap_initialize (&not_in_df, 0);
  bitmap_clear (&not_in_df);

  /* Schedule every region in the subroutine.  */
  for (rgn = 0; rgn < nr_regions; rgn++)
    if (dbg_cnt (sched_region))
      schedule_region (rgn);

  /* Clean up.  */
  sched_rgn_finish ();
  bitmap_clear (&not_in_df);

  haifa_sched_finish ();
}

/* INSN has been added to/removed from current region.  */
static void
rgn_add_remove_insn (rtx insn, int remove_p)
{
  if (!remove_p)
    rgn_n_insns++;
  else
    rgn_n_insns--;

  if (INSN_BB (insn) == target_bb)
    {
      if (!remove_p)
        target_n_insns++;
      else
        target_n_insns--;
    }
}

/* Extend internal data structures.  */
void
extend_regions (void)
{
  rgn_table = XRESIZEVEC (region, rgn_table, n_basic_blocks);
  rgn_bb_table = XRESIZEVEC (int, rgn_bb_table, n_basic_blocks);
  block_to_bb = XRESIZEVEC (int, block_to_bb, last_basic_block);
  containing_rgn = XRESIZEVEC (int, containing_rgn, last_basic_block);
}

/* Make a new, single-block region out of the newly created block BB and
   append it to the region table.  */
void
rgn_make_new_region_out_of_new_block (basic_block bb)
{
  int i;

  i = RGN_BLOCKS (nr_regions);
  /* I - first free position in rgn_bb_table.  */

  rgn_bb_table[i] = bb->index;
  RGN_NR_BLOCKS (nr_regions) = 1;
  RGN_HAS_REAL_EBB (nr_regions) = 0;
  RGN_DONT_CALC_DEPS (nr_regions) = 0;
  CONTAINING_RGN (bb->index) = nr_regions;
  BLOCK_TO_BB (bb->index) = 0;

  nr_regions++;

  RGN_BLOCKS (nr_regions) = i + 1;
}

/* BB was added to ebb after AFTER.  */
static void
rgn_add_block (basic_block bb, basic_block after)
{
  extend_regions ();
  bitmap_set_bit (&not_in_df, bb->index);

  if (after == 0 || after == EXIT_BLOCK_PTR)
    {
      rgn_make_new_region_out_of_new_block (bb);
      RGN_DONT_CALC_DEPS (nr_regions - 1) = (after == EXIT_BLOCK_PTR);
    }
  else
    {
      int i, pos;

      /* We need to fix rgn_table, block_to_bb, containing_rgn
         and ebb_head.  */

      BLOCK_TO_BB (bb->index) = BLOCK_TO_BB (after->index);

      /* We extend ebb_head to one more position to
         easily find the last position of the last ebb in
         the current region.  Thus, ebb_head[BLOCK_TO_BB (after) + 1]
         is _always_ valid for access.  */

      i = BLOCK_TO_BB (after->index) + 1;
      pos = ebb_head[i] - 1;
      /* Now POS is the index of the last block in the region.  */

      /* Find index of basic block AFTER.  */
      for (; rgn_bb_table[pos] != after->index; pos--)
        ;

      pos++;
      gcc_assert (pos > ebb_head[i - 1]);

      /* i - ebb right after "AFTER".  */
      /* ebb_head[i] - VALID.  */

      /* Source position: ebb_head[i]
         Destination position: ebb_head[i] + 1
         Last position:
           RGN_BLOCKS (nr_regions) - 1
         Number of elements to copy: (last_position) - (source_position) + 1
       */

      memmove (rgn_bb_table + pos + 1,
               rgn_bb_table + pos,
               ((RGN_BLOCKS (nr_regions) - 1) - (pos) + 1)
               * sizeof (*rgn_bb_table));

      rgn_bb_table[pos] = bb->index;

      for (; i <= current_nr_blocks; i++)
        ebb_head [i]++;

      i = CONTAINING_RGN (after->index);
      CONTAINING_RGN (bb->index) = i;

      RGN_HAS_REAL_EBB (i) = 1;

      for (++i; i <= nr_regions; i++)
        RGN_BLOCKS (i)++;
    }
}
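
/* Illustration of the insertion above (indices made up): if the region
   occupies {4, 7, 9} in rgn_bb_table and BB is added right after block 7,
   the tail {9} is shifted by the memmove to yield {4, 7, BB, 9}; the
   ebb_head entries for the following ebbs and the RGN_BLOCKS offsets of
   all later regions are then bumped by one.  */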

/* Fix internal data after interblock movement of jump instruction.
   For parameter meaning please refer to
   sched-int.h: struct sched_info: fix_recovery_cfg.  */
static void
rgn_fix_recovery_cfg (int bbi, int check_bbi, int check_bb_nexti)
{
  int old_pos, new_pos, i;

  BLOCK_TO_BB (check_bb_nexti) = BLOCK_TO_BB (bbi);

  for (old_pos = ebb_head[BLOCK_TO_BB (check_bbi) + 1] - 1;
       rgn_bb_table[old_pos] != check_bb_nexti;
       old_pos--)
    ;
  gcc_assert (old_pos > ebb_head[BLOCK_TO_BB (check_bbi)]);

  for (new_pos = ebb_head[BLOCK_TO_BB (bbi) + 1] - 1;
       rgn_bb_table[new_pos] != bbi;
       new_pos--)
    ;
  new_pos++;
  gcc_assert (new_pos > ebb_head[BLOCK_TO_BB (bbi)]);

  gcc_assert (new_pos < old_pos);

  memmove (rgn_bb_table + new_pos + 1,
           rgn_bb_table + new_pos,
           (old_pos - new_pos) * sizeof (*rgn_bb_table));

  rgn_bb_table[new_pos] = check_bb_nexti;

  for (i = BLOCK_TO_BB (bbi) + 1; i <= BLOCK_TO_BB (check_bbi); i++)
    ebb_head[i]++;
}
3406
/* Return the next block in the ebb chain.  For parameter meaning please
   refer to sched-int.h: struct sched_info: advance_target_bb.  */
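/* Concretely: while INSN is non-null the scheduler is still filling the
   current block, so NULL is returned; once INSN is NULL, the next basic
   block of the current ebb is returned.  */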
static basic_block
advance_target_bb (basic_block bb, rtx insn)
{
  if (insn)
    return 0;

  gcc_assert (BLOCK_TO_BB (bb->index) == target_bb
              && BLOCK_TO_BB (bb->next_bb->index) == target_bb);
  return bb->next_bb;
}

#endif

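/* Gate for the first scheduling pass: run only if -fschedule-insns is
   enabled and the sched_func debug counter has not run out.  */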
static bool
gate_handle_sched (void)
{
#ifdef INSN_SCHEDULING
  return flag_schedule_insns && dbg_cnt (sched_func);
#else
  return 0;
#endif
}

/* Run instruction scheduler.  */
static unsigned int
rest_of_handle_sched (void)
{
#ifdef INSN_SCHEDULING
  if (flag_selective_scheduling
      && ! maybe_skip_selective_scheduling ())
    run_selective_scheduling ();
  else
    schedule_insns ();
#endif
  return 0;
}

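/* Gate for the second scheduling pass: run only when optimizing, when
   -fschedule-insns2 is enabled, and while the sched2_func debug counter
   allows it.  */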
static bool
gate_handle_sched2 (void)
{
#ifdef INSN_SCHEDULING
  return optimize > 0 && flag_schedule_insns_after_reload
    && dbg_cnt (sched2_func);
#else
  return 0;
#endif
}

/* Run second scheduling pass after reload.  */
static unsigned int
rest_of_handle_sched2 (void)
{
#ifdef INSN_SCHEDULING
  if (flag_selective_scheduling2
      && ! maybe_skip_selective_scheduling ())
    run_selective_scheduling ();
  else
    {
      /* Do control and data sched analysis again,
         and write some more of the results to dump file.  */
      if (flag_sched2_use_superblocks || flag_sched2_use_traces)
        schedule_ebbs ();
      else
        schedule_insns ();
    }
#endif
  return 0;
}

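/* Pass descriptors wiring the gates and handlers above into the pass
   manager: "sched1" is the scheduling pass before register allocation,
   "sched2" the one after reload.  */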
struct rtl_opt_pass pass_sched =
{
 {
  RTL_PASS,
  "sched1",                             /* name */
  gate_handle_sched,                    /* gate */
  rest_of_handle_sched,                 /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_SCHED,                             /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_dump_func |
  TODO_verify_flow |
  TODO_ggc_collect                      /* todo_flags_finish */
 }
};

struct rtl_opt_pass pass_sched2 =
{
 {
  RTL_PASS,
  "sched2",                             /* name */
  gate_handle_sched2,                   /* gate */
  rest_of_handle_sched2,                /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_SCHED2,                            /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_dump_func |
  TODO_verify_flow |
  TODO_ggc_collect                      /* todo_flags_finish */
 }
};