Mercurial > hg > CbC > CbC_gcc
comparison gcc/modulo-sched.c @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children | 77e2b8dfacca |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 /* Swing Modulo Scheduling implementation. | |
2 Copyright (C) 2004, 2005, 2006, 2007, 2008 | |
3 Free Software Foundation, Inc. | |
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com> | |
5 | |
6 This file is part of GCC. | |
7 | |
8 GCC is free software; you can redistribute it and/or modify it under | |
9 the terms of the GNU General Public License as published by the Free | |
10 Software Foundation; either version 3, or (at your option) any later | |
11 version. | |
12 | |
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
19 along with GCC; see the file COPYING3. If not see | |
20 <http://www.gnu.org/licenses/>. */ | |
21 | |
22 | |
23 #include "config.h" | |
24 #include "system.h" | |
25 #include "coretypes.h" | |
26 #include "tm.h" | |
27 #include "toplev.h" | |
28 #include "rtl.h" | |
29 #include "tm_p.h" | |
30 #include "hard-reg-set.h" | |
31 #include "regs.h" | |
32 #include "function.h" | |
33 #include "flags.h" | |
34 #include "insn-config.h" | |
35 #include "insn-attr.h" | |
36 #include "except.h" | |
37 #include "toplev.h" | |
38 #include "recog.h" | |
39 #include "sched-int.h" | |
40 #include "target.h" | |
41 #include "cfglayout.h" | |
42 #include "cfgloop.h" | |
43 #include "cfghooks.h" | |
44 #include "expr.h" | |
45 #include "params.h" | |
46 #include "gcov-io.h" | |
47 #include "ddg.h" | |
48 #include "timevar.h" | |
49 #include "tree-pass.h" | |
50 #include "dbgcnt.h" | |
51 | |
52 #ifdef INSN_SCHEDULING | |
53 | |
54 /* This file contains the implementation of the Swing Modulo Scheduler, | |
55 described in the following references: | |
56 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt. | |
57 Lifetime--sensitive modulo scheduling in a production environment. | |
58 IEEE Trans. on Comps., 50(3), March 2001 | |
59 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero. | |
60 Swing Modulo Scheduling: A Lifetime Sensitive Approach. | |
61 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA). | |
62 | |
63 The basic structure is: | |
64 1. Build a data-dependence graph (DDG) for each loop. | |
65 2. Use the DDG to order the insns of a loop (not in topological order | |
66 necessarily, but rather) trying to place each insn after all its | |
67 predecessors _or_ after all its successors. | |
68 3. Compute MII: a lower bound on the number of cycles to schedule the loop. | |
69 4. Use the ordering to perform list-scheduling of the loop: | |
70 1. Set II = MII. We will try to schedule the loop within II cycles. | |
71 2. Try to schedule the insns one by one according to the ordering. | |
72 For each insn compute an interval of cycles by considering already- | |
73 scheduled preds and succs (and associated latencies); try to place | |
74 the insn in the cycles of this window checking for potential | |
75 resource conflicts (using the DFA interface). | |
76 Note: this is different from the cycle-scheduling of schedule_insns; | |
77 here the insns are not scheduled monotonically top-down (nor bottom- | |
78 up). | |
79 3. If failed in scheduling all insns - bump II++ and try again, unless | |
80 II reaches an upper bound MaxII, in which case report failure. | |
81 5. If we succeeded in scheduling the loop within II cycles, we now | |
82 generate prolog and epilog, decrease the counter of the loop, and | |
83 perform modulo variable expansion for live ranges that span more than | |
84 II cycles (i.e. use register copies to prevent a def from overwriting | |
85 itself before reaching the use). | |
86 | |
87 SMS works with countable loops (1) whose control part can be easily | |
88 decoupled from the rest of the loop and (2) whose loop count can | |
89 be easily adjusted. This is because we peel a constant number of | |
90 iterations into a prologue and epilogue for which we want to avoid | |
91 emitting the control part, and a kernel which is to iterate that | |
92 constant number of iterations less than the original loop. So the | |
93 control part should be a set of insns clearly identified and having | |
94 its own iv, not otherwise used in the loop (at-least for now), which | |
95 initializes a register before the loop to the number of iterations. | |
96 Currently SMS relies on the do-loop pattern to recognize such loops, | |
97 where (1) the control part comprises of all insns defining and/or | |
98 using a certain 'count' register and (2) the loop count can be | |
99 adjusted by modifying this register prior to the loop. | |
100 TODO: Rely on cfgloop analysis instead. */ | |
101 | |
102 /* This page defines partial-schedule structures and functions for | |
103 modulo scheduling. */ | |
104 | |
typedef struct partial_schedule *partial_schedule_ptr;
typedef struct ps_insn *ps_insn_ptr;

/* The minimum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)

/* The maximum (absolute) cycle that a node of ps was scheduled in.  */
#define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)

/* Perform signed modulo, always returning a non-negative value.
   NOTE: X and Y may each be evaluated more than once -- avoid
   side effects in the arguments.  */
#define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))

/* The number of different iterations the nodes in ps span, assuming
   the stage boundaries are placed efficiently.  This is
   (max - min + 1) cycles, rounded up to a whole number of II's.  */
#define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
			     + 1 + (ps)->ii - 1) / (ps)->ii)
121 | |
/* A single instruction in the partial schedule.  */
struct ps_insn
{
  /* The corresponding DDG_NODE.  */
  ddg_node_ptr node;

  /* The (absolute) cycle in which the PS instruction is scheduled.
     Same as SCHED_TIME (node).  */
  int cycle;

  /* The next/prev PS_INSN in the same row; these links form the
     linked list that makes up one row of the partial schedule.  */
  ps_insn_ptr next_in_row,
	      prev_in_row;

  /* The number of nodes in the same row that come after this node.  */
  int row_rest_count;
};
139 | |
/* Holds the partial schedule as an array of II rows.  Each entry of the
   array points to a linked list of PS_INSNs, which represents the
   instructions that are scheduled for that row.  */
struct partial_schedule
{
  int ii;	/* Number of rows in the partial schedule.  */
  int history;  /* Threshold for conflict checking using DFA.  */

  /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii).  */
  ps_insn_ptr *rows;

  /* The earliest absolute cycle of an insn in the partial schedule
     (see PS_MIN_CYCLE).  */
  int min_cycle;

  /* The latest absolute cycle of an insn in the partial schedule
     (see PS_MAX_CYCLE).  */
  int max_cycle;

  ddg_ptr g;	/* The DDG of the insns in the partial schedule.  */
};
159 | |
/* We use this to record all the register replacements we do in
   the kernel so we can undo SMS if it is not profitable.  Elements
   form a singly linked list; generate_reg_moves pushes new entries
   at the head.  */
struct undo_replace_buff_elem
{
  rtx insn;	/* The insn in which the replacement was made.  */
  rtx orig_reg; /* The register that was originally referenced.  */
  rtx new_reg;	/* The register-move copy that replaced it.  */
  struct undo_replace_buff_elem *next;	/* Next recorded replacement.  */
};
169 | |
170 | |
171 | |
/* Forward declarations of the partial-schedule primitives, defined
   later in this file.  */
static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
static void free_partial_schedule (partial_schedule_ptr);
static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
void print_partial_schedule (partial_schedule_ptr, FILE *);
static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
						ddg_node_ptr node, int cycle,
						sbitmap must_precede,
						sbitmap must_follow);
static void rotate_partial_schedule (partial_schedule_ptr, int);
void set_row_column_for_ps (partial_schedule_ptr);
static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
185 | |
186 | |
/* This page defines constants and structures for the modulo scheduling
   driver.  */

static int sms_order_nodes (ddg_ptr, int, int *, int *);
static void set_node_sched_params (ddg_ptr);
static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
static void permute_partial_schedule (partial_schedule_ptr, rtx);
static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
				    rtx, rtx);
static void duplicate_insns_of_cycles (partial_schedule_ptr,
				       int, int, int, rtx);

/* Accessors for the scheduling parameters of ddg node X.  Only valid
   after set_node_sched_params has pointed X->aux.info at a
   node_sched_params structure.  */
#define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
#define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
#define SCHED_FIRST_REG_MOVE(x) \
	(((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
#define SCHED_NREG_MOVES(x) \
	(((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
#define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
#define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
#define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
208 | |
/* The scheduling parameters held for each node.  */
typedef struct node_sched_params
{
  int asap;	/* A lower-bound on the absolute scheduling cycle.  */
  int time;	/* The absolute scheduling cycle (time >= asap).  */

  /* The following field (first_reg_move) is a pointer to the first
     register-move instruction added to handle the modulo-variable-expansion
     of the register defined by this node.  This register-move copies the
     original register defined by the node.  */
  rtx first_reg_move;

  /* The number of register-move instructions added, immediately preceding
     first_reg_move.  */
  int nreg_moves;

  int row;    /* Holds time % ii.  */
  int stage;  /* Holds time / ii.  */

  /* The column of a node inside the ps.  If nodes u, v are on the same row,
     u will precede v if column (u) < column (v).  */
  int column;
} *node_sched_params_ptr;
232 | |
233 | |
234 /* The following three functions are copied from the current scheduler | |
235 code in order to use sched_analyze() for computing the dependencies. | |
236 They are used when initializing the sched_info structure. */ | |
237 static const char * | |
238 sms_print_insn (const_rtx insn, int aligned ATTRIBUTE_UNUSED) | |
239 { | |
240 static char tmp[80]; | |
241 | |
242 sprintf (tmp, "i%4d", INSN_UID (insn)); | |
243 return tmp; | |
244 } | |
245 | |
/* Scheduler hook for dependencies of a jump's condition registers.
   Intentionally empty: SMS operates on loops consisting of a single
   basic block, so there is nothing to record here.  */
static void
compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
			       regset cond_exec ATTRIBUTE_UNUSED,
			       regset used ATTRIBUTE_UNUSED,
			       regset set ATTRIBUTE_UNUSED)
{
}
253 | |
/* Common scheduler state; filled in from haifa_common_sched_info and
   tagged with SCHED_SMS_PASS by setup_sched_infos.  */
static struct common_sched_info_def sms_common_sched_info;

/* Dependence-analysis callbacks.  Only the jump-register hook is
   provided; the remaining hooks and flags are unused by SMS.  */
static struct sched_deps_info_def sms_sched_deps_info =
{
  compute_jump_reg_dependencies,
  NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
  NULL,
  0, 0, 0
};

/* Haifa scheduler callbacks.  SMS only needs the insn printer for
   dumps; every other hook stays NULL/0.  */
static struct haifa_sched_info sms_sched_info =
{
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  sms_print_insn,
  NULL,
  NULL, NULL,
  NULL, NULL,
  0, 0,

  NULL, NULL, NULL,
  0
};
280 | |
/* Given HEAD and TAIL which are the first and last insns in a loop;
   return the register which controls the loop.  Return zero if it has
   more than one occurrence in the loop besides the control part or the
   do-loop pattern is not of the form we expect.  */
static rtx
doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
{
#ifdef HAVE_doloop_end
  rtx reg, condition, insn, first_insn_not_to_check;

  if (!JUMP_P (tail))
    return NULL_RTX;

  /* TODO: Free SMS's dependence on doloop_condition_get.  */
  condition = doloop_condition_get (tail);
  if (! condition)
    return NULL_RTX;

  /* The count register is either the bare first operand of the
     condition, or the register inside a (plus (reg) ...) decrement;
     any other shape is unexpected (hence gcc_unreachable).  */
  if (REG_P (XEXP (condition, 0)))
    reg = XEXP (condition, 0);
  else if (GET_CODE (XEXP (condition, 0)) == PLUS
	   && REG_P (XEXP (XEXP (condition, 0), 0)))
    reg = XEXP (XEXP (condition, 0), 0);
  else
    gcc_unreachable ();

  /* Check that the COUNT_REG has no other occurrences in the loop
     until the decrement.  We assume the control part consists of
     either a single (parallel) branch-on-count or a (non-parallel)
     branch immediately preceded by a single (decrement) insn.  */
  first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
			     : PREV_INSN (tail));

  for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
    if (reg_mentioned_p (reg, insn))
      {
	if (dump_file)
	  {
	    fprintf (dump_file, "SMS count_reg found ");
	    print_rtl_single (dump_file, reg);
	    fprintf (dump_file, " outside control in insn:\n");
	    print_rtl_single (dump_file, insn);
	  }

	return NULL_RTX;
      }

  return reg;
#else
  return NULL_RTX;
#endif
}
333 | |
334 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so | |
335 that the number of iterations is a compile-time constant. If so, | |
336 return the rtx that sets COUNT_REG to a constant, and set COUNT to | |
337 this constant. Otherwise return 0. */ | |
338 static rtx | |
339 const_iteration_count (rtx count_reg, basic_block pre_header, | |
340 HOST_WIDEST_INT * count) | |
341 { | |
342 rtx insn; | |
343 rtx head, tail; | |
344 | |
345 if (! pre_header) | |
346 return NULL_RTX; | |
347 | |
348 get_ebb_head_tail (pre_header, pre_header, &head, &tail); | |
349 | |
350 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn)) | |
351 if (INSN_P (insn) && single_set (insn) && | |
352 rtx_equal_p (count_reg, SET_DEST (single_set (insn)))) | |
353 { | |
354 rtx pat = single_set (insn); | |
355 | |
356 if (GET_CODE (SET_SRC (pat)) == CONST_INT) | |
357 { | |
358 *count = INTVAL (SET_SRC (pat)); | |
359 return insn; | |
360 } | |
361 | |
362 return NULL_RTX; | |
363 } | |
364 | |
365 return NULL_RTX; | |
366 } | |
367 | |
368 /* A very simple resource-based lower bound on the initiation interval. | |
369 ??? Improve the accuracy of this bound by considering the | |
370 utilization of various units. */ | |
371 static int | |
372 res_MII (ddg_ptr g) | |
373 { | |
374 if (targetm.sched.sms_res_mii) | |
375 return targetm.sched.sms_res_mii (g); | |
376 | |
377 return (g->num_nodes / issue_rate); | |
378 } | |
379 | |
380 | |
/* Points to the array that contains the sched data for each node.  */
static node_sched_params_ptr node_sched_params;

/* Allocate sched_params for each node and initialize it.  Assumes that
   the aux field of each node contains the asap bound (computed earlier),
   and copies it into the sched_params field.  */
static void
set_node_sched_params (ddg_ptr g)
{
  int i;

  /* Allocate for each node in the DDG a place to hold the "sched_data".  */
  /* Initialize ASAP/ALAP/HEIGHT to zero (xcalloc zeroes the array).  */
  node_sched_params = (node_sched_params_ptr)
		       xcalloc (g->num_nodes,
				sizeof (struct node_sched_params));

  /* Set the pointer of the general data of the node to point to the
     appropriate sched_params structure.  */
  for (i = 0; i < g->num_nodes; i++)
    {
      /* Watch out for aliasing problems?  */
      node_sched_params[i].asap = g->nodes[i].aux.count;
      g->nodes[i].aux.info = &node_sched_params[i];
    }
}
407 | |
408 static void | |
409 print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g) | |
410 { | |
411 int i; | |
412 | |
413 if (! file) | |
414 return; | |
415 for (i = 0; i < num_nodes; i++) | |
416 { | |
417 node_sched_params_ptr nsp = &node_sched_params[i]; | |
418 rtx reg_move = nsp->first_reg_move; | |
419 int j; | |
420 | |
421 fprintf (file, "Node = %d; INSN = %d\n", i, | |
422 (INSN_UID (g->nodes[i].insn))); | |
423 fprintf (file, " asap = %d:\n", nsp->asap); | |
424 fprintf (file, " time = %d:\n", nsp->time); | |
425 fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves); | |
426 for (j = 0; j < nsp->nreg_moves; j++) | |
427 { | |
428 fprintf (file, " reg_move = "); | |
429 print_rtl_single (file, reg_move); | |
430 reg_move = PREV_INSN (reg_move); | |
431 } | |
432 } | |
433 } | |
434 | |
435 /* | |
436 Breaking intra-loop register anti-dependences: | |
437 Each intra-loop register anti-dependence implies a cross-iteration true | |
438 dependence of distance 1. Therefore, we can remove such false dependencies | |
439 and figure out if the partial schedule broke them by checking if (for a | |
440 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and | |
441 if so generate a register move. The number of such moves is equal to: | |
442 SCHED_TIME (use) - SCHED_TIME (def) { 0 broken | |
443 nreg_moves = ----------------------------------- + 1 - { dependence. | |
444 ii { 1 if not. | |
445 */ | |
/* See the commentary above: break intra-loop anti-dependences by
   inserting register copies (reg-moves) before each node U that
   defines a register whose live range spans more than one stage.
   Returns a list of the replacements performed so they can be undone;
   when RESCAN, re-run df analysis on each modified insn.  */
static struct undo_replace_buff_elem *
generate_reg_moves (partial_schedule_ptr ps, bool rescan)
{
  ddg_ptr g = ps->g;
  int ii = ps->ii;
  int i;
  struct undo_replace_buff_elem *reg_move_replaces = NULL;

  for (i = 0; i < g->num_nodes; i++)
    {
      ddg_node_ptr u = &g->nodes[i];
      ddg_edge_ptr e;
      int nreg_moves = 0, i_reg_move;
      sbitmap *uses_of_defs;
      rtx last_reg_move;
      rtx prev_reg, old_reg;

      /* Compute the number of reg_moves needed for u, by looking at life
	 ranges started at u (excluding self-loops).  */
      for (e = u->out; e; e = e->next_out)
	if (e->type == TRUE_DEP && e->dest != e->src)
	  {
	    int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;

	    /* A distance-1 dependence reaches into the next iteration,
	       hence the extra II cycles.  */
	    if (e->distance == 1)
	      nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;

	    /* If dest precedes src in the schedule of the kernel, then dest
	       will read before src writes and we can save one reg_copy.  */
	    if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
		&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
	      nreg_moves4e--;

	    nreg_moves = MAX (nreg_moves, nreg_moves4e);
	  }

      if (nreg_moves == 0)
	continue;

      /* Every use of the register defined by node may require a different
	 copy of this register, depending on the time the use is scheduled.
	 Set a bitmap vector, telling which nodes use each copy of this
	 register.  */
      uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
      sbitmap_vector_zero (uses_of_defs, nreg_moves);
      for (e = u->out; e; e = e->next_out)
	if (e->type == TRUE_DEP && e->dest != e->src)
	  {
	    int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;

	    if (e->distance == 1)
	      dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;

	    if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
		&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
	      dest_copy--;

	    /* dest_copy == 0 means the use reads the original register,
	       so no replacement is recorded for it.  */
	    if (dest_copy)
	      SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
	  }

      /* Now generate the reg_moves, attaching relevant uses to them.  */
      SCHED_NREG_MOVES (u) = nreg_moves;
      old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
      /* Insert the reg-moves right before the notes which precede
	 the insn they relate to.  */
      last_reg_move = u->first_note;

      for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
	{
	  unsigned int i_use = 0;
	  rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
	  rtx reg_move = gen_move_insn (new_reg, prev_reg);
	  sbitmap_iterator sbi;

	  add_insn_before (reg_move, last_reg_move, NULL);
	  last_reg_move = reg_move;

	  if (!SCHED_FIRST_REG_MOVE (u))
	    SCHED_FIRST_REG_MOVE (u) = reg_move;

	  EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
	    {
	      struct undo_replace_buff_elem *rep;

	      /* Record the replacement so free_undo_replace_buff /
		 undo-SMS can find it later.  */
	      rep = (struct undo_replace_buff_elem *)
		    xcalloc (1, sizeof (struct undo_replace_buff_elem));
	      rep->insn = g->nodes[i_use].insn;
	      rep->orig_reg = old_reg;
	      rep->new_reg = new_reg;

	      if (! reg_move_replaces)
		reg_move_replaces = rep;
	      else
		{
		  rep->next = reg_move_replaces;
		  reg_move_replaces = rep;
		}

	      replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
	      if (rescan)
		df_insn_rescan (g->nodes[i_use].insn);
	    }

	  prev_reg = new_reg;
	}
      sbitmap_vector_free (uses_of_defs);
    }
  return reg_move_replaces;
}
556 | |
557 /* Free memory allocated for the undo buffer. */ | |
558 static void | |
559 free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces) | |
560 { | |
561 | |
562 while (reg_move_replaces) | |
563 { | |
564 struct undo_replace_buff_elem *rep = reg_move_replaces; | |
565 | |
566 reg_move_replaces = reg_move_replaces->next; | |
567 free (rep); | |
568 } | |
569 } | |
570 | |
/* Bump the SCHED_TIMEs of all nodes to start from zero.  Set the values
   of SCHED_ROW and SCHED_STAGE.  */
static void
normalize_sched_times (partial_schedule_ptr ps)
{
  int row;
  /* Subtracting the earliest cycle makes all times zero-based.  */
  int amount = PS_MIN_CYCLE (ps);
  int ii = ps->ii;
  ps_insn_ptr crr_insn;

  for (row = 0; row < ii; row++)
    for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
      {
	ddg_node_ptr u = crr_insn->node;
	int normalized_time = SCHED_TIME (u) - amount;

	if (dump_file)
	  fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\
 min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME
		   (u), ps->min_cycle);
	gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
	gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
	SCHED_TIME (u) = normalized_time;
	SCHED_ROW (u) = normalized_time % ii;
	SCHED_STAGE (u) = normalized_time / ii;
      }
}
598 | |
599 /* Set SCHED_COLUMN of each node according to its position in PS. */ | |
600 static void | |
601 set_columns_for_ps (partial_schedule_ptr ps) | |
602 { | |
603 int row; | |
604 | |
605 for (row = 0; row < ps->ii; row++) | |
606 { | |
607 ps_insn_ptr cur_insn = ps->rows[row]; | |
608 int column = 0; | |
609 | |
610 for (; cur_insn; cur_insn = cur_insn->next_in_row) | |
611 SCHED_COLUMN (cur_insn->node) = column++; | |
612 } | |
613 } | |
614 | |
615 /* Permute the insns according to their order in PS, from row 0 to | |
616 row ii-1, and position them right before LAST. This schedules | |
617 the insns of the loop kernel. */ | |
618 static void | |
619 permute_partial_schedule (partial_schedule_ptr ps, rtx last) | |
620 { | |
621 int ii = ps->ii; | |
622 int row; | |
623 ps_insn_ptr ps_ij; | |
624 | |
625 for (row = 0; row < ii ; row++) | |
626 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) | |
627 if (PREV_INSN (last) != ps_ij->node->insn) | |
628 reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn, | |
629 PREV_INSN (last)); | |
630 } | |
631 | |
/* Emit copies of the PS insns whose stage lies in [FROM_STAGE,
   TO_STAGE], together with the reg-moves they need, into the sequence
   currently being built (the caller has done start_sequence).
   FOR_PROLOG selects how many reg-moves to emit per node; insns that
   mention COUNT_REG belong to the control part and are skipped.  */
static void
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
			   int to_stage, int for_prolog, rtx count_reg)
{
  int row;
  ps_insn_ptr ps_ij;

  for (row = 0; row < ps->ii; row++)
    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
      {
	ddg_node_ptr u_node = ps_ij->node;
	int j, i_reg_moves;
	rtx reg_move = NULL_RTX;

	/* Do not duplicate any insn which refers to count_reg as it
	   belongs to the control part.
	   TODO: This should be done by analyzing the control part of
	   the loop.  */
	if (reg_mentioned_p (count_reg, u_node->insn))
	  continue;

	if (for_prolog)
	  {
	    /* SCHED_STAGE (u_node) >= from_stage == 0.  Generate increasing
	       number of reg_moves starting with the second occurrence of
	       u_node, which is generated if its SCHED_STAGE <= to_stage.  */
	    i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
	    i_reg_moves = MAX (i_reg_moves, 0);
	    i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));

	    /* The reg_moves start from the *first* reg_move backwards.  */
	    if (i_reg_moves)
	      {
		reg_move = SCHED_FIRST_REG_MOVE (u_node);
		for (j = 1; j < i_reg_moves; j++)
		  reg_move = PREV_INSN (reg_move);
	      }
	  }
	else /* It's for the epilog.  */
	  {
	    /* SCHED_STAGE (u_node) <= to_stage.  Generate all reg_moves,
	       starting to decrease one stage after u_node no longer occurs;
	       that is, generate all reg_moves until
	       SCHED_STAGE (u_node) == from_stage - 1.  */
	    i_reg_moves = SCHED_NREG_MOVES (u_node)
	      - (from_stage - SCHED_STAGE (u_node) - 1);
	    i_reg_moves = MAX (i_reg_moves, 0);
	    i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));

	    /* The reg_moves start from the *last* reg_move forwards.  */
	    if (i_reg_moves)
	      {
		reg_move = SCHED_FIRST_REG_MOVE (u_node);
		for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
		  reg_move = PREV_INSN (reg_move);
	      }
	  }

	for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
	  emit_insn (copy_rtx (PATTERN (reg_move)));
	if (SCHED_STAGE (u_node) >= from_stage
	    && SCHED_STAGE (u_node) <= to_stage)
	  duplicate_insn_chain (u_node->first_note, u_node->insn);
      }
}
697 | |
698 | |
/* Generate the instructions (including reg_moves) for prolog & epilog.
   The prolog is inserted on the loop-entry (preheader) edge and the
   epilog on the single loop-exit edge.  */
static void
generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
			rtx count_reg, rtx count_init)
{
  int i;
  int last_stage = PS_STAGE_COUNT (ps) - 1;
  edge e;

  /* Generate the prolog, inserting its insns on the loop-entry edge.  */
  start_sequence ();

  if (!count_init)
    {
      /* Generate instructions at the beginning of the prolog to
	 adjust the loop count by STAGE_COUNT.  NOTE(review): when the
	 loop count is constant (count_init), the constant is presumably
	 adjusted by the caller instead -- confirm against sms_schedule.  */
      rtx sub_reg = NULL_RTX;

      sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
				     count_reg, GEN_INT (last_stage),
				     count_reg, 1, OPTAB_DIRECT);
      gcc_assert (REG_P (sub_reg));
      if (REGNO (sub_reg) != REGNO (count_reg))
	emit_move_insn (count_reg, sub_reg);
    }

  for (i = 0; i < last_stage; i++)
    duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);

  /* Put the prolog on the entry edge.  */
  e = loop_preheader_edge (loop);
  split_edge_and_insert (e, get_insns ());

  end_sequence ();

  /* Generate the epilog, inserting its insns on the loop-exit edge.  */
  start_sequence ();

  for (i = 0; i < last_stage; i++)
    duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);

  /* Put the epilogue on the exit edge.  */
  gcc_assert (single_exit (loop));
  e = single_exit (loop);
  split_edge_and_insert (e, get_insns ());
  end_sequence ();
}
748 | |
749 /* Return true if all the BBs of the loop are empty except the | |
750 loop header. */ | |
751 static bool | |
752 loop_single_full_bb_p (struct loop *loop) | |
753 { | |
754 unsigned i; | |
755 basic_block *bbs = get_loop_body (loop); | |
756 | |
757 for (i = 0; i < loop->num_nodes ; i++) | |
758 { | |
759 rtx head, tail; | |
760 bool empty_bb = true; | |
761 | |
762 if (bbs[i] == loop->header) | |
763 continue; | |
764 | |
765 /* Make sure that basic blocks other than the header | |
766 have only notes labels or jumps. */ | |
767 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail); | |
768 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head)) | |
769 { | |
770 if (NOTE_P (head) || LABEL_P (head) | |
771 || (INSN_P (head) && JUMP_P (head))) | |
772 continue; | |
773 empty_bb = false; | |
774 break; | |
775 } | |
776 | |
777 if (! empty_bb) | |
778 { | |
779 free (bbs); | |
780 return false; | |
781 } | |
782 } | |
783 free (bbs); | |
784 return true; | |
785 } | |
786 | |
/* A simple loop from SMS point of view; it is a loop that is composed of
   either a single basic block or two BBs - a header and a latch.  The
   latch must have exactly one predecessor and one successor.  */
#define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 )			  \
				 && (EDGE_COUNT (loop->latch->preds) == 1) \
				 && (EDGE_COUNT (loop->latch->succs) == 1))
792 | |
/* Return true if the loop is in its canonical form and false if not.
   i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit.  */
static bool
loop_canon_p (struct loop *loop)
{

  /* Reject loops that contain an inner loop, and the outermost
     pseudo-loop (no parent).  */
  if (loop->inner || !loop_outer (loop))
    {
      if (dump_file)
	fprintf (dump_file, "SMS loop inner or !loop_outer\n");
      return false;
    }

  if (!single_exit (loop))
    {
      if (dump_file)
	{
	  rtx insn = BB_END (loop->header);

	  fprintf (dump_file, "SMS loop many exits ");
	  fprintf (dump_file, " %s %d (file, line)\n",
		   insn_file (insn), insn_line (insn));
	}
      return false;
    }

  if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
    {
      if (dump_file)
	{
	  rtx insn = BB_END (loop->header);

	  fprintf (dump_file, "SMS loop many BBs. ");
	  fprintf (dump_file, " %s %d (file, line)\n",
		   insn_file (insn), insn_line (insn));
	}
      return false;
    }

  return true;
}
834 | |
/* If there are more than one entry for the loop,
   make it one by splitting the first entry edge and
   redirecting the others to the new BB.  Also split the latch edge
   when the latch is shared with the header or has multiple
   successors.  */
static void
canon_loop (struct loop *loop)
{
  edge e;
  edge_iterator i;

  /* Avoid annoying special cases of edges going to exit
     block.  */
  FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
    if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
      split_edge (e);

  if (loop->latch == loop->header
      || EDGE_COUNT (loop->latch->succs) > 1)
    {
      /* NOTE(review): assumes the header always has an incoming edge
	 from the latch, so E is set by the search below before
	 split_edge uses it -- confirm for degenerate loops.  */
      FOR_EACH_EDGE (e, i, loop->header->preds)
	if (e->src == loop->latch)
	  break;
      split_edge (e);
    }
}
859 | |
860 /* Setup infos. */ | |
861 static void | |
862 setup_sched_infos (void) | |
863 { | |
864 memcpy (&sms_common_sched_info, &haifa_common_sched_info, | |
865 sizeof (sms_common_sched_info)); | |
866 sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS; | |
867 common_sched_info = &sms_common_sched_info; | |
868 | |
869 sched_deps_info = &sms_sched_deps_info; | |
870 current_sched_info = &sms_sched_info; | |
871 } | |
872 | |
873 /* Probability in % that the sms-ed loop rolls enough so that optimized | |
874 version may be entered. Just a guess. */ | |
875 #define PROB_SMS_ENOUGH_ITERATIONS 80 | |
876 | |
877 /* Used to calculate the upper bound of ii. */ | |
878 #define MAXII_FACTOR 2 | |
879 | |
/* Main entry point, perform SMS scheduling on the loops of the function
   that consist of single basic blocks.

   Works in two phases over all loops: an analysis phase that builds a
   data-dependence graph (DDG) per candidate loop into G_ARR, and a
   transformation phase that orders the DDG nodes, computes a partial
   schedule, and rewrites the loop kernel plus prolog/epilog when the
   schedule is profitable.  */
static void
sms_schedule (void)
{
  rtx insn;
  ddg_ptr *g_arr, g;
  int * node_order;
  int maxii, max_asap;
  loop_iterator li;
  partial_schedule_ptr ps;
  basic_block bb = NULL;
  struct loop *loop;
  basic_block condition_bb = NULL;
  edge latch_edge;
  gcov_type trip_count = 0;

  loop_optimizer_init (LOOPS_HAVE_PREHEADERS
		       | LOOPS_HAVE_RECORDED_EXITS);
  if (number_of_loops () <= 1)
    {
      loop_optimizer_finalize ();
      return; /* There are no loops to schedule.  */
    }

  /* Initialize issue_rate.  The target hook may consult
     reload_completed, so pretend reload has run while querying it.  */
  if (targetm.sched.issue_rate)
    {
      int temp = reload_completed;

      reload_completed = 1;
      issue_rate = targetm.sched.issue_rate ();
      reload_completed = temp;
    }
  else
    issue_rate = 1;

  /* Initialize the scheduler.  */
  setup_sched_infos ();
  haifa_sched_init ();

  /* Allocate memory to hold the DDG array one entry for each loop.
     We use loop->num as index into this array.  */
  g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());

  if (dump_file)
    {
      fprintf (dump_file, "\n\nSMS analysis phase\n");
      fprintf (dump_file, "===================\n\n");
    }

  /* Build DDGs for all the relevant loops and hold them in G_ARR
     indexed by the loop index.  */
  FOR_EACH_LOOP (li, loop, 0)
    {
      rtx head, tail;
      rtx count_reg;

      /* For debugging.  */
      if (dbg_cnt (sms_sched_loop) == false)
        {
          if (dump_file)
            fprintf (dump_file, "SMS reached max limit... \n");

          break;
        }

      if (dump_file)
      {
         rtx insn = BB_END (loop->header);

         fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
                  loop->num, insn_file (insn), insn_line (insn));

      }

      if (! loop_canon_p (loop))
        continue;

      if (! loop_single_full_bb_p (loop))
      {
        if (dump_file)
          fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
        continue;
      }

      bb = loop->header;

      get_ebb_head_tail (bb, bb, &head, &tail);
      latch_edge = loop_latch_edge (loop);
      gcc_assert (single_exit (loop));
      /* Estimate the trip count from edge profile counts (latch
	 executions per loop entry).  */
      if (single_exit (loop)->count)
        trip_count = latch_edge->count / single_exit (loop)->count;

      /* Perform SMS only on loops whose average iteration count is
	 above the threshold.  */

      if ( latch_edge->count
          && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
        {
          if (dump_file)
            {
              fprintf (dump_file, " %s %d (file, line)\n",
                       insn_file (tail), insn_line (tail));
              fprintf (dump_file, "SMS single-bb-loop\n");
              if (profile_info && flag_branch_probabilities)
                {
                  fprintf (dump_file, "SMS loop-count ");
                  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
                           (HOST_WIDEST_INT) bb->count);
                  fprintf (dump_file, "\n");
                  fprintf (dump_file, "SMS trip-count ");
                  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
                           (HOST_WIDEST_INT) trip_count);
                  fprintf (dump_file, "\n");
                  fprintf (dump_file, "SMS profile-sum-max ");
                  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
                           (HOST_WIDEST_INT) profile_info->sum_max);
                  fprintf (dump_file, "\n");
                }
            }
          continue;
        }

      /* Make sure this is a doloop.  */
      if ( !(count_reg = doloop_register_get (head, tail)))
      {
        if (dump_file)
          fprintf (dump_file, "SMS doloop_register_get failed\n");
        continue;
      }

      /* Don't handle BBs with calls or barriers, or !single_set insns,
         or auto-increment insns (to avoid creating invalid reg-moves
         for the auto-increment insns).
         ??? Should handle auto-increment insns.
         ??? Should handle insns defining subregs.  */
     for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
      {
         rtx set;

        if (CALL_P (insn)
            || BARRIER_P (insn)
            || (INSN_P (insn) && !JUMP_P (insn)
                && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
            || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
            || (INSN_P (insn) && (set = single_set (insn))
                && GET_CODE (SET_DEST (set)) == SUBREG))
          break;
      }

      /* INSN stopped short of the tail iff the scan above found a
	 disqualifying insn; report which kind and skip the loop.  */
      if (insn != NEXT_INSN (tail))
	{
	  if (dump_file)
	    {
	      if (CALL_P (insn))
		fprintf (dump_file, "SMS loop-with-call\n");
	      else if (BARRIER_P (insn))
		fprintf (dump_file, "SMS loop-with-barrier\n");
              else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
                fprintf (dump_file, "SMS reg inc\n");
              else if ((INSN_P (insn) && !JUMP_P (insn)
                && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
                fprintf (dump_file, "SMS loop-with-not-single-set\n");
              else
               fprintf (dump_file, "SMS loop with subreg in lhs\n");
	      print_rtl_single (dump_file, insn);
	    }

	  continue;
	}

      if (! (g = create_ddg (bb, 0)))
        {
          if (dump_file)
	    fprintf (dump_file, "SMS create_ddg failed\n");
	  continue;
        }

      g_arr[loop->num] = g;
      if (dump_file)
        fprintf (dump_file, "...OK\n");

    }
  if (dump_file)
    {
      fprintf (dump_file, "\nSMS transformation phase\n");
      fprintf (dump_file, "=========================\n\n");
    }

  /* We don't want to perform SMS on new loops - created by versioning.  */
  FOR_EACH_LOOP (li, loop, 0)
    {
      rtx head, tail;
      rtx count_reg, count_init;
      int mii, rec_mii;
      unsigned stage_count = 0;
      HOST_WIDEST_INT loop_count = 0;

      /* Only loops that survived the analysis phase have a DDG.  */
      if (! (g = g_arr[loop->num]))
        continue;

      if (dump_file)
      {
         rtx insn = BB_END (loop->header);

         fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
                  loop->num, insn_file (insn), insn_line (insn));

         print_ddg (dump_file, g);
      }

      get_ebb_head_tail (loop->header, loop->header, &head, &tail);

      latch_edge = loop_latch_edge (loop);
      gcc_assert (single_exit (loop));
      if (single_exit (loop)->count)
	trip_count = latch_edge->count / single_exit (loop)->count;

      if (dump_file)
	{
	  fprintf (dump_file, " %s %d (file, line)\n",
		   insn_file (tail), insn_line (tail));
	  fprintf (dump_file, "SMS single-bb-loop\n");
	  if (profile_info && flag_branch_probabilities)
	    {
	      fprintf (dump_file, "SMS loop-count ");
	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
	               (HOST_WIDEST_INT) bb->count);
	      fprintf (dump_file, "\n");
              fprintf (dump_file, "SMS profile-sum-max ");
              fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
                       (HOST_WIDEST_INT) profile_info->sum_max);
              fprintf (dump_file, "\n");
	    }
	  fprintf (dump_file, "SMS doloop\n");
	  fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
	  fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
	  fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
	}


      /* If the loop has a doloop register, try to derive a
	 compile-time constant iteration count (COUNT_INIT /
	 LOOP_COUNT) from the preheader; it gets special handling
	 below.  */
      count_init = NULL_RTX;
      if ((count_reg = doloop_register_get (head, tail)))
	{
	  basic_block pre_header;

	  pre_header = loop_preheader_edge (loop)->src;
	  count_init = const_iteration_count (count_reg, pre_header,
					      &loop_count);
	}
      /* The analysis phase only kept loops with a doloop register.  */
      gcc_assert (count_reg);

      if (dump_file && count_init)
        {
          fprintf (dump_file, "SMS const-doloop ");
          fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
		     loop_count);
          fprintf (dump_file, "\n");
        }

      node_order = XNEWVEC (int, g->num_nodes);

      mii = 1; /* Need to pass some estimate of mii.  */
      rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
      mii = MAX (res_MII (g), rec_mii);
      maxii = MAX (max_asap, MAXII_FACTOR * mii);

      if (dump_file)
	fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
		 rec_mii, mii, maxii);

      /* After sms_order_nodes and before sms_schedule_by_order, to copy over
	 ASAP.  */
      set_node_sched_params (g);

      ps = sms_schedule_by_order (g, mii, maxii, node_order);

      if (ps)
	stage_count = PS_STAGE_COUNT (ps);

      /* Stage count of 1 means that there is no interleaving between
         iterations, let the scheduling passes do the job.  Also give
	 up when the (constant or profiled) trip count does not cover
	 the pipeline depth.  */
      if (stage_count < 1
	  || (count_init && (loop_count <= stage_count))
	  || (flag_branch_probabilities && (trip_count <= stage_count)))
	{
	  if (dump_file)
	    {
	      fprintf (dump_file, "SMS failed... \n");
	      fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
	      fprintf (dump_file, ", trip-count=");
	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
	      fprintf (dump_file, ")\n");
	    }
	  continue;
	}
      else
	{
	  struct undo_replace_buff_elem *reg_move_replaces;

	  if (dump_file)
	    {
	      fprintf (dump_file,
		       "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
		       stage_count);
	      print_partial_schedule (ps, dump_file);
	      fprintf (dump_file,
		       "SMS Branch (%d) will later be scheduled at cycle %d.\n",
		       g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
	    }

	  /* Set the stage boundaries.  If the DDG is built with closing_branch_deps,
	     the closing_branch was scheduled and should appear in the last (ii-1)
	     row.  Otherwise, we are free to schedule the branch, and we let nodes
	     that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
	     row; this should reduce stage_count to minimum.
             TODO: Revisit the issue of scheduling the insns of the
             control part relative to the branch when the control part
             has more than one insn.  */
	  normalize_sched_times (ps);
	  rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
	  set_columns_for_ps (ps);

	  canon_loop (loop);

          /* If the BCT count is not a known constant, do
	     loop-versioning: keep an unmodified copy of the loop for
	     runtime counts too small to enter the pipelined version.  */
	  if (count_reg && ! count_init)
	    {
	      rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
					     GEN_INT(stage_count));
	      unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
			       * REG_BR_PROB_BASE) / 100;

	      loop_version (loop, comp_rtx, &condition_bb,
	  		    prob, prob, REG_BR_PROB_BASE - prob,
			    true);
	     }

	  /* Set new iteration count of loop kernel.  The prolog and
	     epilog each execute stage_count - 1 iterations' worth of
	     insns, so the kernel spins fewer times.  */
          if (count_reg && count_init)
	    SET_SRC (single_set (count_init)) = GEN_INT (loop_count
						     - stage_count + 1);

	  /* Now apply the scheduled kernel to the RTL of the loop.  */
	  permute_partial_schedule (ps, g->closing_branch->first_note);

	  /* Mark this loop as software pipelined so the later
	     scheduling passes don't touch it.  */
	  if (! flag_resched_modulo_sched)
	    g->bb->flags |= BB_DISABLE_SCHEDULE;
	  /* The life-info is not valid any more.  */
	  df_set_bb_dirty (g->bb);

	  reg_move_replaces = generate_reg_moves (ps, true);
	  if (dump_file)
	    print_node_sched_params (dump_file, g->num_nodes, g);
	  /* Generate prolog and epilog.  */
	  generate_prolog_epilog (ps, loop, count_reg, count_init);

	  free_undo_replace_buff (reg_move_replaces);
	}

      free_partial_schedule (ps);
      free (node_sched_params);
      free (node_order);
      free_ddg (g);
    }

  free (g_arr);

  /* Release scheduler data, needed until now because of DFA.  */
  haifa_sched_finish ();
  loop_optimizer_finalize ();
}
1257 | |
1258 /* The SMS scheduling algorithm itself | |
1259 ----------------------------------- | |
1260 Input: 'O' an ordered list of insns of a loop. | |
1261 Output: A scheduling of the loop - kernel, prolog, and epilogue. | |
1262 | |
1263 'Q' is the empty Set | |
1264 'PS' is the partial schedule; it holds the currently scheduled nodes with | |
1265 their cycle/slot. | |
1266 'PSP' previously scheduled predecessors. | |
1267 'PSS' previously scheduled successors. | |
1268 't(u)' the cycle where u is scheduled. | |
1269 'l(u)' is the latency of u. | |
1270 'd(v,u)' is the dependence distance from v to u. | |
1271 'ASAP(u)' the earliest time at which u could be scheduled as computed in | |
1272 the node ordering phase. | |
1273 'check_hardware_resources_conflicts(u, PS, c)' | |
1274 run a trace around cycle/slot through DFA model | |
1275 to check resource conflicts involving instruction u | |
1276 at cycle c given the partial schedule PS. | |
1277 'add_to_partial_schedule_at_time(u, PS, c)' | |
1278 Add the node/instruction u to the partial schedule | |
1279 PS at time c. | |
1280 'calculate_register_pressure(PS)' | |
1281 Given a schedule of instructions, calculate the register | |
1282 pressure it implies. One implementation could be the | |
1283 maximum number of overlapping live ranges. | |
1284 'maxRP' The maximum allowed register pressure, it is usually derived from the number | |
1285 registers available in the hardware. | |
1286 | |
1287 1. II = MII. | |
1288 2. PS = empty list | |
1289 3. for each node u in O in pre-computed order | |
1290 4. if (PSP(u) != Q && PSS(u) == Q) then | |
   5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
   6. start = Early_start; end = Early_start + II - 1; step = 1
   11. else if (PSP(u) == Q && PSS(u) != Q) then
   12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
   13. start = Late_start; end = Late_start - II + 1; step = -1
   14. else if (PSP(u) != Q && PSS(u) != Q) then
   15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
   16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
1299 17. start = Early_start; | |
1300 18. end = min(Early_start + II - 1 , Late_start); | |
1301 19. step = 1 | |
1302 20. else "if (PSP(u) == Q && PSS(u) == Q)" | |
1303 21. start = ASAP(u); end = start + II - 1; step = 1 | |
1304 22. endif | |
1305 | |
1306 23. success = false | |
1307 24. for (c = start ; c != end ; c += step) | |
1308 25. if check_hardware_resources_conflicts(u, PS, c) then | |
1309 26. add_to_partial_schedule_at_time(u, PS, c) | |
1310 27. success = true | |
1311 28. break | |
1312 29. endif | |
1313 30. endfor | |
1314 31. if (success == false) then | |
1315 32. II = II + 1 | |
1316 33. if (II > maxII) then | |
1317 34. finish - failed to schedule | |
1318 35. endif | |
1319 36. goto 2. | |
1320 37. endif | |
1321 38. endfor | |
1322 39. if (calculate_register_pressure(PS) > maxRP) then | |
1323 40. goto 32. | |
1324 41. endif | |
1325 42. compute epilogue & prologue | |
1326 43. finish - succeeded to schedule | |
1327 */ | |
1328 | |
1329 /* A limit on the number of cycles that resource conflicts can span. ??? Should | |
1330 be provided by DFA, and be dependent on the type of insn scheduled. Currently | |
1331 set to 0 to save compile time. */ | |
1332 #define DFA_HISTORY SMS_DFA_HISTORY | |
1333 | |
1334 /* A threshold for the number of repeated unsuccessful attempts to insert | |
1335 an empty row, before we flush the partial schedule and start over. */ | |
1336 #define MAX_SPLIT_NUM 10 | |
/* Given the partial schedule PS, this function calculates and returns the
   cycles in which we can schedule the node with the given index I.
   NOTE: Here we do the backtracking in SMS, in some special cases. We have
   noticed that there are several cases in which we fail to SMS the loop
   because the sched window of a node is empty due to tight data-deps. In
   such cases we want to unschedule some of the predecessors/successors
   until we get non-empty scheduling window.  It returns -1 if the
   scheduling window is empty and zero otherwise.

   On success the half-open window is returned through *START_P,
   *STEP_P and *END_P: cycles START, START+STEP, ... up to but not
   including END.  STEP is +1 when scheduling near predecessors and
   -1 when scheduling near successors.  */

static int
get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
		  sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
{
  int start, step, end;
  ddg_edge_ptr e;
  int u = nodes_order [i];
  ddg_node_ptr u_node = &ps->g->nodes[u];
  sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
  sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
  sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
  sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
  int psp_not_empty;
  int pss_not_empty;

  /* 1. compute sched window for u (start, end, step).  */
  /* PSP/PSS = predecessors/successors of U that are already
     scheduled (intersection with SCHED_NODES).  */
  sbitmap_zero (psp);
  sbitmap_zero (pss);
  psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
  pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);

  /* Case 1: only predecessors are scheduled — derive the earliest
     legal cycle from each incoming edge and scan upwards.  */
  if (psp_not_empty && !pss_not_empty)
    {
      int early_start = INT_MIN;

      end = INT_MAX;
      for (e = u_node->in; e != 0; e = e->next_in)
	{
	  ddg_node_ptr v_node = e->src;

          if (dump_file)
            {
	      fprintf (dump_file, "\nProcessing edge: ");
              print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_not_empty,"
		       " checking p %d (%d): ", u_node->cuid,
		       INSN_UID (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
            }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
              int p_st = SCHED_TIME (v_node);

              early_start =
                MAX (early_start, p_st + e->latency - (e->distance * ii));

              if (dump_file)
                fprintf (dump_file,
                         "pred st = %d; early_start = %d; latency: %d",
                         p_st, early_start, e->latency);

	      /* Memory dependences bound the window from above.  */
	      if (e->data_type == MEM_DEP)
		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
	    }
         else if (dump_file)
            fprintf (dump_file, "the node is not scheduled\n");
	}
      start = early_start;
      end = MIN (end, early_start + ii);
      /* Schedule the node close to its predecessors.  */
      step = 1;

      if (dump_file)
        fprintf (dump_file,
		 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
		 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
    }

  /* Case 2: only successors are scheduled — derive the latest legal
     cycle from each outgoing edge and scan downwards.  */
  else if (!psp_not_empty && pss_not_empty)
    {
      int late_start = INT_MAX;

      end = INT_MIN;
      for (e = u_node->out; e != 0; e = e->next_out)
	{
	  ddg_node_ptr v_node = e->dest;

          if (dump_file)
            {
              fprintf (dump_file, "\nProcessing edge:");
              print_ddg_edge (dump_file, e);
              fprintf (dump_file,
                       "\nScheduling %d (%d) in pss_not_empty,"
                       " checking s %d (%d): ", u_node->cuid,
                       INSN_UID (u_node->insn), v_node->cuid, INSN_UID
                       (v_node->insn));
            }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
              int s_st = SCHED_TIME (v_node);

              late_start = MIN (late_start,
                                s_st - e->latency + (e->distance * ii));

              if (dump_file)
                fprintf (dump_file,
                         "succ st = %d; late_start = %d; latency = %d",
                         s_st, late_start, e->latency);

	      /* Memory dependences bound the window from below
		 (END is the exclusive lower bound here, step = -1).  */
	      if (e->data_type == MEM_DEP)
		end = MAX (end, SCHED_TIME (v_node) - ii + 1);
             if (dump_file)
                 fprintf (dump_file, "end = %d\n", end);

	    }
          else if (dump_file)
            fprintf (dump_file, "the node is not scheduled\n");

	}
      start = late_start;
      end = MAX (end, late_start - ii);
      /* Schedule the node close to its successors.  */
      step = -1;

      if (dump_file)
        fprintf (dump_file,
                 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
                 u_node->cuid, INSN_UID (u_node->insn), start, end, step);

    }

  /* Case 3: both predecessors and successors are scheduled — the
     window is bounded on both sides; additionally count TRUE register
     dependences to decide which end to scan from.  */
  else if (psp_not_empty && pss_not_empty)
    {
      int early_start = INT_MIN;
      int late_start = INT_MAX;
      int count_preds = 0;
      int count_succs = 0;

      start = INT_MIN;
      end = INT_MAX;
      for (e = u_node->in; e != 0; e = e->next_in)
	{
	  ddg_node_ptr v_node = e->src;

	  if (dump_file)
	    {
              fprintf (dump_file, "\nProcessing edge:");
              print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_pss_not_empty,"
		       " checking p %d (%d): ", u_node->cuid, INSN_UID
		       (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
              int p_st = SCHED_TIME (v_node);

	      early_start = MAX (early_start,
				 p_st + e->latency
				 - (e->distance * ii));

              if (dump_file)
                fprintf (dump_file,
                         "pred st = %d; early_start = %d; latency = %d",
                         p_st, early_start, e->latency);

              if (e->type == TRUE_DEP && e->data_type == REG_DEP)
                count_preds++;

	      if (e->data_type == MEM_DEP)
		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
	    }
          else if (dump_file)
            fprintf (dump_file, "the node is not scheduled\n");

	}
      for (e = u_node->out; e != 0; e = e->next_out)
	{
	  ddg_node_ptr v_node = e->dest;

	  if (dump_file)
	    {
              fprintf (dump_file, "\nProcessing edge:");
              print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_pss_not_empty,"
		       " checking s %d (%d): ", u_node->cuid, INSN_UID
		       (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
              int s_st = SCHED_TIME (v_node);

	      late_start = MIN (late_start,
				s_st - e->latency
				+ (e->distance * ii));

              if (dump_file)
                fprintf (dump_file,
                         "succ st = %d; late_start = %d; latency = %d",
                         s_st, late_start, e->latency);

               if (e->type == TRUE_DEP && e->data_type == REG_DEP)
                 count_succs++;

	      if (e->data_type == MEM_DEP)
		start = MAX (start, SCHED_TIME (v_node) - ii + 1);
	    }
          else if (dump_file)
            fprintf (dump_file, "the node is not scheduled\n");

	}
      start = MAX (start, early_start);
      end = MIN (end, MIN (early_start + ii, late_start + 1));
      step = 1;
      /* If there are more successors than predecessors schedule the
         node close to its successors.  */
      if (count_succs >= count_preds)
        {
          int old_start = start;

          start = end - 1;
          end = old_start - 1;
          step = -1;
        }
    }
  else /* psp is empty && pss is empty.  */
    {
      /* No scheduled neighbors: fall back to the ASAP estimate
	 computed during node ordering.  */
      start = SCHED_ASAP (u_node);
      end = start + ii;
      step = 1;
    }

  *start_p = start;
  *step_p = step;
  *end_p = end;
  sbitmap_free (psp);
  sbitmap_free (pss);

  /* An empty window (no cycle strictly between START and END in the
     scan direction) means backtracking is needed; signal with -1.  */
  if ((start >= end && step == 1) || (start <= end && step == -1))
    {
      if (dump_file)
	fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
		 start, end, step);
      return -1;
    }

  return 0;
}
1592 | |
/* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
   node currently been scheduled.  At the end of the calculation
   MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
   U_NODE which are (1) already scheduled in the first/last row of
   U_NODE's scheduling window, (2) whose dependence inequality with U
   becomes an equality when U is scheduled in this same row, and (3)
   whose dependence latency is zero.

   The first and last rows are calculated using the following parameters:
   START/END rows - The cycles that begins/ends the traversal on the window;
   searching for an empty cycle to schedule U_NODE.
   STEP - The direction in which we traverse the window.
   II - The initiation interval.  */

static void
calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
			       int step, int ii, sbitmap sched_nodes,
			       sbitmap must_precede, sbitmap must_follow)
{
  ddg_edge_ptr e;
  int first_cycle_in_window, last_cycle_in_window;

  gcc_assert (must_precede && must_follow);

  /* Consider the following scheduling window:
     {first_cycle_in_window, first_cycle_in_window+1, ...,
     last_cycle_in_window}.  If step is 1 then the following will be
     the order we traverse the window: {start=first_cycle_in_window,
     first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
     or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
     end=first_cycle_in_window-1} if step is -1.  */
  first_cycle_in_window = (step == 1) ? start : end - step;
  last_cycle_in_window = (step == 1) ? end - step : start;

  sbitmap_zero (must_precede);
  sbitmap_zero (must_follow);

  if (dump_file)
    fprintf (dump_file, "\nmust_precede: ");

  /* Collect scheduled predecessors that are pinned to the window's
     first row.  Instead of checking if:
      (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
      && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
             first_cycle_in_window)
      && e->latency == 0
     we use the fact that latency is non-negative:
      SCHED_TIME (e->src) - (e->distance * ii) <=
      SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
      first_cycle_in_window
     and check only if
      SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window  */
  for (e = u_node->in; e != 0; e = e->next_in)
    if (TEST_BIT (sched_nodes, e->src->cuid)
	&& ((SCHED_TIME (e->src) - (e->distance * ii)) ==
             first_cycle_in_window))
      {
	if (dump_file)
	  fprintf (dump_file, "%d ", e->src->cuid);

	SET_BIT (must_precede, e->src->cuid);
      }

  if (dump_file)
    fprintf (dump_file, "\nmust_follow: ");

  /* Collect scheduled successors that are pinned to the window's
     last row.  Instead of checking if:
      (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
      && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
             last_cycle_in_window)
      && e->latency == 0
     we use the fact that latency is non-negative:
      SCHED_TIME (e->dest) + (e->distance * ii) >=
      SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
      last_cycle_in_window
     and check only if
      SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window  */
  for (e = u_node->out; e != 0; e = e->next_out)
    if (TEST_BIT (sched_nodes, e->dest->cuid)
	&& ((SCHED_TIME (e->dest) + (e->distance * ii)) ==
             last_cycle_in_window))
      {
	if (dump_file)
	  fprintf (dump_file, "%d ", e->dest->cuid);

	SET_BIT (must_follow, e->dest->cuid);
      }

  if (dump_file)
    fprintf (dump_file, "\n");
}
1683 | |
1684 /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following | |
1685 parameters to decide if that's possible: | |
1686 PS - The partial schedule. | |
1687 U - The serial number of U_NODE. | |
1688 NUM_SPLITS - The number of row splits made so far. | |
1689 MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at | |
1690 the first row of the scheduling window) | |
1691 MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the | |
1692 last row of the scheduling window) */ | |
1693 | |
1694 static bool | |
1695 try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node, | |
1696 int u, int cycle, sbitmap sched_nodes, | |
1697 int *num_splits, sbitmap must_precede, | |
1698 sbitmap must_follow) | |
1699 { | |
1700 ps_insn_ptr psi; | |
1701 bool success = 0; | |
1702 | |
1703 verify_partial_schedule (ps, sched_nodes); | |
1704 psi = ps_add_node_check_conflicts (ps, u_node, cycle, | |
1705 must_precede, must_follow); | |
1706 if (psi) | |
1707 { | |
1708 SCHED_TIME (u_node) = cycle; | |
1709 SET_BIT (sched_nodes, u); | |
1710 success = 1; | |
1711 *num_splits = 0; | |
1712 if (dump_file) | |
1713 fprintf (dump_file, "Scheduled w/o split in %d\n", cycle); | |
1714 | |
1715 } | |
1716 | |
1717 return success; | |
1718 } | |
1719 | |
/* This function implements the scheduling algorithm for SMS according to the
   above algorithm.  Try to schedule the nodes of G, in the priority
   order given by NODES_ORDER, into a partial schedule whose initiation
   interval starts at MII and may grow up to MAXII.  Return the partial
   schedule on success, or NULL if no schedule was found with ii < MAXII.  */
static partial_schedule_ptr
sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
{
  int ii = mii;
  int i, c, success, num_splits = 0;
  int flush_and_start_over = true;
  int num_nodes = g->num_nodes;
  int start, end, step; /* Place together into one struct?  */
  sbitmap sched_nodes = sbitmap_alloc (num_nodes);
  sbitmap must_precede = sbitmap_alloc (num_nodes);
  sbitmap must_follow = sbitmap_alloc (num_nodes);
  /* Nodes that must end up scheduled (everything but debug insns and
     the closing branch).  */
  sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);

  partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);

  sbitmap_ones (tobe_scheduled);
  sbitmap_zero (sched_nodes);

  /* Outer loop: each iteration retries with a larger II after a flush.  */
  while (flush_and_start_over && (ii < maxii))
    {

      if (dump_file)
	fprintf (dump_file, "Starting with ii=%d\n", ii);
      flush_and_start_over = false;
      sbitmap_zero (sched_nodes);

      for (i = 0; i < num_nodes; i++)
	{
	  int u = nodes_order[i];
	  ddg_node_ptr u_node = &ps->g->nodes[u];
	  rtx insn = u_node->insn;

	  if (!INSN_P (insn))
	    {
	      RESET_BIT (tobe_scheduled, u);
	      continue;
	    }

	  if (JUMP_P (insn)) /* Closing branch handled later.  */
	    {
	      RESET_BIT (tobe_scheduled, u);
	      continue;
	    }

	  if (TEST_BIT (sched_nodes, u))
	    continue;

	  /* Try to get non-empty scheduling window.  */
	  success = 0;
	  if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
				&step, &end) == 0)
	    {
	      if (dump_file)
		fprintf (dump_file, "\nTrying to schedule node %d \
	      INSN = %d  in (%d .. %d) step %d\n", u, (INSN_UID
                (g->nodes[u].insn)), start, end, step);

	      gcc_assert ((step > 0 && start < end)
			  || (step < 0 && start > end));

	      calculate_must_precede_follow (u_node, start, end, step, ii,
					     sched_nodes, must_precede,
					     must_follow);

	      for (c = start; c != end; c += step)
		{
		  /* MUST_PRECEDE only matters at the first cycle of the
		     window and MUST_FOLLOW only at the last; which end is
		     which depends on the window's direction (STEP).  */
		  sbitmap tmp_precede = NULL;
		  sbitmap tmp_follow = NULL;

		  if (c == start)
		    {
		      if (step == 1)
			tmp_precede = must_precede;
		      else /* step == -1.  */
			tmp_follow = must_follow;
		    }
		  if (c == end - step)
		    {
		      if (step == 1)
			tmp_follow = must_follow;
		      else /* step == -1.  */
			tmp_precede = must_precede;
		    }

		  success =
		    try_scheduling_node_in_cycle (ps, u_node, u, c,
						  sched_nodes,
						  &num_splits, tmp_precede,
						  tmp_follow);
		  if (success)
		    break;
		}

	      verify_partial_schedule (ps, sched_nodes);
	    }
	  if (!success)
	    {
	      int split_row;

	      /* Give up on the current II; retry with a larger one unless
		 we have already reached MAXII.  */
	      if (ii++ == maxii)
		break;

	      /* Too many row splits at this II: flush the schedule and
		 start over from scratch with the incremented II.  */
	      if (num_splits >= MAX_SPLIT_NUM)
		{
		  num_splits = 0;
		  flush_and_start_over = true;
		  verify_partial_schedule (ps, sched_nodes);
		  reset_partial_schedule (ps, ii);
		  verify_partial_schedule (ps, sched_nodes);
		  break;
		}

	      /* Split a row to widen the failing node's window and retry.
		 NOTE(review): START/END/STEP here come from the last
		 successful get_sched_window call; if get_sched_window just
		 failed for this node they look stale — confirm against the
		 get_sched_window contract.  */
	      num_splits++;
	      if (step == 1)
		split_row = compute_split_row (sched_nodes, start, end,
					       ps->ii, u_node);
	      else
		split_row = compute_split_row (sched_nodes, end, start,
					       ps->ii, u_node);

	      ps_insert_empty_row (ps, split_row, sched_nodes);
	      i--;              /* Go back and retry node i.  */

	      if (dump_file)
		fprintf (dump_file, "num_splits=%d\n", num_splits);
	    }

	  /* ??? If (success), check register pressure estimates.  */
	}                       /* Continue with next node.  */
    }                           /* While flush_and_start_over.  */
  if (ii >= maxii)
    {
      free_partial_schedule (ps);
      ps = NULL;
    }
  else
    gcc_assert (sbitmap_equal (tobe_scheduled, sched_nodes));

  sbitmap_free (sched_nodes);
  sbitmap_free (must_precede);
  sbitmap_free (must_follow);
  sbitmap_free (tobe_scheduled);

  return ps;
}
1867 | |
/* This function inserts a new empty row into PS at the position
   according to SPLITROW, keeping all already scheduled instructions
   intact and updating their SCHED_TIME and cycle accordingly.  The
   schedule's ii grows by one; SCHED_NODES is used only for
   verification.  */
static void
ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
		     sbitmap sched_nodes)
{
  ps_insn_ptr crr_insn;
  ps_insn_ptr *rows_new;
  int ii = ps->ii;
  int new_ii = ii + 1;
  int row;

  verify_partial_schedule (ps, sched_nodes);

  /* We normalize sched_time and rotate ps to have only non-negative sched
     times, for simplicity of updating cycles after inserting new row.  */
  split_row -= ps->min_cycle;
  split_row = SMODULO (split_row, ii);
  if (dump_file)
    fprintf (dump_file, "split_row=%d\n", split_row);

  normalize_sched_times (ps);
  rotate_partial_schedule (ps, ps->min_cycle);

  /* Build the new (ii+1)-row array.  Rows before SPLIT_ROW keep their
     index; rows from SPLIT_ROW onward shift down by one.  */
  rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
  for (row = 0; row < split_row; row++)
    {
      rows_new[row] = ps->rows[row];
      ps->rows[row] = NULL;
      for (crr_insn = rows_new[row];
	   crr_insn; crr_insn = crr_insn->next_in_row)
	{
	  ddg_node_ptr u = crr_insn->node;
	  /* Each complete II-wrap of the old schedule now costs one
	     extra cycle (the inserted row).  */
	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);

	  SCHED_TIME (u) = new_time;
	  crr_insn->cycle = new_time;
	  SCHED_ROW (u) = new_time % new_ii;
	  SCHED_STAGE (u) = new_time / new_ii;
	}

    }

  /* The newly inserted row starts out empty.  */
  rows_new[split_row] = NULL;

  for (row = split_row; row < ii; row++)
    {
      rows_new[row + 1] = ps->rows[row];
      ps->rows[row] = NULL;
      for (crr_insn = rows_new[row + 1];
	   crr_insn; crr_insn = crr_insn->next_in_row)
	{
	  ddg_node_ptr u = crr_insn->node;
	  /* Rows at or after SPLIT_ROW additionally slip one cycle
	     within the current iteration, hence the "+ 1".  */
	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;

	  SCHED_TIME (u) = new_time;
	  crr_insn->cycle = new_time;
	  SCHED_ROW (u) = new_time % new_ii;
	  SCHED_STAGE (u) = new_time / new_ii;
	}
    }

  /* Updating ps.  */
  ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
    + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
  ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
    + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
  free (ps->rows);
  ps->rows = rows_new;
  ps->ii = new_ii;
  gcc_assert (ps->min_cycle >= 0);

  verify_partial_schedule (ps, sched_nodes);

  if (dump_file)
    fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
	     ps->max_cycle);
}
1947 | |
/* Given U_NODE which is the node that failed to be scheduled; LOW and
   UP which are the boundaries of its scheduling window; compute using
   SCHED_NODES and II a row in the partial schedule that can be split
   which will separate a critical predecessor from a critical successor
   thereby expanding the window, and return it.  */
static int
compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
		   ddg_node_ptr u_node)
{
  ddg_edge_ptr e;
  int lower = INT_MIN, upper = INT_MAX;
  ddg_node_ptr crit_pred = NULL;
  ddg_node_ptr crit_succ = NULL;
  int crit_cycle;

  /* Among the scheduled predecessors whose dependence exactly produces
     the window's lower bound LOW, remember the latest-scheduled one.  */
  for (e = u_node->in; e != 0; e = e->next_in)
    {
      ddg_node_ptr v_node = e->src;

      if (TEST_BIT (sched_nodes, v_node->cuid)
	  && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii)))
	if (SCHED_TIME (v_node) > lower)
	  {
	    crit_pred = v_node;
	    lower = SCHED_TIME (v_node);
	  }
    }

  if (crit_pred != NULL)
    {
      /* Split right after the critical predecessor.  */
      crit_cycle = SCHED_TIME (crit_pred) + 1;
      return SMODULO (crit_cycle, ii);
    }

  /* Otherwise, among the scheduled successors whose dependence exactly
     produces the upper bound UP, remember the earliest-scheduled one.  */
  for (e = u_node->out; e != 0; e = e->next_out)
    {
      ddg_node_ptr v_node = e->dest;
      if (TEST_BIT (sched_nodes, v_node->cuid)
	  && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii)))
	if (SCHED_TIME (v_node) < upper)
	  {
	    crit_succ = v_node;
	    upper = SCHED_TIME (v_node);
	  }
    }

  if (crit_succ != NULL)
    {
      /* Split right before the critical successor.  */
      crit_cycle = SCHED_TIME (crit_succ);
      return SMODULO (crit_cycle, ii);
    }

  if (dump_file)
    fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");

  /* No critical neighbor determines the window; split in its middle.  */
  return SMODULO ((low + up + 1) / 2, ii);
}
2005 | |
2006 static void | |
2007 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes) | |
2008 { | |
2009 int row; | |
2010 ps_insn_ptr crr_insn; | |
2011 | |
2012 for (row = 0; row < ps->ii; row++) | |
2013 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) | |
2014 { | |
2015 ddg_node_ptr u = crr_insn->node; | |
2016 | |
2017 gcc_assert (TEST_BIT (sched_nodes, u->cuid)); | |
2018 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by | |
2019 popcount (sched_nodes) == number of insns in ps. */ | |
2020 gcc_assert (SCHED_TIME (u) >= ps->min_cycle); | |
2021 gcc_assert (SCHED_TIME (u) <= ps->max_cycle); | |
2022 } | |
2023 } | |
2024 | |
2025 | |
2026 /* This page implements the algorithm for ordering the nodes of a DDG | |
2027 for modulo scheduling, activated through the | |
2028 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */ | |
2029 | |
/* Accessor for the ordering parameters hung off a DDG node's AUX field
   while the node-ordering phase runs.  */
#define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
/* As-soon-as-possible / as-late-as-possible start times and height of
   a node in the (acyclic view of the) dependence graph.  */
#define ASAP(x) (ORDER_PARAMS ((x))->asap)
#define ALAP(x) (ORDER_PARAMS ((x))->alap)
#define HEIGHT(x) (ORDER_PARAMS ((x))->height)
/* Mobility: slack between the latest and earliest start times.  */
#define MOB(x) (ALAP ((x)) - ASAP ((x)))
#define DEPTH(x) (ASAP ((x)))

/* Shorthand for an array of per-node ordering parameters.  */
typedef struct node_order_params * nopa;

static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
static nopa calculate_order_params (ddg_ptr, int, int *);
static int find_max_asap (ddg_ptr, sbitmap);
static int find_max_hv_min_mob (ddg_ptr, sbitmap);
static int find_max_dv_min_mob (ddg_ptr, sbitmap);

/* Direction in which the SMS ordering currently sweeps the graph.  */
enum sms_direction {BOTTOMUP, TOPDOWN};

/* Per-node ordering parameters, accessed via the macros above.  */
struct node_order_params
{
  int asap;
  int alap;
  int height;
};
2054 | |
2055 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */ | |
2056 static void | |
2057 check_nodes_order (int *node_order, int num_nodes) | |
2058 { | |
2059 int i; | |
2060 sbitmap tmp = sbitmap_alloc (num_nodes); | |
2061 | |
2062 sbitmap_zero (tmp); | |
2063 | |
2064 if (dump_file) | |
2065 fprintf (dump_file, "SMS final nodes order: \n"); | |
2066 | |
2067 for (i = 0; i < num_nodes; i++) | |
2068 { | |
2069 int u = node_order[i]; | |
2070 | |
2071 if (dump_file) | |
2072 fprintf (dump_file, "%d ", u); | |
2073 gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u)); | |
2074 | |
2075 SET_BIT (tmp, u); | |
2076 } | |
2077 | |
2078 if (dump_file) | |
2079 fprintf (dump_file, "\n"); | |
2080 | |
2081 sbitmap_free (tmp); | |
2082 } | |
2083 | |
/* Order the nodes of G for scheduling and pass the result in
   NODE_ORDER.  Also set aux.count of each node to ASAP.
   Put maximal ASAP to PMAX_ASAP.  Return the recMII for the given DDG.  */
static int
sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
{
  int i;
  int rec_mii = 0;
  ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);

  /* Compute ASAP/ALAP/HEIGHT for every node; hung off each node's AUX.  */
  nopa nops = calculate_order_params (g, mii, pmax_asap);

  if (dump_file)
    print_sccs (dump_file, sccs, g);

  order_nodes_of_sccs (sccs, node_order);

  if (sccs->num_sccs > 0)
    /* First SCC has the largest recurrence_length.  */
    rec_mii = sccs->sccs[0]->recurrence_length;

  /* Save ASAP before destroying node_order_params.  */
  for (i = 0; i < g->num_nodes; i++)
    {
      ddg_node_ptr v = &g->nodes[i];
      v->aux.count = ASAP (v);
    }

  free (nops);
  free_ddg_all_sccs (sccs);
  /* Verify the produced order is a valid permutation.  */
  check_nodes_order (node_order, g->num_nodes);

  return rec_mii;
}
2118 | |
/* Fill NODE_ORDER with an ordering of all nodes of ALL_SCCS's DDG,
   processing SCCs from highest recMII down, each SCC extended with the
   nodes lying on paths to/from previously ordered SCCs; remaining
   nodes outside any SCC are ordered last, one connected component per
   call to order_nodes_in_scc.  */
static void
order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
{
  int i, pos = 0;
  ddg_ptr g = all_sccs->ddg;
  int num_nodes = g->num_nodes;
  sbitmap prev_sccs = sbitmap_alloc (num_nodes);
  sbitmap on_path = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);
  sbitmap ones = sbitmap_alloc (num_nodes);

  sbitmap_zero (prev_sccs);
  sbitmap_ones (ones);

  /* Perform the node ordering starting from the SCC with the highest recMII.
     For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
  for (i = 0; i < all_sccs->num_sccs; i++)
    {
      ddg_scc_ptr scc = all_sccs->sccs[i];

      /* Add nodes on paths from previous SCCs to the current SCC.  */
      find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
      sbitmap_a_or_b (tmp, scc->nodes, on_path);

      /* Add nodes on paths from the current SCC to previous SCCs.  */
      find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
      sbitmap_a_or_b (tmp, tmp, on_path);

      /* Remove nodes of previous SCCs from current extended SCC.  */
      sbitmap_difference (tmp, tmp, prev_sccs);

      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
      /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
    }

  /* Handle the remaining nodes that do not belong to any scc.  Each call
     to order_nodes_in_scc handles a single connected component.  */
  while (pos < g->num_nodes)
    {
      sbitmap_difference (tmp, ones, prev_sccs);
      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
    }
  sbitmap_free (prev_sccs);
  sbitmap_free (on_path);
  sbitmap_free (tmp);
  sbitmap_free (ones);
}
2166 | |
/* MII is needed if we consider backarcs (that do not close recursive cycles).
   Compute ASAP/ALAP/HEIGHT ordering parameters for every node of G and
   attach them to the nodes' AUX fields; store the maximal ASAP in
   *PMAX_ASAP.  Return the freshly allocated parameter array, which the
   caller must free.  */
static struct node_order_params *
calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
{
  int u;
  int max_asap;
  int num_nodes = g->num_nodes;
  ddg_edge_ptr e;
  /* Allocate a place to hold ordering params for each node in the DDG.  */
  nopa node_order_params_arr;

  /* Initialize of ASAP/ALAP/HEIGHT to zero.  */
  node_order_params_arr = (nopa) xcalloc (num_nodes,
					  sizeof (struct node_order_params));

  /* Set the aux pointer of each node to point to its order_params structure.  */
  for (u = 0; u < num_nodes; u++)
    g->nodes[u].aux.info = &node_order_params_arr[u];

  /* Disregarding a backarc from each recursive cycle to obtain a DAG,
     calculate ASAP, ALAP, mobility, distance, and height for each node
     in the dependence (direct acyclic) graph.  */

  /* We assume that the nodes in the array are in topological order.  */

  /* Forward pass: ASAP is the longest-latency path from any root,
     considering only same-iteration (distance 0) dependences.  */
  max_asap = 0;
  for (u = 0; u < num_nodes; u++)
    {
      ddg_node_ptr u_node = &g->nodes[u];

      ASAP (u_node) = 0;
      for (e = u_node->in; e; e = e->next_in)
	if (e->distance == 0)
	  ASAP (u_node) = MAX (ASAP (u_node),
			       ASAP (e->src) + e->latency);
      max_asap = MAX (max_asap, ASAP (u_node));
    }

  /* Backward pass: ALAP counts down from the maximal ASAP; HEIGHT is
     the longest-latency path to any leaf.  */
  for (u = num_nodes - 1; u > -1; u--)
    {
      ddg_node_ptr u_node = &g->nodes[u];

      ALAP (u_node) = max_asap;
      HEIGHT (u_node) = 0;
      for (e = u_node->out; e; e = e->next_out)
	if (e->distance == 0)
	  {
	    ALAP (u_node) = MIN (ALAP (u_node),
				 ALAP (e->dest) - e->latency);
	    HEIGHT (u_node) = MAX (HEIGHT (u_node),
				   HEIGHT (e->dest) + e->latency);
	  }
    }
  if (dump_file)
    {
      fprintf (dump_file, "\nOrder params\n");
      for (u = 0; u < num_nodes; u++)
	{
	  ddg_node_ptr u_node = &g->nodes[u];

	  fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
		   ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
	}
    }

  *pmax_asap = max_asap;
  return node_order_params_arr;
}
2235 | |
2236 static int | |
2237 find_max_asap (ddg_ptr g, sbitmap nodes) | |
2238 { | |
2239 unsigned int u = 0; | |
2240 int max_asap = -1; | |
2241 int result = -1; | |
2242 sbitmap_iterator sbi; | |
2243 | |
2244 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi) | |
2245 { | |
2246 ddg_node_ptr u_node = &g->nodes[u]; | |
2247 | |
2248 if (max_asap < ASAP (u_node)) | |
2249 { | |
2250 max_asap = ASAP (u_node); | |
2251 result = u; | |
2252 } | |
2253 } | |
2254 return result; | |
2255 } | |
2256 | |
2257 static int | |
2258 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes) | |
2259 { | |
2260 unsigned int u = 0; | |
2261 int max_hv = -1; | |
2262 int min_mob = INT_MAX; | |
2263 int result = -1; | |
2264 sbitmap_iterator sbi; | |
2265 | |
2266 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi) | |
2267 { | |
2268 ddg_node_ptr u_node = &g->nodes[u]; | |
2269 | |
2270 if (max_hv < HEIGHT (u_node)) | |
2271 { | |
2272 max_hv = HEIGHT (u_node); | |
2273 min_mob = MOB (u_node); | |
2274 result = u; | |
2275 } | |
2276 else if ((max_hv == HEIGHT (u_node)) | |
2277 && (min_mob > MOB (u_node))) | |
2278 { | |
2279 min_mob = MOB (u_node); | |
2280 result = u; | |
2281 } | |
2282 } | |
2283 return result; | |
2284 } | |
2285 | |
2286 static int | |
2287 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes) | |
2288 { | |
2289 unsigned int u = 0; | |
2290 int max_dv = -1; | |
2291 int min_mob = INT_MAX; | |
2292 int result = -1; | |
2293 sbitmap_iterator sbi; | |
2294 | |
2295 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi) | |
2296 { | |
2297 ddg_node_ptr u_node = &g->nodes[u]; | |
2298 | |
2299 if (max_dv < DEPTH (u_node)) | |
2300 { | |
2301 max_dv = DEPTH (u_node); | |
2302 min_mob = MOB (u_node); | |
2303 result = u; | |
2304 } | |
2305 else if ((max_dv == DEPTH (u_node)) | |
2306 && (min_mob > MOB (u_node))) | |
2307 { | |
2308 min_mob = MOB (u_node); | |
2309 result = u; | |
2310 } | |
2311 } | |
2312 return result; | |
2313 } | |
2314 | |
/* Places the nodes of SCC into the NODE_ORDER array starting
   at position POS, according to the SMS ordering algorithm.
   NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
   the NODE_ORDER array, starting from position zero.  Returns the
   next free position in NODE_ORDER.  */
static int
order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
		    int * node_order, int pos)
{
  enum sms_direction dir;
  int num_nodes = g->num_nodes;
  sbitmap workset = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);
  sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
  sbitmap predecessors = sbitmap_alloc (num_nodes);
  sbitmap successors = sbitmap_alloc (num_nodes);

  sbitmap_zero (predecessors);
  find_predecessors (predecessors, g, nodes_ordered);

  sbitmap_zero (successors);
  find_successors (successors, g, nodes_ordered);

  /* Seed the workset: prefer SCC nodes that are predecessors of the
     already-ordered set (sweep bottom-up), else successors (sweep
     top-down), else start from the SCC node with maximal ASAP.  */
  sbitmap_zero (tmp);
  if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = BOTTOMUP;
    }
  else if (sbitmap_a_and_b_cg (tmp, successors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = TOPDOWN;
    }
  else
    {
      int u;

      sbitmap_zero (workset);
      if ((u = find_max_asap (g, scc)) >= 0)
	SET_BIT (workset, u);
      dir = BOTTOMUP;
    }

  /* Alternate direction each time the current workset is exhausted,
     until no unordered SCC node remains reachable.  */
  sbitmap_zero (zero_bitmap);
  while (!sbitmap_equal (workset, zero_bitmap))
    {
      int v;
      ddg_node_ptr v_node;
      sbitmap v_node_preds;
      sbitmap v_node_succs;

      if (dir == TOPDOWN)
	{
	  /* Top-down sweep: repeatedly pick max height, min mobility.  */
	  while (!sbitmap_equal (workset, zero_bitmap))
	    {
	      v = find_max_hv_min_mob (g, workset);
	      v_node = &g->nodes[v];
	      node_order[pos++] = v;
	      v_node_succs = NODE_SUCCESSORS (v_node);
	      sbitmap_a_and_b (tmp, v_node_succs, scc);

	      /* Don't consider the already ordered successors again.  */
	      sbitmap_difference (tmp, tmp, nodes_ordered);
	      sbitmap_a_or_b (workset, workset, tmp);
	      RESET_BIT (workset, v);
	      SET_BIT (nodes_ordered, v);
	    }
	  dir = BOTTOMUP;
	  sbitmap_zero (predecessors);
	  find_predecessors (predecessors, g, nodes_ordered);
	  sbitmap_a_and_b (workset, predecessors, scc);
	}
      else
	{
	  /* Bottom-up sweep: repeatedly pick max depth, min mobility.  */
	  while (!sbitmap_equal (workset, zero_bitmap))
	    {
	      v = find_max_dv_min_mob (g, workset);
	      v_node = &g->nodes[v];
	      node_order[pos++] = v;
	      v_node_preds = NODE_PREDECESSORS (v_node);
	      sbitmap_a_and_b (tmp, v_node_preds, scc);

	      /* Don't consider the already ordered predecessors again.  */
	      sbitmap_difference (tmp, tmp, nodes_ordered);
	      sbitmap_a_or_b (workset, workset, tmp);
	      RESET_BIT (workset, v);
	      SET_BIT (nodes_ordered, v);
	    }
	  dir = TOPDOWN;
	  sbitmap_zero (successors);
	  find_successors (successors, g, nodes_ordered);
	  sbitmap_a_and_b (workset, successors, scc);
	}
    }
  sbitmap_free (tmp);
  sbitmap_free (workset);
  sbitmap_free (zero_bitmap);
  sbitmap_free (predecessors);
  sbitmap_free (successors);
  return pos;
}
2416 | |
2417 | |
2418 /* This page contains functions for manipulating partial-schedules during | |
2419 modulo scheduling. */ | |
2420 | |
2421 /* Create a partial schedule and allocate a memory to hold II rows. */ | |
2422 | |
2423 static partial_schedule_ptr | |
2424 create_partial_schedule (int ii, ddg_ptr g, int history) | |
2425 { | |
2426 partial_schedule_ptr ps = XNEW (struct partial_schedule); | |
2427 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr)); | |
2428 ps->ii = ii; | |
2429 ps->history = history; | |
2430 ps->min_cycle = INT_MAX; | |
2431 ps->max_cycle = INT_MIN; | |
2432 ps->g = g; | |
2433 | |
2434 return ps; | |
2435 } | |
2436 | |
2437 /* Free the PS_INSNs in rows array of the given partial schedule. | |
2438 ??? Consider caching the PS_INSN's. */ | |
2439 static void | |
2440 free_ps_insns (partial_schedule_ptr ps) | |
2441 { | |
2442 int i; | |
2443 | |
2444 for (i = 0; i < ps->ii; i++) | |
2445 { | |
2446 while (ps->rows[i]) | |
2447 { | |
2448 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row; | |
2449 | |
2450 free (ps->rows[i]); | |
2451 ps->rows[i] = ps_insn; | |
2452 } | |
2453 ps->rows[i] = NULL; | |
2454 } | |
2455 } | |
2456 | |
2457 /* Free all the memory allocated to the partial schedule. */ | |
2458 | |
2459 static void | |
2460 free_partial_schedule (partial_schedule_ptr ps) | |
2461 { | |
2462 if (!ps) | |
2463 return; | |
2464 free_ps_insns (ps); | |
2465 free (ps->rows); | |
2466 free (ps); | |
2467 } | |
2468 | |
/* Clear the rows array with its PS_INSNs, and create a new one with
   NEW_II rows.  */
static void
reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
{
  if (!ps)
    return;
  free_ps_insns (ps);
  /* NOTE(review): when NEW_II equals the current ii we return with
     min_cycle/max_cycle untouched although all insns were freed —
     confirm callers never rely on the range of an emptied schedule.  */
  if (new_ii == ps->ii)
    return;
  ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
						 * sizeof (ps_insn_ptr));
  memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
  ps->ii = new_ii;
  /* Empty schedule: inverted cycle range, as in create_partial_schedule.  */
  ps->min_cycle = INT_MAX;
  ps->max_cycle = INT_MIN;
}
2487 | |
2488 /* Prints the partial schedule as an ii rows array, for each rows | |
2489 print the ids of the insns in it. */ | |
2490 void | |
2491 print_partial_schedule (partial_schedule_ptr ps, FILE *dump) | |
2492 { | |
2493 int i; | |
2494 | |
2495 for (i = 0; i < ps->ii; i++) | |
2496 { | |
2497 ps_insn_ptr ps_i = ps->rows[i]; | |
2498 | |
2499 fprintf (dump, "\n[ROW %d ]: ", i); | |
2500 while (ps_i) | |
2501 { | |
2502 fprintf (dump, "%d, ", | |
2503 INSN_UID (ps_i->node->insn)); | |
2504 ps_i = ps_i->next_in_row; | |
2505 } | |
2506 } | |
2507 } | |
2508 | |
2509 /* Creates an object of PS_INSN and initializes it to the given parameters. */ | |
2510 static ps_insn_ptr | |
2511 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle) | |
2512 { | |
2513 ps_insn_ptr ps_i = XNEW (struct ps_insn); | |
2514 | |
2515 ps_i->node = node; | |
2516 ps_i->next_in_row = NULL; | |
2517 ps_i->prev_in_row = NULL; | |
2518 ps_i->row_rest_count = rest_count; | |
2519 ps_i->cycle = cycle; | |
2520 | |
2521 return ps_i; | |
2522 } | |
2523 | |
2524 | |
2525 /* Removes the given PS_INSN from the partial schedule. Returns false if the | |
2526 node is not found in the partial schedule, else returns true. */ | |
2527 static bool | |
2528 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i) | |
2529 { | |
2530 int row; | |
2531 | |
2532 if (!ps || !ps_i) | |
2533 return false; | |
2534 | |
2535 row = SMODULO (ps_i->cycle, ps->ii); | |
2536 if (! ps_i->prev_in_row) | |
2537 { | |
2538 if (ps_i != ps->rows[row]) | |
2539 return false; | |
2540 | |
2541 ps->rows[row] = ps_i->next_in_row; | |
2542 if (ps->rows[row]) | |
2543 ps->rows[row]->prev_in_row = NULL; | |
2544 } | |
2545 else | |
2546 { | |
2547 ps_i->prev_in_row->next_in_row = ps_i->next_in_row; | |
2548 if (ps_i->next_in_row) | |
2549 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row; | |
2550 } | |
2551 free (ps_i); | |
2552 return true; | |
2553 } | |
2554 | |
/* Unlike what literature describes for modulo scheduling (which focuses
   on VLIW machines) the order of the instructions inside a cycle is
   important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
   where the current instruction should go relative to the already
   scheduled instructions in the given cycle.  Go over these
   instructions and find the first possible column to put it in.
   Return true iff a valid column was found and PS_I was linked in.  */
static bool
ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
		     sbitmap must_precede, sbitmap must_follow)
{
  ps_insn_ptr next_ps_i;
  ps_insn_ptr first_must_follow = NULL;
  ps_insn_ptr last_must_precede = NULL;
  int row;

  if (! ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Find the first must follow and the last must precede
     and insert the node immediately after the must precede
     but make sure that there is no must follow after it.  */
  for (next_ps_i = ps->rows[row];
       next_ps_i;
       next_ps_i = next_ps_i->next_in_row)
    {
      if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid)
	  && ! first_must_follow)
	first_must_follow = next_ps_i;
      if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid))
	{
	  /* If we have already met a node that must follow, then
	     there is no possible column.  */
	  if (first_must_follow)
	    return false;
	  else
	    last_must_precede = next_ps_i;
	}
    }

  /* Now insert the node after INSERT_AFTER_PSI.  */

  if (! last_must_precede)
    {
      /* No must-precede constraint: link PS_I at the head of the row.  */
      ps_i->next_in_row = ps->rows[row];
      ps_i->prev_in_row = NULL;
      if (ps_i->next_in_row)
	ps_i->next_in_row->prev_in_row = ps_i;
      ps->rows[row] = ps_i;
    }
  else
    {
      /* Link PS_I right after the last must-precede instruction.  */
      ps_i->next_in_row = last_must_precede->next_in_row;
      last_must_precede->next_in_row = ps_i;
      ps_i->prev_in_row = last_must_precede;
      if (ps_i->next_in_row)
	ps_i->next_in_row->prev_in_row = ps_i;
    }

  return true;
}
2617 | |
/* Advances the PS_INSN one column in its current row; returns false
   in failure and true in success.  Bit N is set in MUST_FOLLOW if
   the node with cuid N must be come after the node pointed to by
   PS_I when scheduled in the same cycle.  */
static int
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
			sbitmap must_follow)
{
  ps_insn_ptr prev, next;
  int row;
  ddg_node_ptr next_node;

  if (!ps || !ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Cannot advance past the end of the row.  */
  if (! ps_i->next_in_row)
    return false;

  next_node = ps_i->next_in_row->node;

  /* Check if next_in_row is dependent on ps_i, both having same sched
     times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
  if (must_follow && TEST_BIT (must_follow, next_node->cuid))
    return false;

  /* Advance PS_I over its next_in_row in the doubly linked list.  */
  prev = ps_i->prev_in_row;
  next = ps_i->next_in_row;

  /* If PS_I heads the row, its successor becomes the new head.  */
  if (ps_i == ps->rows[row])
    ps->rows[row] = next;

  /* Swap PS_I and NEXT: splice PS_I in after NEXT.  */
  ps_i->next_in_row = next->next_in_row;

  if (next->next_in_row)
    next->next_in_row->prev_in_row = ps_i;

  next->next_in_row = ps_i;
  ps_i->prev_in_row = next;

  next->prev_in_row = prev;
  if (prev)
    prev->next_in_row = next;

  return true;
}
2666 | |
2667 /* Inserts a DDG_NODE to the given partial schedule at the given cycle. | |
2668 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is | |
2669 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come | |
2670 before/after (respectively) the node pointed to by PS_I when scheduled | |
2671 in the same cycle. */ | |
2672 static ps_insn_ptr | |
2673 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle, | |
2674 sbitmap must_precede, sbitmap must_follow) | |
2675 { | |
2676 ps_insn_ptr ps_i; | |
2677 int rest_count = 1; | |
2678 int row = SMODULO (cycle, ps->ii); | |
2679 | |
2680 if (ps->rows[row] | |
2681 && ps->rows[row]->row_rest_count >= issue_rate) | |
2682 return NULL; | |
2683 | |
2684 if (ps->rows[row]) | |
2685 rest_count += ps->rows[row]->row_rest_count; | |
2686 | |
2687 ps_i = create_ps_insn (node, rest_count, cycle); | |
2688 | |
2689 /* Finds and inserts PS_I according to MUST_FOLLOW and | |
2690 MUST_PRECEDE. */ | |
2691 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow)) | |
2692 { | |
2693 free (ps_i); | |
2694 return NULL; | |
2695 } | |
2696 | |
2697 return ps_i; | |
2698 } | |
2699 | |
2700 /* Advance time one cycle. Assumes DFA is being used. */ | |
2701 static void | |
2702 advance_one_cycle (void) | |
2703 { | |
2704 if (targetm.sched.dfa_pre_cycle_insn) | |
2705 state_transition (curr_state, | |
2706 targetm.sched.dfa_pre_cycle_insn ()); | |
2707 | |
2708 state_transition (curr_state, NULL); | |
2709 | |
2710 if (targetm.sched.dfa_post_cycle_insn) | |
2711 state_transition (curr_state, | |
2712 targetm.sched.dfa_post_cycle_insn ()); | |
2713 } | |
2714 | |
2715 | |
2716 | |
/* Checks if PS has resource conflicts according to DFA, starting from
   FROM cycle to TO cycle; returns true if there are conflicts and false
   if there are no conflicts.  Assumes DFA is being used.  */
static int
ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
{
  int cycle;

  state_reset (curr_state);

  for (cycle = from; cycle <= to; cycle++)
    {
      ps_insn_ptr crr_insn;
      /* Holds the remaining issue slots in the current row.  */
      int can_issue_more = issue_rate;

      /* Walk through the DFA for the current row.  */
      for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
	   crr_insn;
	   crr_insn = crr_insn->next_in_row)
	{
	  rtx insn = crr_insn->node->insn;

	  if (!INSN_P (insn))
	    continue;

	  /* Check if there is room for the current insn.  */
	  if (!can_issue_more || state_dead_lock_p (curr_state))
	    return true;

	  /* Update the DFA state and return with failure if the DFA found
	     resource conflicts.  */
	  if (state_transition (curr_state, insn) >= 0)
	    return true;

	  if (targetm.sched.variable_issue)
	    can_issue_more =
	      targetm.sched.variable_issue (sched_dump, sched_verbose,
					    insn, can_issue_more);
	  /* A naked CLOBBER or USE generates no instruction, so don't
	     let them consume issue slots.  */
	  else if (GET_CODE (PATTERN (insn)) != USE
		   && GET_CODE (PATTERN (insn)) != CLOBBER)
	    can_issue_more--;
	}

      /* Advance the DFA to the next cycle.  */
      advance_one_cycle ();
    }
  return false;
}
2768 | |
2769 /* Checks if the given node causes resource conflicts when added to PS at | |
2770 cycle C. If not the node is added to PS and returned; otherwise zero | |
2771 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with | |
2772 cuid N must be come before/after (respectively) the node pointed to by | |
2773 PS_I when scheduled in the same cycle. */ | |
2774 ps_insn_ptr | |
2775 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n, | |
2776 int c, sbitmap must_precede, | |
2777 sbitmap must_follow) | |
2778 { | |
2779 int has_conflicts = 0; | |
2780 ps_insn_ptr ps_i; | |
2781 | |
2782 /* First add the node to the PS, if this succeeds check for | |
2783 conflicts, trying different issue slots in the same row. */ | |
2784 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow))) | |
2785 return NULL; /* Failed to insert the node at the given cycle. */ | |
2786 | |
2787 has_conflicts = ps_has_conflicts (ps, c, c) | |
2788 || (ps->history > 0 | |
2789 && ps_has_conflicts (ps, | |
2790 c - ps->history, | |
2791 c + ps->history)); | |
2792 | |
2793 /* Try different issue slots to find one that the given node can be | |
2794 scheduled in without conflicts. */ | |
2795 while (has_conflicts) | |
2796 { | |
2797 if (! ps_insn_advance_column (ps, ps_i, must_follow)) | |
2798 break; | |
2799 has_conflicts = ps_has_conflicts (ps, c, c) | |
2800 || (ps->history > 0 | |
2801 && ps_has_conflicts (ps, | |
2802 c - ps->history, | |
2803 c + ps->history)); | |
2804 } | |
2805 | |
2806 if (has_conflicts) | |
2807 { | |
2808 remove_node_from_ps (ps, ps_i); | |
2809 return NULL; | |
2810 } | |
2811 | |
2812 ps->min_cycle = MIN (ps->min_cycle, c); | |
2813 ps->max_cycle = MAX (ps->max_cycle, c); | |
2814 return ps_i; | |
2815 } | |
2816 | |
2817 /* Rotate the rows of PS such that insns scheduled at time | |
2818 START_CYCLE will appear in row 0. Updates max/min_cycles. */ | |
2819 void | |
2820 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle) | |
2821 { | |
2822 int i, row, backward_rotates; | |
2823 int last_row = ps->ii - 1; | |
2824 | |
2825 if (start_cycle == 0) | |
2826 return; | |
2827 | |
2828 backward_rotates = SMODULO (start_cycle, ps->ii); | |
2829 | |
2830 /* Revisit later and optimize this into a single loop. */ | |
2831 for (i = 0; i < backward_rotates; i++) | |
2832 { | |
2833 ps_insn_ptr first_row = ps->rows[0]; | |
2834 | |
2835 for (row = 0; row < last_row; row++) | |
2836 ps->rows[row] = ps->rows[row+1]; | |
2837 | |
2838 ps->rows[last_row] = first_row; | |
2839 } | |
2840 | |
2841 ps->max_cycle -= start_cycle; | |
2842 ps->min_cycle -= start_cycle; | |
2843 } | |
2844 | |
2845 #endif /* INSN_SCHEDULING */ | |
2846 | |
2847 static bool | |
2848 gate_handle_sms (void) | |
2849 { | |
2850 return (optimize > 0 && flag_modulo_sched); | |
2851 } | |
2852 | |
2853 | |
/* Run instruction scheduler.  */
/* Perform SMS module scheduling.  Execute hook of the "sms" pass;
   always returns 0 (no extra TODO flags).  A no-op unless the target
   provides INSN_SCHEDULING.  */
static unsigned int
rest_of_handle_sms (void)
{
#ifdef INSN_SCHEDULING
  basic_block bb;

  /* Collect loop information to be used in SMS.  */
  cfg_layout_initialize (0);
  sms_schedule ();

  /* Update the life information, because we add pseudos.  */
  max_regno = max_reg_num ();

  /* Finalize layout changes.  Record the current linear block order
     in the bb->aux chain (presumably consumed by cfg_layout_finalize
     — NOTE(review): confirm).  */
  FOR_EACH_BB (bb)
    if (bb->next_bb != EXIT_BLOCK_PTR)
      bb->aux = bb->next_bb;
  /* Dominance info is stale after scheduling rewrote the CFG.  */
  free_dominance_info (CDI_DOMINATORS);
  cfg_layout_finalize ();
#endif /* INSN_SCHEDULING */
  return 0;
}
2878 | |
/* Pass descriptor registering Swing Modulo Scheduling with the RTL
   pass manager.  NOTE(review): TODO_dump_func appears in both
   todo_flags_start and todo_flags_finish — possibly intentional
   (dump before and after), worth confirming.  */
struct rtl_opt_pass pass_sms =
{
 {
  RTL_PASS,
  "sms",                                /* name */
  gate_handle_sms,                      /* gate */
  rest_of_handle_sms,                   /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_SMS,                               /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  TODO_dump_func,                       /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_dump_func |
  TODO_ggc_collect                      /* todo_flags_finish */
 }
};
2899 |