Mercurial > hg > CbC > CbC_gcc
annotate gcc/modulo-sched.c @ 80:d8bf5c8fdea8
fix comments
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Sat, 24 Sep 2011 02:41:02 +0900 |
parents | f6334be47118 |
children | 04ced10e8804 |
rev | line source |
---|---|
0 | 1 /* Swing Modulo Scheduling implementation. |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 |
0 | 3 Free Software Foundation, Inc. |
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com> | |
5 | |
6 This file is part of GCC. | |
7 | |
8 GCC is free software; you can redistribute it and/or modify it under | |
9 the terms of the GNU General Public License as published by the Free | |
10 Software Foundation; either version 3, or (at your option) any later | |
11 version. | |
12 | |
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
19 along with GCC; see the file COPYING3. If not see | |
20 <http://www.gnu.org/licenses/>. */ | |
21 | |
22 | |
23 #include "config.h" | |
24 #include "system.h" | |
25 #include "coretypes.h" | |
26 #include "tm.h" | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
27 #include "diagnostic-core.h" |
0 | 28 #include "rtl.h" |
29 #include "tm_p.h" | |
30 #include "hard-reg-set.h" | |
31 #include "regs.h" | |
32 #include "function.h" | |
33 #include "flags.h" | |
34 #include "insn-config.h" | |
35 #include "insn-attr.h" | |
36 #include "except.h" | |
37 #include "recog.h" | |
38 #include "sched-int.h" | |
39 #include "target.h" | |
40 #include "cfglayout.h" | |
41 #include "cfgloop.h" | |
42 #include "cfghooks.h" | |
43 #include "expr.h" | |
44 #include "params.h" | |
45 #include "gcov-io.h" | |
46 #include "ddg.h" | |
47 #include "timevar.h" | |
48 #include "tree-pass.h" | |
49 #include "dbgcnt.h" | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
50 #include "df.h" |
0 | 51 |
52 #ifdef INSN_SCHEDULING | |
53 | |
54 /* This file contains the implementation of the Swing Modulo Scheduler, | |
55 described in the following references: | |
56 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt. | |
57 Lifetime--sensitive modulo scheduling in a production environment. | |
58 IEEE Trans. on Comps., 50(3), March 2001 | |
59 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero. | |
60 Swing Modulo Scheduling: A Lifetime Sensitive Approach. | |
61 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA). | |
62 | |
63 The basic structure is: | |
64 1. Build a data-dependence graph (DDG) for each loop. | |
65 2. Use the DDG to order the insns of a loop (not in topological order | |
66 necessarily, but rather) trying to place each insn after all its | |
67 predecessors _or_ after all its successors. | |
68 3. Compute MII: a lower bound on the number of cycles to schedule the loop. | |
69 4. Use the ordering to perform list-scheduling of the loop: | |
70 1. Set II = MII. We will try to schedule the loop within II cycles. | |
71 2. Try to schedule the insns one by one according to the ordering. | |
72 For each insn compute an interval of cycles by considering already- | |
73 scheduled preds and succs (and associated latencies); try to place | |
74 the insn in the cycles of this window checking for potential | |
75 resource conflicts (using the DFA interface). | |
76 Note: this is different from the cycle-scheduling of schedule_insns; | |
77 here the insns are not scheduled monotonically top-down (nor bottom- | |
78 up). | |
79 3. If failed in scheduling all insns - bump II++ and try again, unless | |
80 II reaches an upper bound MaxII, in which case report failure. | |
81 5. If we succeeded in scheduling the loop within II cycles, we now | |
82 generate prolog and epilog, decrease the counter of the loop, and | |
83 perform modulo variable expansion for live ranges that span more than | |
84 II cycles (i.e. use register copies to prevent a def from overwriting | |
85 itself before reaching the use). | |
86 | |
87 SMS works with countable loops (1) whose control part can be easily | |
88 decoupled from the rest of the loop and (2) whose loop count can | |
89 be easily adjusted. This is because we peel a constant number of | |
90 iterations into a prologue and epilogue for which we want to avoid | |
91 emitting the control part, and a kernel which is to iterate that | |
92 constant number of iterations less than the original loop. So the | |
93 control part should be a set of insns clearly identified and having | |
94 its own iv, not otherwise used in the loop (at-least for now), which | |
95 initializes a register before the loop to the number of iterations. | |
96 Currently SMS relies on the do-loop pattern to recognize such loops, | |
97 where (1) the control part comprises of all insns defining and/or | |
98 using a certain 'count' register and (2) the loop count can be | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
99 adjusted by modifying this register prior to the loop. |
0 | 100 TODO: Rely on cfgloop analysis instead. */ |
101 | |
102 /* This page defines partial-schedule structures and functions for | |
103 modulo scheduling. */ | |
104 | |
105 typedef struct partial_schedule *partial_schedule_ptr; | |
106 typedef struct ps_insn *ps_insn_ptr; | |
107 | |
108 /* The minimum (absolute) cycle that a node of ps was scheduled in. */ | |
109 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle) | |
110 | |
111 /* The maximum (absolute) cycle that a node of ps was scheduled in. */ | |
112 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle) | |
113 | |
114 /* Perform signed modulo, always returning a non-negative value. */ | |
115 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y)) | |
116 | |
117 /* The number of different iterations the nodes in ps span, assuming | |
118 the stage boundaries are placed efficiently. */ | |
119 #define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \ | |
120 + 1 + (ps)->ii - 1) / (ps)->ii) | |
121 | |
122 /* A single instruction in the partial schedule. */ | |
123 struct ps_insn | |
124 { | |
125 /* The corresponding DDG_NODE. */ | |
126 ddg_node_ptr node; | |
127 | |
128 /* The (absolute) cycle in which the PS instruction is scheduled. | |
129 Same as SCHED_TIME (node). */ | |
130 int cycle; | |
131 | |
132 /* The next/prev PS_INSN in the same row. */ | |
133 ps_insn_ptr next_in_row, | |
134 prev_in_row; | |
135 | |
136 /* The number of nodes in the same row that come after this node. */ | |
137 int row_rest_count; | |
138 }; | |
139 | |
140 /* Holds the partial schedule as an array of II rows. Each entry of the | |
141 array points to a linked list of PS_INSNs, which represents the | |
142 instructions that are scheduled for that row. */ | |
143 struct partial_schedule | |
144 { | |
145 int ii; /* Number of rows in the partial schedule. */ | |
146 int history; /* Threshold for conflict checking using DFA. */ | |
147 | |
148 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */ | |
149 ps_insn_ptr *rows; | |
150 | |
151 /* The earliest absolute cycle of an insn in the partial schedule. */ | |
152 int min_cycle; | |
153 | |
154 /* The latest absolute cycle of an insn in the partial schedule. */ | |
155 int max_cycle; | |
156 | |
157 ddg_ptr g; /* The DDG of the insns in the partial schedule. */ | |
158 }; | |
159 | |
160 /* We use this to record all the register replacements we do in | |
161 the kernel so we can undo SMS if it is not profitable. */ | |
162 struct undo_replace_buff_elem | |
163 { | |
164 rtx insn; | |
165 rtx orig_reg; | |
166 rtx new_reg; | |
167 struct undo_replace_buff_elem *next; | |
168 }; | |
169 | |
170 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
171 |
0 | 172 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history); |
173 static void free_partial_schedule (partial_schedule_ptr); | |
174 static void reset_partial_schedule (partial_schedule_ptr, int new_ii); | |
175 void print_partial_schedule (partial_schedule_ptr, FILE *); | |
176 static void verify_partial_schedule (partial_schedule_ptr, sbitmap); | |
177 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr, | |
178 ddg_node_ptr node, int cycle, | |
179 sbitmap must_precede, | |
180 sbitmap must_follow); | |
181 static void rotate_partial_schedule (partial_schedule_ptr, int); | |
182 void set_row_column_for_ps (partial_schedule_ptr); | |
183 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap); | |
184 static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr); | |
185 | |
186 | |
187 /* This page defines constants and structures for the modulo scheduling | |
188 driver. */ | |
189 | |
190 static int sms_order_nodes (ddg_ptr, int, int *, int *); | |
191 static void set_node_sched_params (ddg_ptr); | |
192 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *); | |
193 static void permute_partial_schedule (partial_schedule_ptr, rtx); | |
194 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *, | |
195 rtx, rtx); | |
196 static void duplicate_insns_of_cycles (partial_schedule_ptr, | |
197 int, int, int, rtx); | |
198 | |
199 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap) | |
200 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time) | |
201 #define SCHED_FIRST_REG_MOVE(x) \ | |
202 (((node_sched_params_ptr)(x)->aux.info)->first_reg_move) | |
203 #define SCHED_NREG_MOVES(x) \ | |
204 (((node_sched_params_ptr)(x)->aux.info)->nreg_moves) | |
205 #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row) | |
206 #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage) | |
207 #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column) | |
208 | |
209 /* The scheduling parameters held for each node. */ | |
210 typedef struct node_sched_params | |
211 { | |
212 int asap; /* A lower-bound on the absolute scheduling cycle. */ | |
213 int time; /* The absolute scheduling cycle (time >= asap). */ | |
214 | |
215 /* The following field (first_reg_move) is a pointer to the first | |
216 register-move instruction added to handle the modulo-variable-expansion | |
217 of the register defined by this node. This register-move copies the | |
218 original register defined by the node. */ | |
219 rtx first_reg_move; | |
220 | |
221 /* The number of register-move instructions added, immediately preceding | |
222 first_reg_move. */ | |
223 int nreg_moves; | |
224 | |
225 int row; /* Holds time % ii. */ | |
226 int stage; /* Holds time / ii. */ | |
227 | |
228 /* The column of a node inside the ps. If nodes u, v are on the same row, | |
229 u will precede v if column (u) < column (v). */ | |
230 int column; | |
231 } *node_sched_params_ptr; | |
232 | |
233 | |
234 /* The following three functions are copied from the current scheduler | |
235 code in order to use sched_analyze() for computing the dependencies. | |
236 They are used when initializing the sched_info structure. */ | |
237 static const char * | |
238 sms_print_insn (const_rtx insn, int aligned ATTRIBUTE_UNUSED) | |
239 { | |
240 static char tmp[80]; | |
241 | |
242 sprintf (tmp, "i%4d", INSN_UID (insn)); | |
243 return tmp; | |
244 } | |
245 | |
246 static void | |
247 compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED, | |
248 regset cond_exec ATTRIBUTE_UNUSED, | |
249 regset used ATTRIBUTE_UNUSED, | |
250 regset set ATTRIBUTE_UNUSED) | |
251 { | |
252 } | |
253 | |
254 static struct common_sched_info_def sms_common_sched_info; | |
255 | |
256 static struct sched_deps_info_def sms_sched_deps_info = | |
257 { | |
258 compute_jump_reg_dependencies, | |
259 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | |
260 NULL, | |
261 0, 0, 0 | |
262 }; | |
263 | |
264 static struct haifa_sched_info sms_sched_info = | |
265 { | |
266 NULL, | |
267 NULL, | |
268 NULL, | |
269 NULL, | |
270 NULL, | |
271 sms_print_insn, | |
272 NULL, | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
273 NULL, /* insn_finishes_block_p */ |
0 | 274 NULL, NULL, |
275 NULL, NULL, | |
276 0, 0, | |
277 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
278 NULL, NULL, NULL, |
0 | 279 0 |
280 }; | |
281 | |
282 /* Given HEAD and TAIL which are the first and last insns in a loop; | |
283 return the register which controls the loop. Return zero if it has | |
284 more than one occurrence in the loop besides the control part or the | |
285 do-loop pattern is not of the form we expect. */ | |
286 static rtx | |
287 doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED) | |
288 { | |
289 #ifdef HAVE_doloop_end | |
290 rtx reg, condition, insn, first_insn_not_to_check; | |
291 | |
292 if (!JUMP_P (tail)) | |
293 return NULL_RTX; | |
294 | |
295 /* TODO: Free SMS's dependence on doloop_condition_get. */ | |
296 condition = doloop_condition_get (tail); | |
297 if (! condition) | |
298 return NULL_RTX; | |
299 | |
300 if (REG_P (XEXP (condition, 0))) | |
301 reg = XEXP (condition, 0); | |
302 else if (GET_CODE (XEXP (condition, 0)) == PLUS | |
303 && REG_P (XEXP (XEXP (condition, 0), 0))) | |
304 reg = XEXP (XEXP (condition, 0), 0); | |
305 else | |
306 gcc_unreachable (); | |
307 | |
308 /* Check that the COUNT_REG has no other occurrences in the loop | |
309 until the decrement. We assume the control part consists of | |
310 either a single (parallel) branch-on-count or a (non-parallel) | |
311 branch immediately preceded by a single (decrement) insn. */ | |
312 first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail | |
313 : PREV_INSN (tail)); | |
314 | |
315 for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn)) | |
316 if (reg_mentioned_p (reg, insn)) | |
317 { | |
318 if (dump_file) | |
319 { | |
320 fprintf (dump_file, "SMS count_reg found "); | |
321 print_rtl_single (dump_file, reg); | |
322 fprintf (dump_file, " outside control in insn:\n"); | |
323 print_rtl_single (dump_file, insn); | |
324 } | |
325 | |
326 return NULL_RTX; | |
327 } | |
328 | |
329 return reg; | |
330 #else | |
331 return NULL_RTX; | |
332 #endif | |
333 } | |
334 | |
335 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so | |
336 that the number of iterations is a compile-time constant. If so, | |
337 return the rtx that sets COUNT_REG to a constant, and set COUNT to | |
338 this constant. Otherwise return 0. */ | |
339 static rtx | |
340 const_iteration_count (rtx count_reg, basic_block pre_header, | |
341 HOST_WIDEST_INT * count) | |
342 { | |
343 rtx insn; | |
344 rtx head, tail; | |
345 | |
346 if (! pre_header) | |
347 return NULL_RTX; | |
348 | |
349 get_ebb_head_tail (pre_header, pre_header, &head, &tail); | |
350 | |
351 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn)) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
352 if (NONDEBUG_INSN_P (insn) && single_set (insn) && |
0 | 353 rtx_equal_p (count_reg, SET_DEST (single_set (insn)))) |
354 { | |
355 rtx pat = single_set (insn); | |
356 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
357 if (CONST_INT_P (SET_SRC (pat))) |
0 | 358 { |
359 *count = INTVAL (SET_SRC (pat)); | |
360 return insn; | |
361 } | |
362 | |
363 return NULL_RTX; | |
364 } | |
365 | |
366 return NULL_RTX; | |
367 } | |
368 | |
369 /* A very simple resource-based lower bound on the initiation interval. | |
370 ??? Improve the accuracy of this bound by considering the | |
371 utilization of various units. */ | |
372 static int | |
373 res_MII (ddg_ptr g) | |
374 { | |
375 if (targetm.sched.sms_res_mii) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
376 return targetm.sched.sms_res_mii (g); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
377 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
378 return ((g->num_nodes - g->num_debug) / issue_rate); |
0 | 379 } |
380 | |
381 | |
382 /* Points to the array that contains the sched data for each node. */ | |
383 static node_sched_params_ptr node_sched_params; | |
384 | |
385 /* Allocate sched_params for each node and initialize it. Assumes that | |
386 the aux field of each node contain the asap bound (computed earlier), | |
387 and copies it into the sched_params field. */ | |
388 static void | |
389 set_node_sched_params (ddg_ptr g) | |
390 { | |
391 int i; | |
392 | |
393 /* Allocate for each node in the DDG a place to hold the "sched_data". */ | |
394 /* Initialize ASAP/ALAP/HIGHT to zero. */ | |
395 node_sched_params = (node_sched_params_ptr) | |
396 xcalloc (g->num_nodes, | |
397 sizeof (struct node_sched_params)); | |
398 | |
399 /* Set the pointer of the general data of the node to point to the | |
400 appropriate sched_params structure. */ | |
401 for (i = 0; i < g->num_nodes; i++) | |
402 { | |
403 /* Watch out for aliasing problems? */ | |
404 node_sched_params[i].asap = g->nodes[i].aux.count; | |
405 g->nodes[i].aux.info = &node_sched_params[i]; | |
406 } | |
407 } | |
408 | |
409 static void | |
410 print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g) | |
411 { | |
412 int i; | |
413 | |
414 if (! file) | |
415 return; | |
416 for (i = 0; i < num_nodes; i++) | |
417 { | |
418 node_sched_params_ptr nsp = &node_sched_params[i]; | |
419 rtx reg_move = nsp->first_reg_move; | |
420 int j; | |
421 | |
422 fprintf (file, "Node = %d; INSN = %d\n", i, | |
423 (INSN_UID (g->nodes[i].insn))); | |
424 fprintf (file, " asap = %d:\n", nsp->asap); | |
425 fprintf (file, " time = %d:\n", nsp->time); | |
426 fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves); | |
427 for (j = 0; j < nsp->nreg_moves; j++) | |
428 { | |
429 fprintf (file, " reg_move = "); | |
430 print_rtl_single (file, reg_move); | |
431 reg_move = PREV_INSN (reg_move); | |
432 } | |
433 } | |
434 } | |
435 | |
436 /* | |
437 Breaking intra-loop register anti-dependences: | |
438 Each intra-loop register anti-dependence implies a cross-iteration true | |
439 dependence of distance 1. Therefore, we can remove such false dependencies | |
440 and figure out if the partial schedule broke them by checking if (for a | |
441 true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and | |
442 if so generate a register move. The number of such moves is equal to: | |
443 SCHED_TIME (use) - SCHED_TIME (def) { 0 broken | |
444 nreg_moves = ----------------------------------- + 1 - { dependence. | |
445 ii { 1 if not. | |
446 */ | |
447 static struct undo_replace_buff_elem * | |
448 generate_reg_moves (partial_schedule_ptr ps, bool rescan) | |
449 { | |
450 ddg_ptr g = ps->g; | |
451 int ii = ps->ii; | |
452 int i; | |
453 struct undo_replace_buff_elem *reg_move_replaces = NULL; | |
454 | |
455 for (i = 0; i < g->num_nodes; i++) | |
456 { | |
457 ddg_node_ptr u = &g->nodes[i]; | |
458 ddg_edge_ptr e; | |
459 int nreg_moves = 0, i_reg_move; | |
460 sbitmap *uses_of_defs; | |
461 rtx last_reg_move; | |
462 rtx prev_reg, old_reg; | |
463 | |
464 /* Compute the number of reg_moves needed for u, by looking at life | |
465 ranges started at u (excluding self-loops). */ | |
466 for (e = u->out; e; e = e->next_out) | |
467 if (e->type == TRUE_DEP && e->dest != e->src) | |
468 { | |
469 int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii; | |
470 | |
471 if (e->distance == 1) | |
472 nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii; | |
473 | |
474 /* If dest precedes src in the schedule of the kernel, then dest | |
475 will read before src writes and we can save one reg_copy. */ | |
476 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src) | |
477 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src)) | |
478 nreg_moves4e--; | |
479 | |
480 nreg_moves = MAX (nreg_moves, nreg_moves4e); | |
481 } | |
482 | |
483 if (nreg_moves == 0) | |
484 continue; | |
485 | |
486 /* Every use of the register defined by node may require a different | |
487 copy of this register, depending on the time the use is scheduled. | |
488 Set a bitmap vector, telling which nodes use each copy of this | |
489 register. */ | |
490 uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes); | |
491 sbitmap_vector_zero (uses_of_defs, nreg_moves); | |
492 for (e = u->out; e; e = e->next_out) | |
493 if (e->type == TRUE_DEP && e->dest != e->src) | |
494 { | |
495 int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii; | |
496 | |
497 if (e->distance == 1) | |
498 dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii; | |
499 | |
500 if (SCHED_ROW (e->dest) == SCHED_ROW (e->src) | |
501 && SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src)) | |
502 dest_copy--; | |
503 | |
504 if (dest_copy) | |
505 SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid); | |
506 } | |
507 | |
508 /* Now generate the reg_moves, attaching relevant uses to them. */ | |
509 SCHED_NREG_MOVES (u) = nreg_moves; | |
510 old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn))); | |
511 /* Insert the reg-moves right before the notes which precede | |
512 the insn they relates to. */ | |
513 last_reg_move = u->first_note; | |
514 | |
515 for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++) | |
516 { | |
517 unsigned int i_use = 0; | |
518 rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg)); | |
519 rtx reg_move = gen_move_insn (new_reg, prev_reg); | |
520 sbitmap_iterator sbi; | |
521 | |
522 add_insn_before (reg_move, last_reg_move, NULL); | |
523 last_reg_move = reg_move; | |
524 | |
525 if (!SCHED_FIRST_REG_MOVE (u)) | |
526 SCHED_FIRST_REG_MOVE (u) = reg_move; | |
527 | |
528 EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi) | |
529 { | |
530 struct undo_replace_buff_elem *rep; | |
531 | |
532 rep = (struct undo_replace_buff_elem *) | |
533 xcalloc (1, sizeof (struct undo_replace_buff_elem)); | |
534 rep->insn = g->nodes[i_use].insn; | |
535 rep->orig_reg = old_reg; | |
536 rep->new_reg = new_reg; | |
537 | |
538 if (! reg_move_replaces) | |
539 reg_move_replaces = rep; | |
540 else | |
541 { | |
542 rep->next = reg_move_replaces; | |
543 reg_move_replaces = rep; | |
544 } | |
545 | |
546 replace_rtx (g->nodes[i_use].insn, old_reg, new_reg); | |
547 if (rescan) | |
548 df_insn_rescan (g->nodes[i_use].insn); | |
549 } | |
550 | |
551 prev_reg = new_reg; | |
552 } | |
553 sbitmap_vector_free (uses_of_defs); | |
554 } | |
555 return reg_move_replaces; | |
556 } | |
557 | |
558 /* Free memory allocated for the undo buffer. */ | |
559 static void | |
560 free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces) | |
561 { | |
562 | |
563 while (reg_move_replaces) | |
564 { | |
565 struct undo_replace_buff_elem *rep = reg_move_replaces; | |
566 | |
567 reg_move_replaces = reg_move_replaces->next; | |
568 free (rep); | |
569 } | |
570 } | |
571 | |
572 /* Bump the SCHED_TIMEs of all nodes to start from zero. Set the values | |
573 of SCHED_ROW and SCHED_STAGE. */ | |
574 static void | |
575 normalize_sched_times (partial_schedule_ptr ps) | |
576 { | |
577 int row; | |
578 int amount = PS_MIN_CYCLE (ps); | |
579 int ii = ps->ii; | |
580 ps_insn_ptr crr_insn; | |
581 | |
582 for (row = 0; row < ii; row++) | |
583 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) | |
584 { | |
585 ddg_node_ptr u = crr_insn->node; | |
586 int normalized_time = SCHED_TIME (u) - amount; | |
587 | |
588 if (dump_file) | |
589 fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\ | |
590 min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME | |
591 (u), ps->min_cycle); | |
592 gcc_assert (SCHED_TIME (u) >= ps->min_cycle); | |
593 gcc_assert (SCHED_TIME (u) <= ps->max_cycle); | |
594 SCHED_TIME (u) = normalized_time; | |
595 SCHED_ROW (u) = normalized_time % ii; | |
596 SCHED_STAGE (u) = normalized_time / ii; | |
597 } | |
598 } | |
599 | |
600 /* Set SCHED_COLUMN of each node according to its position in PS. */ | |
601 static void | |
602 set_columns_for_ps (partial_schedule_ptr ps) | |
603 { | |
604 int row; | |
605 | |
606 for (row = 0; row < ps->ii; row++) | |
607 { | |
608 ps_insn_ptr cur_insn = ps->rows[row]; | |
609 int column = 0; | |
610 | |
611 for (; cur_insn; cur_insn = cur_insn->next_in_row) | |
612 SCHED_COLUMN (cur_insn->node) = column++; | |
613 } | |
614 } | |
615 | |
616 /* Permute the insns according to their order in PS, from row 0 to | |
617 row ii-1, and position them right before LAST. This schedules | |
618 the insns of the loop kernel. */ | |
619 static void | |
620 permute_partial_schedule (partial_schedule_ptr ps, rtx last) | |
621 { | |
622 int ii = ps->ii; | |
623 int row; | |
624 ps_insn_ptr ps_ij; | |
625 | |
626 for (row = 0; row < ii ; row++) | |
627 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) | |
628 if (PREV_INSN (last) != ps_ij->node->insn) | |
629 reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn, | |
630 PREV_INSN (last)); | |
631 } | |
632 | |
633 static void | |
634 duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage, | |
635 int to_stage, int for_prolog, rtx count_reg) | |
636 { | |
637 int row; | |
638 ps_insn_ptr ps_ij; | |
639 | |
640 for (row = 0; row < ps->ii; row++) | |
641 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row) | |
642 { | |
643 ddg_node_ptr u_node = ps_ij->node; | |
644 int j, i_reg_moves; | |
645 rtx reg_move = NULL_RTX; | |
646 | |
647 /* Do not duplicate any insn which refers to count_reg as it | |
648 belongs to the control part. | |
649 TODO: This should be done by analyzing the control part of | |
650 the loop. */ | |
651 if (reg_mentioned_p (count_reg, u_node->insn)) | |
652 continue; | |
653 | |
654 if (for_prolog) | |
655 { | |
656 /* SCHED_STAGE (u_node) >= from_stage == 0. Generate increasing | |
657 number of reg_moves starting with the second occurrence of | |
658 u_node, which is generated if its SCHED_STAGE <= to_stage. */ | |
659 i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1; | |
660 i_reg_moves = MAX (i_reg_moves, 0); | |
661 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node)); | |
662 | |
663 /* The reg_moves start from the *first* reg_move backwards. */ | |
664 if (i_reg_moves) | |
665 { | |
666 reg_move = SCHED_FIRST_REG_MOVE (u_node); | |
667 for (j = 1; j < i_reg_moves; j++) | |
668 reg_move = PREV_INSN (reg_move); | |
669 } | |
670 } | |
671 else /* It's for the epilog. */ | |
672 { | |
673 /* SCHED_STAGE (u_node) <= to_stage. Generate all reg_moves, | |
674 starting to decrease one stage after u_node no longer occurs; | |
675 that is, generate all reg_moves until | |
676 SCHED_STAGE (u_node) == from_stage - 1. */ | |
677 i_reg_moves = SCHED_NREG_MOVES (u_node) | |
678 - (from_stage - SCHED_STAGE (u_node) - 1); | |
679 i_reg_moves = MAX (i_reg_moves, 0); | |
680 i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node)); | |
681 | |
682 /* The reg_moves start from the *last* reg_move forwards. */ | |
683 if (i_reg_moves) | |
684 { | |
685 reg_move = SCHED_FIRST_REG_MOVE (u_node); | |
686 for (j = 1; j < SCHED_NREG_MOVES (u_node); j++) | |
687 reg_move = PREV_INSN (reg_move); | |
688 } | |
689 } | |
690 | |
691 for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move)) | |
692 emit_insn (copy_rtx (PATTERN (reg_move))); | |
693 if (SCHED_STAGE (u_node) >= from_stage | |
694 && SCHED_STAGE (u_node) <= to_stage) | |
695 duplicate_insn_chain (u_node->first_note, u_node->insn); | |
696 } | |
697 } | |
698 | |
699 | |
700 /* Generate the instructions (including reg_moves) for prolog & epilog. */ | |
701 static void | |
702 generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop, | |
703 rtx count_reg, rtx count_init) | |
704 { | |
705 int i; | |
706 int last_stage = PS_STAGE_COUNT (ps) - 1; | |
707 edge e; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
708 |
0 | 709 /* Generate the prolog, inserting its insns on the loop-entry edge. */ |
710 start_sequence (); | |
711 | |
712 if (!count_init) | |
713 { | |
714 /* Generate instructions at the beginning of the prolog to | |
715 adjust the loop count by STAGE_COUNT. If loop count is constant | |
716 (count_init), this constant is adjusted by STAGE_COUNT in | |
717 generate_prolog_epilog function. */ | |
718 rtx sub_reg = NULL_RTX; | |
719 | |
720 sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS, | |
721 count_reg, GEN_INT (last_stage), | |
722 count_reg, 1, OPTAB_DIRECT); | |
723 gcc_assert (REG_P (sub_reg)); | |
724 if (REGNO (sub_reg) != REGNO (count_reg)) | |
725 emit_move_insn (count_reg, sub_reg); | |
726 } | |
727 | |
728 for (i = 0; i < last_stage; i++) | |
729 duplicate_insns_of_cycles (ps, 0, i, 1, count_reg); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
730 |
0 | 731 /* Put the prolog on the entry edge. */ |
732 e = loop_preheader_edge (loop); | |
733 split_edge_and_insert (e, get_insns ()); | |
734 | |
735 end_sequence (); | |
736 | |
737 /* Generate the epilog, inserting its insns on the loop-exit edge. */ | |
738 start_sequence (); | |
739 | |
740 for (i = 0; i < last_stage; i++) | |
741 duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
742 |
0 | 743 /* Put the epilogue on the exit edge. */ |
744 gcc_assert (single_exit (loop)); | |
745 e = single_exit (loop); | |
746 split_edge_and_insert (e, get_insns ()); | |
747 end_sequence (); | |
748 } | |
749 | |
750 /* Return true if all the BBs of the loop are empty except the | |
751 loop header. */ | |
752 static bool | |
753 loop_single_full_bb_p (struct loop *loop) | |
754 { | |
755 unsigned i; | |
756 basic_block *bbs = get_loop_body (loop); | |
757 | |
758 for (i = 0; i < loop->num_nodes ; i++) | |
759 { | |
760 rtx head, tail; | |
761 bool empty_bb = true; | |
762 | |
763 if (bbs[i] == loop->header) | |
764 continue; | |
765 | |
766 /* Make sure that basic blocks other than the header | |
767 have only notes labels or jumps. */ | |
768 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail); | |
769 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head)) | |
770 { | |
771 if (NOTE_P (head) || LABEL_P (head) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
772 || (INSN_P (head) && (DEBUG_INSN_P (head) || JUMP_P (head)))) |
0 | 773 continue; |
774 empty_bb = false; | |
775 break; | |
776 } | |
777 | |
778 if (! empty_bb) | |
779 { | |
780 free (bbs); | |
781 return false; | |
782 } | |
783 } | |
784 free (bbs); | |
785 return true; | |
786 } | |
787 | |
788 /* A simple loop from SMS point of view; it is a loop that is composed of | |
789 either a single basic block or two BBs - a header and a latch. */ | |
790 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \ | |
791 && (EDGE_COUNT (loop->latch->preds) == 1) \ | |
792 && (EDGE_COUNT (loop->latch->succs) == 1)) | |
793 | |
/* Return true if LOOP is in its canonical form and false if not;
   i.e. it satisfies SIMPLE_SMS_LOOP_P (or is a single full BB), is an
   innermost non-root loop, and has a single exit.  */
static bool
loop_canon_p (struct loop *loop)
{

  /* Only innermost loops that are not the function's root loop are
     candidates for SMS.  */
  if (loop->inner || !loop_outer (loop))
    {
      if (dump_file)
	fprintf (dump_file, "SMS loop inner or !loop_outer\n");
      return false;
    }

  /* SMS requires exactly one exit edge.  */
  if (!single_exit (loop))
    {
      if (dump_file)
	{
	  rtx insn = BB_END (loop->header);

	  fprintf (dump_file, "SMS loop many exits ");
	  fprintf (dump_file, " %s %d (file, line)\n",
		   insn_file (insn), insn_line (insn));
	}
      return false;
    }

  /* Either the simple header+latch shape, or all non-header blocks
     must be empty (see loop_single_full_bb_p).  */
  if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
    {
      if (dump_file)
	{
	  rtx insn = BB_END (loop->header);

	  fprintf (dump_file, "SMS loop many BBs. ");
	  fprintf (dump_file, " %s %d (file, line)\n",
		   insn_file (insn), insn_line (insn));
	}
      return false;
    }

  return true;
}
835 | |
/* If there are more than one entry for the loop,
   make it one by splitting the first entry edge and
   redirecting the others to the new BB.  */
static void
canon_loop (struct loop *loop)
{
  edge e;
  edge_iterator i;

  /* Avoid annoying special cases of edges going to exit
     block.  Splitting such a fallthru edge gives the exit block a
     dedicated predecessor.  */
  FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
    if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
      split_edge (e);

  /* If the latch is the header itself (a self-loop) or has more than
     one successor, split the back edge so the loop gets a dedicated
     latch block.  The FOR_EACH_EDGE below must find the back edge,
     since the latch is by definition a predecessor of the header.  */
  if (loop->latch == loop->header
      || EDGE_COUNT (loop->latch->succs) > 1)
    {
      FOR_EACH_EDGE (e, i, loop->header->preds)
	if (e->src == loop->latch)
	  break;
      split_edge (e);
    }
}
860 | |
/* Setup infos: install the SMS-specific scheduler hook tables into the
   global scheduler state used by the haifa scheduler machinery.  */
static void
setup_sched_infos (void)
{
  /* Start from the generic haifa description and override only the
     pass id, so the common hooks identify this as the SMS pass.  */
  memcpy (&sms_common_sched_info, &haifa_common_sched_info,
	  sizeof (sms_common_sched_info));
  sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
  common_sched_info = &sms_common_sched_info;

  sched_deps_info = &sms_sched_deps_info;
  current_sched_info = &sms_sched_info;
}
873 | |
874 /* Probability in % that the sms-ed loop rolls enough so that optimized | |
875 version may be entered. Just a guess. */ | |
876 #define PROB_SMS_ENOUGH_ITERATIONS 80 | |
877 | |
878 /* Used to calculate the upper bound of ii. */ | |
879 #define MAXII_FACTOR 2 | |
880 | |
/* Main entry point, perform SMS scheduling on the loops of the function
   that consist of single basic blocks.  Runs in two phases: an analysis
   phase that builds a data-dependence graph (DDG) for every candidate
   loop, and a transformation phase that schedules each DDG and rewrites
   the loop's RTL (kernel, prolog and epilog).  */
static void
sms_schedule (void)
{
  rtx insn;
  ddg_ptr *g_arr, g;
  int * node_order;
  int maxii, max_asap;
  loop_iterator li;
  partial_schedule_ptr ps;
  basic_block bb = NULL;
  struct loop *loop;
  basic_block condition_bb = NULL;
  edge latch_edge;
  gcov_type trip_count = 0;

  loop_optimizer_init (LOOPS_HAVE_PREHEADERS
		       | LOOPS_HAVE_RECORDED_EXITS);
  if (number_of_loops () <= 1)
    {
      loop_optimizer_finalize ();
      return; /* There are no loops to schedule.  */
    }

  /* Initialize issue_rate.  Temporarily pretend reload has completed,
     since some targets' issue_rate hooks check reload_completed.  */
  if (targetm.sched.issue_rate)
    {
      int temp = reload_completed;

      reload_completed = 1;
      issue_rate = targetm.sched.issue_rate ();
      reload_completed = temp;
    }
  else
    issue_rate = 1;

  /* Initialize the scheduler.  */
  setup_sched_infos ();
  haifa_sched_init ();

  /* Allocate memory to hold the DDG array one entry for each loop.
     We use loop->num as index into this array.  */
  g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());

  if (dump_file)
    {
      fprintf (dump_file, "\n\nSMS analysis phase\n");
      fprintf (dump_file, "===================\n\n");
    }

  /* Build DDGs for all the relevant loops and hold them in G_ARR
     indexed by the loop index.  */
  FOR_EACH_LOOP (li, loop, 0)
    {
      rtx head, tail;
      rtx count_reg;

      /* For debugging.  */
      if (dbg_cnt (sms_sched_loop) == false)
        {
          if (dump_file)
            fprintf (dump_file, "SMS reached max limit... \n");

          break;
        }

      if (dump_file)
	{
	  rtx insn = BB_END (loop->header);

	  fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
	           loop->num, insn_file (insn), insn_line (insn));

	}

      if (! loop_canon_p (loop))
        continue;

      if (! loop_single_full_bb_p (loop))
	{
	  if (dump_file)
	    fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
	  continue;
	}

      bb = loop->header;

      get_ebb_head_tail (bb, bb, &head, &tail);
      latch_edge = loop_latch_edge (loop);
      gcc_assert (single_exit (loop));
      if (single_exit (loop)->count)
	trip_count = latch_edge->count / single_exit (loop)->count;

      /* Perform SMS only on loops whose average iteration count is
	 above the threshold.  */

      if ( latch_edge->count
          && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
	{
	  if (dump_file)
	    {
	      fprintf (dump_file, " %s %d (file, line)\n",
		       insn_file (tail), insn_line (tail));
	      fprintf (dump_file, "SMS single-bb-loop\n");
	      if (profile_info && flag_branch_probabilities)
	    	{
	      	  fprintf (dump_file, "SMS loop-count ");
	      	  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
	             	   (HOST_WIDEST_INT) bb->count);
	      	  fprintf (dump_file, "\n");
                  fprintf (dump_file, "SMS trip-count ");
	      	  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
	             	   (HOST_WIDEST_INT) trip_count);
	      	  fprintf (dump_file, "\n");
	      	  fprintf (dump_file, "SMS profile-sum-max ");
	      	  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
	          	   (HOST_WIDEST_INT) profile_info->sum_max);
	      	  fprintf (dump_file, "\n");
	    	}
	    }
          continue;
        }

      /* Make sure this is a doloop.  */
      if ( !(count_reg = doloop_register_get (head, tail)))
	{
	  if (dump_file)
	    fprintf (dump_file, "SMS doloop_register_get failed\n");
	  continue;
        }

      /* Don't handle BBs with calls or barriers, or !single_set insns,
         or auto-increment insns (to avoid creating invalid reg-moves
         for the auto-increment insns).
         ??? Should handle auto-increment insns.
         ??? Should handle insns defining subregs.  */
     for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
      {
         rtx set;

        if (CALL_P (insn)
            || BARRIER_P (insn)
            || (NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
                && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
            || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
            || (INSN_P (insn) && (set = single_set (insn))
                && GET_CODE (SET_DEST (set)) == SUBREG))
        break;
      }

      /* If the walk above stopped early, some insn disqualified the
	 loop; report which kind and skip it.  */
      if (insn != NEXT_INSN (tail))
	{
	  if (dump_file)
	    {
	      if (CALL_P (insn))
		fprintf (dump_file, "SMS loop-with-call\n");
	      else if (BARRIER_P (insn))
		fprintf (dump_file, "SMS loop-with-barrier\n");
              else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
                fprintf (dump_file, "SMS reg inc\n");
              else if ((NONDEBUG_INSN_P (insn) && !JUMP_P (insn)
                && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
                fprintf (dump_file, "SMS loop-with-not-single-set\n");
              else
               fprintf (dump_file, "SMS loop with subreg in lhs\n");
	      print_rtl_single (dump_file, insn);
	    }

	  continue;
	}

      if (! (g = create_ddg (bb, 0)))
	{
	  if (dump_file)
	    fprintf (dump_file, "SMS create_ddg failed\n");
	  continue;
	}

      g_arr[loop->num] = g;
      if (dump_file)
        fprintf (dump_file, "...OK\n");

    }
  if (dump_file)
    {
      fprintf (dump_file, "\nSMS transformation phase\n");
      fprintf (dump_file, "=========================\n\n");
    }

  /* We don't want to perform SMS on new loops - created by versioning.  */
  FOR_EACH_LOOP (li, loop, 0)
    {
      rtx head, tail;
      rtx count_reg, count_init;
      int mii, rec_mii;
      unsigned stage_count = 0;
      HOST_WIDEST_INT loop_count = 0;

      /* Only loops that survived the analysis phase have a DDG.  */
      if (! (g = g_arr[loop->num]))
        continue;

      if (dump_file)
	{
	  rtx insn = BB_END (loop->header);

	  fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
	           loop->num, insn_file (insn), insn_line (insn));

	  print_ddg (dump_file, g);
	}

      get_ebb_head_tail (loop->header, loop->header, &head, &tail);

      latch_edge = loop_latch_edge (loop);
      gcc_assert (single_exit (loop));
      if (single_exit (loop)->count)
	trip_count = latch_edge->count / single_exit (loop)->count;

      if (dump_file)
	{
	  fprintf (dump_file, " %s %d (file, line)\n",
		   insn_file (tail), insn_line (tail));
	  fprintf (dump_file, "SMS single-bb-loop\n");
	  if (profile_info && flag_branch_probabilities)
	    {
	      fprintf (dump_file, "SMS loop-count ");
	      /* NOTE(review): BB here is left over from the analysis
		 loop above; presumably this should be G->bb.  Dump-only
		 -- verify.  */
	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
	               (HOST_WIDEST_INT) bb->count);
	      fprintf (dump_file, "\n");
	      fprintf (dump_file, "SMS profile-sum-max ");
	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
	               (HOST_WIDEST_INT) profile_info->sum_max);
	      fprintf (dump_file, "\n");
	    }
	  fprintf (dump_file, "SMS doloop\n");
	  fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
	  fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
	  fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
	}


      /* If the loop has a doloop register, it gets special handling:
	 try to derive a constant iteration count from the preheader.  */
      count_init = NULL_RTX;
      if ((count_reg = doloop_register_get (head, tail)))
	{
	  basic_block pre_header;

	  pre_header = loop_preheader_edge (loop)->src;
	  count_init = const_iteration_count (count_reg, pre_header,
					      &loop_count);
	}
      gcc_assert (count_reg);

      if (dump_file && count_init)
        {
          fprintf (dump_file, "SMS const-doloop ");
          fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
		     loop_count);
          fprintf (dump_file, "\n");
        }

      node_order = XNEWVEC (int, g->num_nodes);

      mii = 1; /* Need to pass some estimate of mii.  */
      rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
      mii = MAX (res_MII (g), rec_mii);
      maxii = MAX (max_asap, MAXII_FACTOR * mii);

      if (dump_file)
	fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
		 rec_mii, mii, maxii);

      /* After sms_order_nodes and before sms_schedule_by_order, to copy over
	 ASAP.  */
      set_node_sched_params (g);

      ps = sms_schedule_by_order (g, mii, maxii, node_order);

      if (ps){
	stage_count = PS_STAGE_COUNT (ps);
	gcc_assert(stage_count >= 1);
      }

      /* Stage count of 1 means that there is no interleaving between
         iterations, let the scheduling passes do the job.  Also bail
	 out if the known (or profiled) trip count is too small to
	 cover prolog + epilog.  */
      if (stage_count <= 1
	  || (count_init && (loop_count <= stage_count))
	  || (flag_branch_probabilities && (trip_count <= stage_count)))
	{
	  if (dump_file)
	    {
	      fprintf (dump_file, "SMS failed... \n");
	      fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
	      fprintf (dump_file, ", trip-count=");
	      fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
	      fprintf (dump_file, ")\n");
	    }
	  continue;
	}
      else
	{
	  struct undo_replace_buff_elem *reg_move_replaces;

	  if (dump_file)
	    {
	      fprintf (dump_file,
		       "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
		       stage_count);
	      print_partial_schedule (ps, dump_file);
	      fprintf (dump_file,
		       "SMS Branch (%d) will later be scheduled at cycle %d.\n",
		       g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
	    }

	  /* Set the stage boundaries.  If the DDG is built with closing_branch_deps,
	     the closing_branch was scheduled and should appear in the last (ii-1)
	     row.  Otherwise, we are free to schedule the branch, and we let nodes
	     that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
	     row; this should reduce stage_count to minimum.
             TODO: Revisit the issue of scheduling the insns of the
             control part relative to the branch when the control part
             has more than one insn.  */
	  normalize_sched_times (ps);
	  rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
	  set_columns_for_ps (ps);

	  canon_loop (loop);

          /* If the BCT count is not known, do loop-versioning: the
	     SMS-ed copy is entered only when the count register
	     exceeds stage_count at run time.  */
	  if (count_reg && ! count_init)
	    {
	      rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
					     GEN_INT(stage_count));
	      unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
			       * REG_BR_PROB_BASE) / 100;

	      loop_version (loop, comp_rtx, &condition_bb,
			    prob, prob, REG_BR_PROB_BASE - prob,
			    true);
	     }

	  /* Set new iteration count of loop kernel.  The prolog and
	     epilog together execute stage_count - 1 iterations.  */
          if (count_reg && count_init)
	    SET_SRC (single_set (count_init)) = GEN_INT (loop_count
						     - stage_count + 1);

	  /* Now apply the scheduled kernel to the RTL of the loop.  */
	  permute_partial_schedule (ps, g->closing_branch->first_note);

	  /* Mark this loop as software pipelined so the later
	     scheduling passes doesn't touch it.  */
	  if (! flag_resched_modulo_sched)
	    g->bb->flags |= BB_DISABLE_SCHEDULE;
	  /* The life-info is not valid any more.  */
	  df_set_bb_dirty (g->bb);

	  reg_move_replaces = generate_reg_moves (ps, true);
	  if (dump_file)
	    print_node_sched_params (dump_file, g->num_nodes, g);
	  /* Generate prolog and epilog.  */
	  generate_prolog_epilog (ps, loop, count_reg, count_init);

	  free_undo_replace_buff (reg_move_replaces);
	}

      free_partial_schedule (ps);
      free (node_sched_params);
      free (node_order);
      free_ddg (g);
    }

  free (g_arr);

  /* Release scheduler data, needed until now because of DFA.  */
  haifa_sched_finish ();
  loop_optimizer_finalize ();
}
1260 | |
1261 /* The SMS scheduling algorithm itself | |
1262 ----------------------------------- | |
1263 Input: 'O' an ordered list of insns of a loop. | |
1264 Output: A scheduling of the loop - kernel, prolog, and epilogue. | |
1265 | |
1266 'Q' is the empty Set | |
1267 'PS' is the partial schedule; it holds the currently scheduled nodes with | |
1268 their cycle/slot. | |
1269 'PSP' previously scheduled predecessors. | |
1270 'PSS' previously scheduled successors. | |
1271 't(u)' the cycle where u is scheduled. | |
1272 'l(u)' is the latency of u. | |
1273 'd(v,u)' is the dependence distance from v to u. | |
1274 'ASAP(u)' the earliest time at which u could be scheduled as computed in | |
1275 the node ordering phase. | |
1276 'check_hardware_resources_conflicts(u, PS, c)' | |
1277 run a trace around cycle/slot through DFA model | |
1278 to check resource conflicts involving instruction u | |
1279 at cycle c given the partial schedule PS. | |
1280 'add_to_partial_schedule_at_time(u, PS, c)' | |
1281 Add the node/instruction u to the partial schedule | |
1282 PS at time c. | |
1283 'calculate_register_pressure(PS)' | |
1284 Given a schedule of instructions, calculate the register | |
1285 pressure it implies. One implementation could be the | |
1286 maximum number of overlapping live ranges. | |
   'maxRP' The maximum allowed register pressure; it is usually derived from the number
	   of registers available in the hardware.
1289 | |
1290 1. II = MII. | |
1291 2. PS = empty list | |
1292 3. for each node u in O in pre-computed order | |
1293 4. if (PSP(u) != Q && PSS(u) == Q) then | |
1294 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u). | |
1295 6. start = Early_start; end = Early_start + II - 1; step = 1 | |
1296 11. else if (PSP(u) == Q && PSS(u) != Q) then | |
1297 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u). | |
1298 13. start = Late_start; end = Late_start - II + 1; step = -1 | |
1299 14. else if (PSP(u) != Q && PSS(u) != Q) then | |
1300 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all every v in PSP(u). | |
1301 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all every v in PSS(u). | |
1302 17. start = Early_start; | |
1303 18. end = min(Early_start + II - 1 , Late_start); | |
1304 19. step = 1 | |
1305 20. else "if (PSP(u) == Q && PSS(u) == Q)" | |
1306 21. start = ASAP(u); end = start + II - 1; step = 1 | |
1307 22. endif | |
1308 | |
1309 23. success = false | |
1310 24. for (c = start ; c != end ; c += step) | |
1311 25. if check_hardware_resources_conflicts(u, PS, c) then | |
1312 26. add_to_partial_schedule_at_time(u, PS, c) | |
1313 27. success = true | |
1314 28. break | |
1315 29. endif | |
1316 30. endfor | |
1317 31. if (success == false) then | |
1318 32. II = II + 1 | |
1319 33. if (II > maxII) then | |
1320 34. finish - failed to schedule | |
1321 35. endif | |
1322 36. goto 2. | |
1323 37. endif | |
1324 38. endfor | |
1325 39. if (calculate_register_pressure(PS) > maxRP) then | |
1326 40. goto 32. | |
1327 41. endif | |
1328 42. compute epilogue & prologue | |
1329 43. finish - succeeded to schedule | |
1330 */ | |
1331 | |
1332 /* A limit on the number of cycles that resource conflicts can span. ??? Should | |
1333 be provided by DFA, and be dependent on the type of insn scheduled. Currently | |
1334 set to 0 to save compile time. */ | |
1335 #define DFA_HISTORY SMS_DFA_HISTORY | |
1336 | |
1337 /* A threshold for the number of repeated unsuccessful attempts to insert | |
1338 an empty row, before we flush the partial schedule and start over. */ | |
1339 #define MAX_SPLIT_NUM 10 | |
/* Given the partial schedule PS, this function calculates and returns the
   cycles in which we can schedule the node with the given index I.
   NOTE: Here we do the backtracking in SMS, in some special cases. We have
   noticed that there are several cases in which we fail to SMS the loop
   because the sched window of a node is empty due to tight data-deps. In
   such cases we want to unschedule some of the predecessors/successors
   until we get non-empty scheduling window.  It returns -1 if the
   scheduling window is empty and zero otherwise.

   The window is written to *START_P, *STEP_P, *END_P and is traversed
   as  for (c = start; c != end; c += step)  with END exclusive; STEP
   is +1 when scheduling close to predecessors and -1 when scheduling
   close to successors.  */

static int
get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
		  sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
{
  int start, step, end;
  ddg_edge_ptr e;
  int u = nodes_order [i];
  ddg_node_ptr u_node = &ps->g->nodes[u];
  sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
  sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
  sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
  sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
  int psp_not_empty;
  int pss_not_empty;

  /* 1. compute sched window for u (start, end, step).
     PSP/PSS = already-scheduled predecessors/successors of U.  */
  sbitmap_zero (psp);
  sbitmap_zero (pss);
  psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
  pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);

  /* Case 1: only predecessors are scheduled -- derive the earliest
     start from them and scan forward.  */
  if (psp_not_empty && !pss_not_empty)
    {
      int early_start = INT_MIN;

      end = INT_MAX;
      for (e = u_node->in; e != 0; e = e->next_in)
	{
	  ddg_node_ptr v_node = e->src;

          if (dump_file)
            {
	      fprintf (dump_file, "\nProcessing edge: ");
              print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_not_empty,"
		       " checking p %d (%d): ", u_node->cuid,
		       INSN_UID (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
            }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
              int p_st = SCHED_TIME (v_node);

	      /* U must start no earlier than latency after V, minus
		 distance * ii for cross-iteration dependences.  */
              early_start =
                MAX (early_start, p_st + e->latency - (e->distance * ii));

              if (dump_file)
                fprintf (dump_file,
                         "pred st = %d; early_start = %d; latency: %d",
                         p_st, early_start, e->latency);

	      /* Memory dependences additionally bound the window end.  */
	      if (e->data_type == MEM_DEP)
		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
	    }
         else if (dump_file)
            fprintf (dump_file, "the node is not scheduled\n");
	}
      start = early_start;
      end = MIN (end, early_start + ii);
      /* Schedule the node close to it's predecessors.  */
      step = 1;

      if (dump_file)
        fprintf (dump_file,
		 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
		 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
    }

  /* Case 2: only successors are scheduled -- derive the latest start
     from them and scan backward.  */
  else if (!psp_not_empty && pss_not_empty)
    {
      int late_start = INT_MAX;

      end = INT_MIN;
      for (e = u_node->out; e != 0; e = e->next_out)
	{
	  ddg_node_ptr v_node = e->dest;

          if (dump_file)
            {
              fprintf (dump_file, "\nProcessing edge:");
              print_ddg_edge (dump_file, e);
              fprintf (dump_file,
                       "\nScheduling %d (%d) in pss_not_empty,"
                       " checking s %d (%d): ", u_node->cuid,
                       INSN_UID (u_node->insn), v_node->cuid, INSN_UID
                       (v_node->insn));
            }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
              int s_st = SCHED_TIME (v_node);

	      /* U must finish early enough for each scheduled
		 successor V.  */
              late_start = MIN (late_start,
                                s_st - e->latency + (e->distance * ii));

              if (dump_file)
                fprintf (dump_file,
                         "succ st = %d; late_start = %d; latency = %d",
                         s_st, late_start, e->latency);

	      if (e->data_type == MEM_DEP)
		end = MAX (end, SCHED_TIME (v_node) - ii + 1);
             if (dump_file)
                 fprintf (dump_file, "end = %d\n", end);

	    }
          else if (dump_file)
            fprintf (dump_file, "the node is not scheduled\n");

	}
      start = late_start;
      end = MAX (end, late_start - ii);
      /* Schedule the node close to it's successors.  */
      step = -1;

      if (dump_file)
        fprintf (dump_file,
                 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
                 u_node->cuid, INSN_UID (u_node->insn), start, end, step);

    }

  /* Case 3: both predecessors and successors are scheduled -- the
     window is bounded on both sides; the scan direction is chosen by
     comparing the counts of true register dependences.  */
  else if (psp_not_empty && pss_not_empty)
    {
      int early_start = INT_MIN;
      int late_start = INT_MAX;
      int count_preds = 0;
      int count_succs = 0;

      start = INT_MIN;
      end = INT_MAX;
      for (e = u_node->in; e != 0; e = e->next_in)
	{
	  ddg_node_ptr v_node = e->src;

	  if (dump_file)
	    {
              fprintf (dump_file, "\nProcessing edge:");
              print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_pss_not_empty,"
		       " checking p %d (%d): ", u_node->cuid, INSN_UID
		       (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
              int p_st = SCHED_TIME (v_node);

	      early_start = MAX (early_start,
				 p_st + e->latency
				 - (e->distance * ii));

              if (dump_file)
                fprintf (dump_file,
                         "pred st = %d; early_start = %d; latency = %d",
                         p_st, early_start, e->latency);

              if (e->type == TRUE_DEP && e->data_type == REG_DEP)
                count_preds++;

	      if (e->data_type == MEM_DEP)
		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
	    }
          else if (dump_file)
            fprintf (dump_file, "the node is not scheduled\n");

	}
      for (e = u_node->out; e != 0; e = e->next_out)
	{
	  ddg_node_ptr v_node = e->dest;

	  if (dump_file)
	    {
              fprintf (dump_file, "\nProcessing edge:");
              print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_pss_not_empty,"
		       " checking s %d (%d): ", u_node->cuid, INSN_UID
		       (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
              int s_st = SCHED_TIME (v_node);

	      late_start = MIN (late_start,
				s_st - e->latency
				+ (e->distance * ii));

              if (dump_file)
                fprintf (dump_file,
                         "succ st = %d; late_start = %d; latency = %d",
                         s_st, late_start, e->latency);

               if (e->type == TRUE_DEP && e->data_type == REG_DEP)
                 count_succs++;

	      /* NOTE(review): here a MEM_DEP successor tightens START,
		 whereas in case 2 it tightens END -- looks asymmetric;
		 confirm against the upstream rewrite of this
		 function.  */
	      if (e->data_type == MEM_DEP)
		start = MAX (start, SCHED_TIME (v_node) - ii + 1);
	    }
          else if (dump_file)
            fprintf (dump_file, "the node is not scheduled\n");

	}
      start = MAX (start, early_start);
      end = MIN (end, MIN (early_start + ii, late_start + 1));
      step = 1;
      /* If there are more successors than predecessors schedule the
         node close to it's successors.  */
      if (count_succs >= count_preds)
        {
          int old_start = start;

          start = end - 1;
          end = old_start - 1;
          step = -1;
        }
    }
  else /* psp is empty && pss is empty.  */
    {
      /* Case 4: no neighbor is scheduled yet; fall back to the ASAP
	 time computed during node ordering.  */
      start = SCHED_ASAP (u_node);
      end = start + ii;
      step = 1;
    }

  *start_p = start;
  *step_p = step;
  *end_p = end;
  sbitmap_free (psp);
  sbitmap_free (pss);

  /* An inverted (or zero-length) window means scheduling failed.  */
  if ((start >= end && step == 1) || (start <= end && step == -1))
    {
      if (dump_file)
	fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
		 start, end, step);
      return -1;
    }

  return 0;
}
1595 | |
/* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
   node currently been scheduled.  At the end of the calculation
   MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
   U_NODE which are (1) already scheduled in the first/last row of
   U_NODE's scheduling window, (2) whose dependence inequality with U
   becomes an equality when U is scheduled in this same row, and (3)
   whose dependence latency is zero.

   The first and last rows are calculated using the following parameters:
   START/END rows - The cycles that begins/ends the traversal on the window;
   searching for an empty cycle to schedule U_NODE.
   STEP - The direction in which we traverse the window.
   II - The initiation interval.

   SCHED_NODES marks the nodes already placed in the partial schedule;
   only those can constrain U_NODE.  Both output bitmaps are cleared
   before being filled.  */

static void
calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
			       int step, int ii, sbitmap sched_nodes,
			       sbitmap must_precede, sbitmap must_follow)
{
  ddg_edge_ptr e;
  int first_cycle_in_window, last_cycle_in_window;

  gcc_assert (must_precede && must_follow);

  /* Consider the following scheduling window:
     {first_cycle_in_window, first_cycle_in_window+1, ...,
     last_cycle_in_window}.  If step is 1 then the following will be
     the order we traverse the window: {start=first_cycle_in_window,
     first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
     or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
     end=first_cycle_in_window-1} if step is -1.  */
  first_cycle_in_window = (step == 1) ? start : end - step;
  last_cycle_in_window = (step == 1) ? end - step : start;

  sbitmap_zero (must_precede);
  sbitmap_zero (must_follow);

  if (dump_file)
    fprintf (dump_file, "\nmust_precede: ");

  /* Instead of checking if:
      (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
      && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
             first_cycle_in_window)
      && e->latency == 0
     we use the fact that latency is non-negative:
      SCHED_TIME (e->src) - (e->distance * ii) <=
      SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
      first_cycle_in_window
     and check only if
      SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
  for (e = u_node->in; e != 0; e = e->next_in)
    if (TEST_BIT (sched_nodes, e->src->cuid)
	&& ((SCHED_TIME (e->src) - (e->distance * ii)) ==
             first_cycle_in_window))
      {
	if (dump_file)
	  fprintf (dump_file, "%d ", e->src->cuid);

	SET_BIT (must_precede, e->src->cuid);
      }

  if (dump_file)
    fprintf (dump_file, "\nmust_follow: ");

  /* Instead of checking if:
      (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
      && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
             last_cycle_in_window)
      && e->latency == 0
     we use the fact that latency is non-negative:
      SCHED_TIME (e->dest) + (e->distance * ii) >=
      SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
      last_cycle_in_window
     and check only if
      SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
  for (e = u_node->out; e != 0; e = e->next_out)
    if (TEST_BIT (sched_nodes, e->dest->cuid)
	&& ((SCHED_TIME (e->dest) + (e->distance * ii)) ==
             last_cycle_in_window))
      {
	if (dump_file)
	  fprintf (dump_file, "%d ", e->dest->cuid);

	SET_BIT (must_follow, e->dest->cuid);
      }

  if (dump_file)
    fprintf (dump_file, "\n");
}
1686 | |
1687 /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following | |
1688 parameters to decide if that's possible: | |
1689 PS - The partial schedule. | |
1690 U - The serial number of U_NODE. | |
1691 NUM_SPLITS - The number of row splits made so far. | |
1692 MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at | |
1693 the first row of the scheduling window) | |
1694 MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the | |
1695 last row of the scheduling window) */ | |
1696 | |
1697 static bool | |
1698 try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node, | |
1699 int u, int cycle, sbitmap sched_nodes, | |
1700 int *num_splits, sbitmap must_precede, | |
1701 sbitmap must_follow) | |
1702 { | |
1703 ps_insn_ptr psi; | |
1704 bool success = 0; | |
1705 | |
1706 verify_partial_schedule (ps, sched_nodes); | |
1707 psi = ps_add_node_check_conflicts (ps, u_node, cycle, | |
1708 must_precede, must_follow); | |
1709 if (psi) | |
1710 { | |
1711 SCHED_TIME (u_node) = cycle; | |
1712 SET_BIT (sched_nodes, u); | |
1713 success = 1; | |
1714 *num_splits = 0; | |
1715 if (dump_file) | |
1716 fprintf (dump_file, "Scheduled w/o split in %d\n", cycle); | |
1717 | |
1718 } | |
1719 | |
1720 return success; | |
1721 } | |
1722 | |
1723 /* This function implements the scheduling algorithm for SMS according to the | |
1724 above algorithm. */ | |
1725 static partial_schedule_ptr | |
1726 sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order) | |
1727 { | |
1728 int ii = mii; | |
1729 int i, c, success, num_splits = 0; | |
1730 int flush_and_start_over = true; | |
1731 int num_nodes = g->num_nodes; | |
1732 int start, end, step; /* Place together into one struct? */ | |
1733 sbitmap sched_nodes = sbitmap_alloc (num_nodes); | |
1734 sbitmap must_precede = sbitmap_alloc (num_nodes); | |
1735 sbitmap must_follow = sbitmap_alloc (num_nodes); | |
1736 sbitmap tobe_scheduled = sbitmap_alloc (num_nodes); | |
1737 | |
1738 partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY); | |
1739 | |
1740 sbitmap_ones (tobe_scheduled); | |
1741 sbitmap_zero (sched_nodes); | |
1742 | |
1743 while (flush_and_start_over && (ii < maxii)) | |
1744 { | |
1745 | |
1746 if (dump_file) | |
1747 fprintf (dump_file, "Starting with ii=%d\n", ii); | |
1748 flush_and_start_over = false; | |
1749 sbitmap_zero (sched_nodes); | |
1750 | |
1751 for (i = 0; i < num_nodes; i++) | |
1752 { | |
1753 int u = nodes_order[i]; | |
1754 ddg_node_ptr u_node = &ps->g->nodes[u]; | |
1755 rtx insn = u_node->insn; | |
1756 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1757 if (!NONDEBUG_INSN_P (insn)) |
0 | 1758 { |
1759 RESET_BIT (tobe_scheduled, u); | |
1760 continue; | |
1761 } | |
1762 | |
1763 if (JUMP_P (insn)) /* Closing branch handled later. */ | |
1764 { | |
1765 RESET_BIT (tobe_scheduled, u); | |
1766 continue; | |
1767 } | |
1768 | |
1769 if (TEST_BIT (sched_nodes, u)) | |
1770 continue; | |
1771 | |
1772 /* Try to get non-empty scheduling window. */ | |
1773 success = 0; | |
1774 if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start, | |
1775 &step, &end) == 0) | |
1776 { | |
1777 if (dump_file) | |
1778 fprintf (dump_file, "\nTrying to schedule node %d \ | |
1779 INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID | |
1780 (g->nodes[u].insn)), start, end, step); | |
1781 | |
1782 gcc_assert ((step > 0 && start < end) | |
1783 || (step < 0 && start > end)); | |
1784 | |
1785 calculate_must_precede_follow (u_node, start, end, step, ii, | |
1786 sched_nodes, must_precede, | |
1787 must_follow); | |
1788 | |
1789 for (c = start; c != end; c += step) | |
1790 { | |
1791 sbitmap tmp_precede = NULL; | |
1792 sbitmap tmp_follow = NULL; | |
1793 | |
1794 if (c == start) | |
1795 { | |
1796 if (step == 1) | |
1797 tmp_precede = must_precede; | |
1798 else /* step == -1. */ | |
1799 tmp_follow = must_follow; | |
1800 } | |
1801 if (c == end - step) | |
1802 { | |
1803 if (step == 1) | |
1804 tmp_follow = must_follow; | |
1805 else /* step == -1. */ | |
1806 tmp_precede = must_precede; | |
1807 } | |
1808 | |
1809 success = | |
1810 try_scheduling_node_in_cycle (ps, u_node, u, c, | |
1811 sched_nodes, | |
1812 &num_splits, tmp_precede, | |
1813 tmp_follow); | |
1814 if (success) | |
1815 break; | |
1816 } | |
1817 | |
1818 verify_partial_schedule (ps, sched_nodes); | |
1819 } | |
1820 if (!success) | |
1821 { | |
1822 int split_row; | |
1823 | |
1824 if (ii++ == maxii) | |
1825 break; | |
1826 | |
1827 if (num_splits >= MAX_SPLIT_NUM) | |
1828 { | |
1829 num_splits = 0; | |
1830 flush_and_start_over = true; | |
1831 verify_partial_schedule (ps, sched_nodes); | |
1832 reset_partial_schedule (ps, ii); | |
1833 verify_partial_schedule (ps, sched_nodes); | |
1834 break; | |
1835 } | |
1836 | |
1837 num_splits++; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1838 /* The scheduling window is exclusive of 'end' |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1839 whereas compute_split_window() expects an inclusive, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1840 ordered range. */ |
0 | 1841 if (step == 1) |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1842 split_row = compute_split_row (sched_nodes, start, end - 1, |
0 | 1843 ps->ii, u_node); |
1844 else | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1845 split_row = compute_split_row (sched_nodes, end + 1, start, |
0 | 1846 ps->ii, u_node); |
1847 | |
1848 ps_insert_empty_row (ps, split_row, sched_nodes); | |
1849 i--; /* Go back and retry node i. */ | |
1850 | |
1851 if (dump_file) | |
1852 fprintf (dump_file, "num_splits=%d\n", num_splits); | |
1853 } | |
1854 | |
1855 /* ??? If (success), check register pressure estimates. */ | |
1856 } /* Continue with next node. */ | |
1857 } /* While flush_and_start_over. */ | |
1858 if (ii >= maxii) | |
1859 { | |
1860 free_partial_schedule (ps); | |
1861 ps = NULL; | |
1862 } | |
1863 else | |
1864 gcc_assert (sbitmap_equal (tobe_scheduled, sched_nodes)); | |
1865 | |
1866 sbitmap_free (sched_nodes); | |
1867 sbitmap_free (must_precede); | |
1868 sbitmap_free (must_follow); | |
1869 sbitmap_free (tobe_scheduled); | |
1870 | |
1871 return ps; | |
1872 } | |
1873 | |
1874 /* This function inserts a new empty row into PS at the position | |
1875 according to SPLITROW, keeping all already scheduled instructions | |
1876 intact and updating their SCHED_TIME and cycle accordingly. */ | |
1877 static void | |
1878 ps_insert_empty_row (partial_schedule_ptr ps, int split_row, | |
1879 sbitmap sched_nodes) | |
1880 { | |
1881 ps_insn_ptr crr_insn; | |
1882 ps_insn_ptr *rows_new; | |
1883 int ii = ps->ii; | |
1884 int new_ii = ii + 1; | |
1885 int row; | |
1886 | |
1887 verify_partial_schedule (ps, sched_nodes); | |
1888 | |
1889 /* We normalize sched_time and rotate ps to have only non-negative sched | |
1890 times, for simplicity of updating cycles after inserting new row. */ | |
1891 split_row -= ps->min_cycle; | |
1892 split_row = SMODULO (split_row, ii); | |
1893 if (dump_file) | |
1894 fprintf (dump_file, "split_row=%d\n", split_row); | |
1895 | |
1896 normalize_sched_times (ps); | |
1897 rotate_partial_schedule (ps, ps->min_cycle); | |
1898 | |
1899 rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr)); | |
1900 for (row = 0; row < split_row; row++) | |
1901 { | |
1902 rows_new[row] = ps->rows[row]; | |
1903 ps->rows[row] = NULL; | |
1904 for (crr_insn = rows_new[row]; | |
1905 crr_insn; crr_insn = crr_insn->next_in_row) | |
1906 { | |
1907 ddg_node_ptr u = crr_insn->node; | |
1908 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii); | |
1909 | |
1910 SCHED_TIME (u) = new_time; | |
1911 crr_insn->cycle = new_time; | |
1912 SCHED_ROW (u) = new_time % new_ii; | |
1913 SCHED_STAGE (u) = new_time / new_ii; | |
1914 } | |
1915 | |
1916 } | |
1917 | |
1918 rows_new[split_row] = NULL; | |
1919 | |
1920 for (row = split_row; row < ii; row++) | |
1921 { | |
1922 rows_new[row + 1] = ps->rows[row]; | |
1923 ps->rows[row] = NULL; | |
1924 for (crr_insn = rows_new[row + 1]; | |
1925 crr_insn; crr_insn = crr_insn->next_in_row) | |
1926 { | |
1927 ddg_node_ptr u = crr_insn->node; | |
1928 int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1; | |
1929 | |
1930 SCHED_TIME (u) = new_time; | |
1931 crr_insn->cycle = new_time; | |
1932 SCHED_ROW (u) = new_time % new_ii; | |
1933 SCHED_STAGE (u) = new_time / new_ii; | |
1934 } | |
1935 } | |
1936 | |
1937 /* Updating ps. */ | |
1938 ps->min_cycle = ps->min_cycle + ps->min_cycle / ii | |
1939 + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0); | |
1940 ps->max_cycle = ps->max_cycle + ps->max_cycle / ii | |
1941 + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0); | |
1942 free (ps->rows); | |
1943 ps->rows = rows_new; | |
1944 ps->ii = new_ii; | |
1945 gcc_assert (ps->min_cycle >= 0); | |
1946 | |
1947 verify_partial_schedule (ps, sched_nodes); | |
1948 | |
1949 if (dump_file) | |
1950 fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle, | |
1951 ps->max_cycle); | |
1952 } | |
1953 | |
1954 /* Given U_NODE which is the node that failed to be scheduled; LOW and | |
1955 UP which are the boundaries of it's scheduling window; compute using | |
1956 SCHED_NODES and II a row in the partial schedule that can be split | |
1957 which will separate a critical predecessor from a critical successor | |
1958 thereby expanding the window, and return it. */ | |
1959 static int | |
1960 compute_split_row (sbitmap sched_nodes, int low, int up, int ii, | |
1961 ddg_node_ptr u_node) | |
1962 { | |
1963 ddg_edge_ptr e; | |
1964 int lower = INT_MIN, upper = INT_MAX; | |
1965 ddg_node_ptr crit_pred = NULL; | |
1966 ddg_node_ptr crit_succ = NULL; | |
1967 int crit_cycle; | |
1968 | |
1969 for (e = u_node->in; e != 0; e = e->next_in) | |
1970 { | |
1971 ddg_node_ptr v_node = e->src; | |
1972 | |
1973 if (TEST_BIT (sched_nodes, v_node->cuid) | |
1974 && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii))) | |
1975 if (SCHED_TIME (v_node) > lower) | |
1976 { | |
1977 crit_pred = v_node; | |
1978 lower = SCHED_TIME (v_node); | |
1979 } | |
1980 } | |
1981 | |
1982 if (crit_pred != NULL) | |
1983 { | |
1984 crit_cycle = SCHED_TIME (crit_pred) + 1; | |
1985 return SMODULO (crit_cycle, ii); | |
1986 } | |
1987 | |
1988 for (e = u_node->out; e != 0; e = e->next_out) | |
1989 { | |
1990 ddg_node_ptr v_node = e->dest; | |
1991 if (TEST_BIT (sched_nodes, v_node->cuid) | |
1992 && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii))) | |
1993 if (SCHED_TIME (v_node) < upper) | |
1994 { | |
1995 crit_succ = v_node; | |
1996 upper = SCHED_TIME (v_node); | |
1997 } | |
1998 } | |
1999 | |
2000 if (crit_succ != NULL) | |
2001 { | |
2002 crit_cycle = SCHED_TIME (crit_succ); | |
2003 return SMODULO (crit_cycle, ii); | |
2004 } | |
2005 | |
2006 if (dump_file) | |
2007 fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n"); | |
2008 | |
2009 return SMODULO ((low + up + 1) / 2, ii); | |
2010 } | |
2011 | |
2012 static void | |
2013 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes) | |
2014 { | |
2015 int row; | |
2016 ps_insn_ptr crr_insn; | |
2017 | |
2018 for (row = 0; row < ps->ii; row++) | |
2019 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row) | |
2020 { | |
2021 ddg_node_ptr u = crr_insn->node; | |
2022 | |
2023 gcc_assert (TEST_BIT (sched_nodes, u->cuid)); | |
2024 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by | |
2025 popcount (sched_nodes) == number of insns in ps. */ | |
2026 gcc_assert (SCHED_TIME (u) >= ps->min_cycle); | |
2027 gcc_assert (SCHED_TIME (u) <= ps->max_cycle); | |
2028 } | |
2029 } | |
2030 | |
2031 | |
2032 /* This page implements the algorithm for ordering the nodes of a DDG | |
2033 for modulo scheduling, activated through the | |
2034 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */ | |
2035 | |
2036 #define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info) | |
2037 #define ASAP(x) (ORDER_PARAMS ((x))->asap) | |
2038 #define ALAP(x) (ORDER_PARAMS ((x))->alap) | |
2039 #define HEIGHT(x) (ORDER_PARAMS ((x))->height) | |
2040 #define MOB(x) (ALAP ((x)) - ASAP ((x))) | |
2041 #define DEPTH(x) (ASAP ((x))) | |
2042 | |
2043 typedef struct node_order_params * nopa; | |
2044 | |
2045 static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result); | |
2046 static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int); | |
2047 static nopa calculate_order_params (ddg_ptr, int, int *); | |
2048 static int find_max_asap (ddg_ptr, sbitmap); | |
2049 static int find_max_hv_min_mob (ddg_ptr, sbitmap); | |
2050 static int find_max_dv_min_mob (ddg_ptr, sbitmap); | |
2051 | |
2052 enum sms_direction {BOTTOMUP, TOPDOWN}; | |
2053 | |
2054 struct node_order_params | |
2055 { | |
2056 int asap; | |
2057 int alap; | |
2058 int height; | |
2059 }; | |
2060 | |
2061 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */ | |
2062 static void | |
2063 check_nodes_order (int *node_order, int num_nodes) | |
2064 { | |
2065 int i; | |
2066 sbitmap tmp = sbitmap_alloc (num_nodes); | |
2067 | |
2068 sbitmap_zero (tmp); | |
2069 | |
2070 if (dump_file) | |
2071 fprintf (dump_file, "SMS final nodes order: \n"); | |
2072 | |
2073 for (i = 0; i < num_nodes; i++) | |
2074 { | |
2075 int u = node_order[i]; | |
2076 | |
2077 if (dump_file) | |
2078 fprintf (dump_file, "%d ", u); | |
2079 gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u)); | |
2080 | |
2081 SET_BIT (tmp, u); | |
2082 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2083 |
0 | 2084 if (dump_file) |
2085 fprintf (dump_file, "\n"); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2086 |
0 | 2087 sbitmap_free (tmp); |
2088 } | |
2089 | |
2090 /* Order the nodes of G for scheduling and pass the result in | |
2091 NODE_ORDER. Also set aux.count of each node to ASAP. | |
2092 Put maximal ASAP to PMAX_ASAP. Return the recMII for the given DDG. */ | |
2093 static int | |
2094 sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap) | |
2095 { | |
2096 int i; | |
2097 int rec_mii = 0; | |
2098 ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g); | |
2099 | |
2100 nopa nops = calculate_order_params (g, mii, pmax_asap); | |
2101 | |
2102 if (dump_file) | |
2103 print_sccs (dump_file, sccs, g); | |
2104 | |
2105 order_nodes_of_sccs (sccs, node_order); | |
2106 | |
2107 if (sccs->num_sccs > 0) | |
2108 /* First SCC has the largest recurrence_length. */ | |
2109 rec_mii = sccs->sccs[0]->recurrence_length; | |
2110 | |
2111 /* Save ASAP before destroying node_order_params. */ | |
2112 for (i = 0; i < g->num_nodes; i++) | |
2113 { | |
2114 ddg_node_ptr v = &g->nodes[i]; | |
2115 v->aux.count = ASAP (v); | |
2116 } | |
2117 | |
2118 free (nops); | |
2119 free_ddg_all_sccs (sccs); | |
2120 check_nodes_order (node_order, g->num_nodes); | |
2121 | |
2122 return rec_mii; | |
2123 } | |
2124 | |
2125 static void | |
2126 order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order) | |
2127 { | |
2128 int i, pos = 0; | |
2129 ddg_ptr g = all_sccs->ddg; | |
2130 int num_nodes = g->num_nodes; | |
2131 sbitmap prev_sccs = sbitmap_alloc (num_nodes); | |
2132 sbitmap on_path = sbitmap_alloc (num_nodes); | |
2133 sbitmap tmp = sbitmap_alloc (num_nodes); | |
2134 sbitmap ones = sbitmap_alloc (num_nodes); | |
2135 | |
2136 sbitmap_zero (prev_sccs); | |
2137 sbitmap_ones (ones); | |
2138 | |
2139 /* Perform the node ordering starting from the SCC with the highest recMII. | |
2140 For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc. */ | |
2141 for (i = 0; i < all_sccs->num_sccs; i++) | |
2142 { | |
2143 ddg_scc_ptr scc = all_sccs->sccs[i]; | |
2144 | |
2145 /* Add nodes on paths from previous SCCs to the current SCC. */ | |
2146 find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes); | |
2147 sbitmap_a_or_b (tmp, scc->nodes, on_path); | |
2148 | |
2149 /* Add nodes on paths from the current SCC to previous SCCs. */ | |
2150 find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs); | |
2151 sbitmap_a_or_b (tmp, tmp, on_path); | |
2152 | |
2153 /* Remove nodes of previous SCCs from current extended SCC. */ | |
2154 sbitmap_difference (tmp, tmp, prev_sccs); | |
2155 | |
2156 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos); | |
2157 /* Above call to order_nodes_in_scc updated prev_sccs |= tmp. */ | |
2158 } | |
2159 | |
2160 /* Handle the remaining nodes that do not belong to any scc. Each call | |
2161 to order_nodes_in_scc handles a single connected component. */ | |
2162 while (pos < g->num_nodes) | |
2163 { | |
2164 sbitmap_difference (tmp, ones, prev_sccs); | |
2165 pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos); | |
2166 } | |
2167 sbitmap_free (prev_sccs); | |
2168 sbitmap_free (on_path); | |
2169 sbitmap_free (tmp); | |
2170 sbitmap_free (ones); | |
2171 } | |
2172 | |
2173 /* MII is needed if we consider backarcs (that do not close recursive cycles). */ | |
2174 static struct node_order_params * | |
2175 calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap) | |
2176 { | |
2177 int u; | |
2178 int max_asap; | |
2179 int num_nodes = g->num_nodes; | |
2180 ddg_edge_ptr e; | |
2181 /* Allocate a place to hold ordering params for each node in the DDG. */ | |
2182 nopa node_order_params_arr; | |
2183 | |
2184 /* Initialize of ASAP/ALAP/HEIGHT to zero. */ | |
2185 node_order_params_arr = (nopa) xcalloc (num_nodes, | |
2186 sizeof (struct node_order_params)); | |
2187 | |
2188 /* Set the aux pointer of each node to point to its order_params structure. */ | |
2189 for (u = 0; u < num_nodes; u++) | |
2190 g->nodes[u].aux.info = &node_order_params_arr[u]; | |
2191 | |
2192 /* Disregarding a backarc from each recursive cycle to obtain a DAG, | |
2193 calculate ASAP, ALAP, mobility, distance, and height for each node | |
2194 in the dependence (direct acyclic) graph. */ | |
2195 | |
2196 /* We assume that the nodes in the array are in topological order. */ | |
2197 | |
2198 max_asap = 0; | |
2199 for (u = 0; u < num_nodes; u++) | |
2200 { | |
2201 ddg_node_ptr u_node = &g->nodes[u]; | |
2202 | |
2203 ASAP (u_node) = 0; | |
2204 for (e = u_node->in; e; e = e->next_in) | |
2205 if (e->distance == 0) | |
2206 ASAP (u_node) = MAX (ASAP (u_node), | |
2207 ASAP (e->src) + e->latency); | |
2208 max_asap = MAX (max_asap, ASAP (u_node)); | |
2209 } | |
2210 | |
2211 for (u = num_nodes - 1; u > -1; u--) | |
2212 { | |
2213 ddg_node_ptr u_node = &g->nodes[u]; | |
2214 | |
2215 ALAP (u_node) = max_asap; | |
2216 HEIGHT (u_node) = 0; | |
2217 for (e = u_node->out; e; e = e->next_out) | |
2218 if (e->distance == 0) | |
2219 { | |
2220 ALAP (u_node) = MIN (ALAP (u_node), | |
2221 ALAP (e->dest) - e->latency); | |
2222 HEIGHT (u_node) = MAX (HEIGHT (u_node), | |
2223 HEIGHT (e->dest) + e->latency); | |
2224 } | |
2225 } | |
2226 if (dump_file) | |
2227 { | |
2228 fprintf (dump_file, "\nOrder params\n"); | |
2229 for (u = 0; u < num_nodes; u++) | |
2230 { | |
2231 ddg_node_ptr u_node = &g->nodes[u]; | |
2232 | |
2233 fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u, | |
2234 ASAP (u_node), ALAP (u_node), HEIGHT (u_node)); | |
2235 } | |
2236 } | |
2237 | |
2238 *pmax_asap = max_asap; | |
2239 return node_order_params_arr; | |
2240 } | |
2241 | |
2242 static int | |
2243 find_max_asap (ddg_ptr g, sbitmap nodes) | |
2244 { | |
2245 unsigned int u = 0; | |
2246 int max_asap = -1; | |
2247 int result = -1; | |
2248 sbitmap_iterator sbi; | |
2249 | |
2250 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi) | |
2251 { | |
2252 ddg_node_ptr u_node = &g->nodes[u]; | |
2253 | |
2254 if (max_asap < ASAP (u_node)) | |
2255 { | |
2256 max_asap = ASAP (u_node); | |
2257 result = u; | |
2258 } | |
2259 } | |
2260 return result; | |
2261 } | |
2262 | |
2263 static int | |
2264 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes) | |
2265 { | |
2266 unsigned int u = 0; | |
2267 int max_hv = -1; | |
2268 int min_mob = INT_MAX; | |
2269 int result = -1; | |
2270 sbitmap_iterator sbi; | |
2271 | |
2272 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi) | |
2273 { | |
2274 ddg_node_ptr u_node = &g->nodes[u]; | |
2275 | |
2276 if (max_hv < HEIGHT (u_node)) | |
2277 { | |
2278 max_hv = HEIGHT (u_node); | |
2279 min_mob = MOB (u_node); | |
2280 result = u; | |
2281 } | |
2282 else if ((max_hv == HEIGHT (u_node)) | |
2283 && (min_mob > MOB (u_node))) | |
2284 { | |
2285 min_mob = MOB (u_node); | |
2286 result = u; | |
2287 } | |
2288 } | |
2289 return result; | |
2290 } | |
2291 | |
2292 static int | |
2293 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes) | |
2294 { | |
2295 unsigned int u = 0; | |
2296 int max_dv = -1; | |
2297 int min_mob = INT_MAX; | |
2298 int result = -1; | |
2299 sbitmap_iterator sbi; | |
2300 | |
2301 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi) | |
2302 { | |
2303 ddg_node_ptr u_node = &g->nodes[u]; | |
2304 | |
2305 if (max_dv < DEPTH (u_node)) | |
2306 { | |
2307 max_dv = DEPTH (u_node); | |
2308 min_mob = MOB (u_node); | |
2309 result = u; | |
2310 } | |
2311 else if ((max_dv == DEPTH (u_node)) | |
2312 && (min_mob > MOB (u_node))) | |
2313 { | |
2314 min_mob = MOB (u_node); | |
2315 result = u; | |
2316 } | |
2317 } | |
2318 return result; | |
2319 } | |
2320 | |
2321 /* Places the nodes of SCC into the NODE_ORDER array starting | |
2322 at position POS, according to the SMS ordering algorithm. | |
2323 NODES_ORDERED (in&out parameter) holds the bitset of all nodes in | |
2324 the NODE_ORDER array, starting from position zero. */ | |
2325 static int | |
2326 order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc, | |
2327 int * node_order, int pos) | |
2328 { | |
2329 enum sms_direction dir; | |
2330 int num_nodes = g->num_nodes; | |
2331 sbitmap workset = sbitmap_alloc (num_nodes); | |
2332 sbitmap tmp = sbitmap_alloc (num_nodes); | |
2333 sbitmap zero_bitmap = sbitmap_alloc (num_nodes); | |
2334 sbitmap predecessors = sbitmap_alloc (num_nodes); | |
2335 sbitmap successors = sbitmap_alloc (num_nodes); | |
2336 | |
2337 sbitmap_zero (predecessors); | |
2338 find_predecessors (predecessors, g, nodes_ordered); | |
2339 | |
2340 sbitmap_zero (successors); | |
2341 find_successors (successors, g, nodes_ordered); | |
2342 | |
2343 sbitmap_zero (tmp); | |
2344 if (sbitmap_a_and_b_cg (tmp, predecessors, scc)) | |
2345 { | |
2346 sbitmap_copy (workset, tmp); | |
2347 dir = BOTTOMUP; | |
2348 } | |
2349 else if (sbitmap_a_and_b_cg (tmp, successors, scc)) | |
2350 { | |
2351 sbitmap_copy (workset, tmp); | |
2352 dir = TOPDOWN; | |
2353 } | |
2354 else | |
2355 { | |
2356 int u; | |
2357 | |
2358 sbitmap_zero (workset); | |
2359 if ((u = find_max_asap (g, scc)) >= 0) | |
2360 SET_BIT (workset, u); | |
2361 dir = BOTTOMUP; | |
2362 } | |
2363 | |
2364 sbitmap_zero (zero_bitmap); | |
2365 while (!sbitmap_equal (workset, zero_bitmap)) | |
2366 { | |
2367 int v; | |
2368 ddg_node_ptr v_node; | |
2369 sbitmap v_node_preds; | |
2370 sbitmap v_node_succs; | |
2371 | |
2372 if (dir == TOPDOWN) | |
2373 { | |
2374 while (!sbitmap_equal (workset, zero_bitmap)) | |
2375 { | |
2376 v = find_max_hv_min_mob (g, workset); | |
2377 v_node = &g->nodes[v]; | |
2378 node_order[pos++] = v; | |
2379 v_node_succs = NODE_SUCCESSORS (v_node); | |
2380 sbitmap_a_and_b (tmp, v_node_succs, scc); | |
2381 | |
2382 /* Don't consider the already ordered successors again. */ | |
2383 sbitmap_difference (tmp, tmp, nodes_ordered); | |
2384 sbitmap_a_or_b (workset, workset, tmp); | |
2385 RESET_BIT (workset, v); | |
2386 SET_BIT (nodes_ordered, v); | |
2387 } | |
2388 dir = BOTTOMUP; | |
2389 sbitmap_zero (predecessors); | |
2390 find_predecessors (predecessors, g, nodes_ordered); | |
2391 sbitmap_a_and_b (workset, predecessors, scc); | |
2392 } | |
2393 else | |
2394 { | |
2395 while (!sbitmap_equal (workset, zero_bitmap)) | |
2396 { | |
2397 v = find_max_dv_min_mob (g, workset); | |
2398 v_node = &g->nodes[v]; | |
2399 node_order[pos++] = v; | |
2400 v_node_preds = NODE_PREDECESSORS (v_node); | |
2401 sbitmap_a_and_b (tmp, v_node_preds, scc); | |
2402 | |
2403 /* Don't consider the already ordered predecessors again. */ | |
2404 sbitmap_difference (tmp, tmp, nodes_ordered); | |
2405 sbitmap_a_or_b (workset, workset, tmp); | |
2406 RESET_BIT (workset, v); | |
2407 SET_BIT (nodes_ordered, v); | |
2408 } | |
2409 dir = TOPDOWN; | |
2410 sbitmap_zero (successors); | |
2411 find_successors (successors, g, nodes_ordered); | |
2412 sbitmap_a_and_b (workset, successors, scc); | |
2413 } | |
2414 } | |
2415 sbitmap_free (tmp); | |
2416 sbitmap_free (workset); | |
2417 sbitmap_free (zero_bitmap); | |
2418 sbitmap_free (predecessors); | |
2419 sbitmap_free (successors); | |
2420 return pos; | |
2421 } | |
2422 | |
2423 | |
2424 /* This page contains functions for manipulating partial-schedules during | |
2425 modulo scheduling. */ | |
2426 | |
2427 /* Create a partial schedule and allocate a memory to hold II rows. */ | |
2428 | |
2429 static partial_schedule_ptr | |
2430 create_partial_schedule (int ii, ddg_ptr g, int history) | |
2431 { | |
2432 partial_schedule_ptr ps = XNEW (struct partial_schedule); | |
2433 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr)); | |
2434 ps->ii = ii; | |
2435 ps->history = history; | |
2436 ps->min_cycle = INT_MAX; | |
2437 ps->max_cycle = INT_MIN; | |
2438 ps->g = g; | |
2439 | |
2440 return ps; | |
2441 } | |
2442 | |
2443 /* Free the PS_INSNs in rows array of the given partial schedule. | |
2444 ??? Consider caching the PS_INSN's. */ | |
2445 static void | |
2446 free_ps_insns (partial_schedule_ptr ps) | |
2447 { | |
2448 int i; | |
2449 | |
2450 for (i = 0; i < ps->ii; i++) | |
2451 { | |
2452 while (ps->rows[i]) | |
2453 { | |
2454 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row; | |
2455 | |
2456 free (ps->rows[i]); | |
2457 ps->rows[i] = ps_insn; | |
2458 } | |
2459 ps->rows[i] = NULL; | |
2460 } | |
2461 } | |
2462 | |
2463 /* Free all the memory allocated to the partial schedule. */ | |
2464 | |
2465 static void | |
2466 free_partial_schedule (partial_schedule_ptr ps) | |
2467 { | |
2468 if (!ps) | |
2469 return; | |
2470 free_ps_insns (ps); | |
2471 free (ps->rows); | |
2472 free (ps); | |
2473 } | |
2474 | |
2475 /* Clear the rows array with its PS_INSNs, and create a new one with | |
2476 NEW_II rows. */ | |
2477 | |
2478 static void | |
2479 reset_partial_schedule (partial_schedule_ptr ps, int new_ii) | |
2480 { | |
2481 if (!ps) | |
2482 return; | |
2483 free_ps_insns (ps); | |
2484 if (new_ii == ps->ii) | |
2485 return; | |
2486 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii | |
2487 * sizeof (ps_insn_ptr)); | |
2488 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr)); | |
2489 ps->ii = new_ii; | |
2490 ps->min_cycle = INT_MAX; | |
2491 ps->max_cycle = INT_MIN; | |
2492 } | |
2493 | |
2494 /* Prints the partial schedule as an ii rows array, for each rows | |
2495 print the ids of the insns in it. */ | |
2496 void | |
2497 print_partial_schedule (partial_schedule_ptr ps, FILE *dump) | |
2498 { | |
2499 int i; | |
2500 | |
2501 for (i = 0; i < ps->ii; i++) | |
2502 { | |
2503 ps_insn_ptr ps_i = ps->rows[i]; | |
2504 | |
2505 fprintf (dump, "\n[ROW %d ]: ", i); | |
2506 while (ps_i) | |
2507 { | |
2508 fprintf (dump, "%d, ", | |
2509 INSN_UID (ps_i->node->insn)); | |
2510 ps_i = ps_i->next_in_row; | |
2511 } | |
2512 } | |
2513 } | |
2514 | |
2515 /* Creates an object of PS_INSN and initializes it to the given parameters. */ | |
2516 static ps_insn_ptr | |
2517 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle) | |
2518 { | |
2519 ps_insn_ptr ps_i = XNEW (struct ps_insn); | |
2520 | |
2521 ps_i->node = node; | |
2522 ps_i->next_in_row = NULL; | |
2523 ps_i->prev_in_row = NULL; | |
2524 ps_i->row_rest_count = rest_count; | |
2525 ps_i->cycle = cycle; | |
2526 | |
2527 return ps_i; | |
2528 } | |
2529 | |
2530 | |
2531 /* Removes the given PS_INSN from the partial schedule. Returns false if the | |
2532 node is not found in the partial schedule, else returns true. */ | |
2533 static bool | |
2534 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i) | |
2535 { | |
2536 int row; | |
2537 | |
2538 if (!ps || !ps_i) | |
2539 return false; | |
2540 | |
2541 row = SMODULO (ps_i->cycle, ps->ii); | |
2542 if (! ps_i->prev_in_row) | |
2543 { | |
2544 if (ps_i != ps->rows[row]) | |
2545 return false; | |
2546 | |
2547 ps->rows[row] = ps_i->next_in_row; | |
2548 if (ps->rows[row]) | |
2549 ps->rows[row]->prev_in_row = NULL; | |
2550 } | |
2551 else | |
2552 { | |
2553 ps_i->prev_in_row->next_in_row = ps_i->next_in_row; | |
2554 if (ps_i->next_in_row) | |
2555 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row; | |
2556 } | |
2557 free (ps_i); | |
2558 return true; | |
2559 } | |
2560 | |
2561 /* Unlike what literature describes for modulo scheduling (which focuses | |
2562 on VLIW machines) the order of the instructions inside a cycle is | |
2563 important. Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know | |
2564 where the current instruction should go relative to the already | |
2565 scheduled instructions in the given cycle. Go over these | |
2566 instructions and find the first possible column to put it in. */ | |
2567 static bool | |
2568 ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i, | |
2569 sbitmap must_precede, sbitmap must_follow) | |
2570 { | |
2571 ps_insn_ptr next_ps_i; | |
2572 ps_insn_ptr first_must_follow = NULL; | |
2573 ps_insn_ptr last_must_precede = NULL; | |
2574 int row; | |
2575 | |
2576 if (! ps_i) | |
2577 return false; | |
2578 | |
2579 row = SMODULO (ps_i->cycle, ps->ii); | |
2580 | |
2581 /* Find the first must follow and the last must precede | |
2582 and insert the node immediately after the must precede | |
2583 but make sure that it there is no must follow after it. */ | |
2584 for (next_ps_i = ps->rows[row]; | |
2585 next_ps_i; | |
2586 next_ps_i = next_ps_i->next_in_row) | |
2587 { | |
2588 if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid) | |
2589 && ! first_must_follow) | |
2590 first_must_follow = next_ps_i; | |
2591 if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid)) | |
2592 { | |
2593 /* If we have already met a node that must follow, then | |
2594 there is no possible column. */ | |
2595 if (first_must_follow) | |
2596 return false; | |
2597 else | |
2598 last_must_precede = next_ps_i; | |
2599 } | |
2600 } | |
2601 | |
2602 /* Now insert the node after INSERT_AFTER_PSI. */ | |
2603 | |
2604 if (! last_must_precede) | |
2605 { | |
2606 ps_i->next_in_row = ps->rows[row]; | |
2607 ps_i->prev_in_row = NULL; | |
2608 if (ps_i->next_in_row) | |
2609 ps_i->next_in_row->prev_in_row = ps_i; | |
2610 ps->rows[row] = ps_i; | |
2611 } | |
2612 else | |
2613 { | |
2614 ps_i->next_in_row = last_must_precede->next_in_row; | |
2615 last_must_precede->next_in_row = ps_i; | |
2616 ps_i->prev_in_row = last_must_precede; | |
2617 if (ps_i->next_in_row) | |
2618 ps_i->next_in_row->prev_in_row = ps_i; | |
2619 } | |
2620 | |
2621 return true; | |
2622 } | |
2623 | |
/* Advances the PS_INSN one column in its current row; returns false
   in failure and true in success.  Bit N is set in MUST_FOLLOW if
   the node with cuid N must come after the node pointed to by
   PS_I when scheduled in the same cycle.  */
static int
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
			sbitmap must_follow)
{
  ps_insn_ptr prev, next;
  int row;
  ddg_node_ptr next_node;

  if (!ps || !ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Already in the last column of the row: nowhere to advance to.  */
  if (! ps_i->next_in_row)
    return false;

  next_node = ps_i->next_in_row->node;

  /* Check if next_in_row is dependent on ps_i, both having same sched
     times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
  if (must_follow && TEST_BIT (must_follow, next_node->cuid))
    return false;

  /* Advance PS_I over its next_in_row in the doubly linked list:
     swap PS_I with NEXT while fixing up all four surrounding links.  */
  prev = ps_i->prev_in_row;
  next = ps_i->next_in_row;

  /* If PS_I headed the row, NEXT becomes the new head.  */
  if (ps_i == ps->rows[row])
    ps->rows[row] = next;

  ps_i->next_in_row = next->next_in_row;

  if (next->next_in_row)
    next->next_in_row->prev_in_row = ps_i;

  next->next_in_row = ps_i;
  ps_i->prev_in_row = next;

  next->prev_in_row = prev;
  if (prev)
    prev->next_in_row = next;

  return true;
}
2672 | |
2673 /* Inserts a DDG_NODE to the given partial schedule at the given cycle. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2674 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2675 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2676 before/after (respectively) the node pointed to by PS_I when scheduled |
0 | 2677 in the same cycle. */ |
2678 static ps_insn_ptr | |
2679 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle, | |
2680 sbitmap must_precede, sbitmap must_follow) | |
2681 { | |
2682 ps_insn_ptr ps_i; | |
2683 int rest_count = 1; | |
2684 int row = SMODULO (cycle, ps->ii); | |
2685 | |
2686 if (ps->rows[row] | |
2687 && ps->rows[row]->row_rest_count >= issue_rate) | |
2688 return NULL; | |
2689 | |
2690 if (ps->rows[row]) | |
2691 rest_count += ps->rows[row]->row_rest_count; | |
2692 | |
2693 ps_i = create_ps_insn (node, rest_count, cycle); | |
2694 | |
2695 /* Finds and inserts PS_I according to MUST_FOLLOW and | |
2696 MUST_PRECEDE. */ | |
2697 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow)) | |
2698 { | |
2699 free (ps_i); | |
2700 return NULL; | |
2701 } | |
2702 | |
2703 return ps_i; | |
2704 } | |
2705 | |
2706 /* Advance time one cycle. Assumes DFA is being used. */ | |
2707 static void | |
2708 advance_one_cycle (void) | |
2709 { | |
2710 if (targetm.sched.dfa_pre_cycle_insn) | |
2711 state_transition (curr_state, | |
2712 targetm.sched.dfa_pre_cycle_insn ()); | |
2713 | |
2714 state_transition (curr_state, NULL); | |
2715 | |
2716 if (targetm.sched.dfa_post_cycle_insn) | |
2717 state_transition (curr_state, | |
2718 targetm.sched.dfa_post_cycle_insn ()); | |
2719 } | |
2720 | |
2721 | |
2722 | |
2723 /* Checks if PS has resource conflicts according to DFA, starting from | |
2724 FROM cycle to TO cycle; returns true if there are conflicts and false | |
2725 if there are no conflicts. Assumes DFA is being used. */ | |
2726 static int | |
2727 ps_has_conflicts (partial_schedule_ptr ps, int from, int to) | |
2728 { | |
2729 int cycle; | |
2730 | |
2731 state_reset (curr_state); | |
2732 | |
2733 for (cycle = from; cycle <= to; cycle++) | |
2734 { | |
2735 ps_insn_ptr crr_insn; | |
2736 /* Holds the remaining issue slots in the current row. */ | |
2737 int can_issue_more = issue_rate; | |
2738 | |
2739 /* Walk through the DFA for the current row. */ | |
2740 for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)]; | |
2741 crr_insn; | |
2742 crr_insn = crr_insn->next_in_row) | |
2743 { | |
2744 rtx insn = crr_insn->node->insn; | |
2745 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2746 if (!NONDEBUG_INSN_P (insn)) |
0 | 2747 continue; |
2748 | |
2749 /* Check if there is room for the current insn. */ | |
2750 if (!can_issue_more || state_dead_lock_p (curr_state)) | |
2751 return true; | |
2752 | |
2753 /* Update the DFA state and return with failure if the DFA found | |
2754 resource conflicts. */ | |
2755 if (state_transition (curr_state, insn) >= 0) | |
2756 return true; | |
2757 | |
2758 if (targetm.sched.variable_issue) | |
2759 can_issue_more = | |
2760 targetm.sched.variable_issue (sched_dump, sched_verbose, | |
2761 insn, can_issue_more); | |
2762 /* A naked CLOBBER or USE generates no instruction, so don't | |
2763 let them consume issue slots. */ | |
2764 else if (GET_CODE (PATTERN (insn)) != USE | |
2765 && GET_CODE (PATTERN (insn)) != CLOBBER) | |
2766 can_issue_more--; | |
2767 } | |
2768 | |
2769 /* Advance the DFA to the next cycle. */ | |
2770 advance_one_cycle (); | |
2771 } | |
2772 return false; | |
2773 } | |
2774 | |
2775 /* Checks if the given node causes resource conflicts when added to PS at | |
2776 cycle C. If not the node is added to PS and returned; otherwise zero | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2777 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2778 cuid N must be come before/after (respectively) the node pointed to by |
0 | 2779 PS_I when scheduled in the same cycle. */ |
2780 ps_insn_ptr | |
2781 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n, | |
2782 int c, sbitmap must_precede, | |
2783 sbitmap must_follow) | |
2784 { | |
2785 int has_conflicts = 0; | |
2786 ps_insn_ptr ps_i; | |
2787 | |
2788 /* First add the node to the PS, if this succeeds check for | |
2789 conflicts, trying different issue slots in the same row. */ | |
2790 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow))) | |
2791 return NULL; /* Failed to insert the node at the given cycle. */ | |
2792 | |
2793 has_conflicts = ps_has_conflicts (ps, c, c) | |
2794 || (ps->history > 0 | |
2795 && ps_has_conflicts (ps, | |
2796 c - ps->history, | |
2797 c + ps->history)); | |
2798 | |
2799 /* Try different issue slots to find one that the given node can be | |
2800 scheduled in without conflicts. */ | |
2801 while (has_conflicts) | |
2802 { | |
2803 if (! ps_insn_advance_column (ps, ps_i, must_follow)) | |
2804 break; | |
2805 has_conflicts = ps_has_conflicts (ps, c, c) | |
2806 || (ps->history > 0 | |
2807 && ps_has_conflicts (ps, | |
2808 c - ps->history, | |
2809 c + ps->history)); | |
2810 } | |
2811 | |
2812 if (has_conflicts) | |
2813 { | |
2814 remove_node_from_ps (ps, ps_i); | |
2815 return NULL; | |
2816 } | |
2817 | |
2818 ps->min_cycle = MIN (ps->min_cycle, c); | |
2819 ps->max_cycle = MAX (ps->max_cycle, c); | |
2820 return ps_i; | |
2821 } | |
2822 | |
2823 /* Rotate the rows of PS such that insns scheduled at time | |
2824 START_CYCLE will appear in row 0. Updates max/min_cycles. */ | |
2825 void | |
2826 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle) | |
2827 { | |
2828 int i, row, backward_rotates; | |
2829 int last_row = ps->ii - 1; | |
2830 | |
2831 if (start_cycle == 0) | |
2832 return; | |
2833 | |
2834 backward_rotates = SMODULO (start_cycle, ps->ii); | |
2835 | |
2836 /* Revisit later and optimize this into a single loop. */ | |
2837 for (i = 0; i < backward_rotates; i++) | |
2838 { | |
2839 ps_insn_ptr first_row = ps->rows[0]; | |
2840 | |
2841 for (row = 0; row < last_row; row++) | |
2842 ps->rows[row] = ps->rows[row+1]; | |
2843 | |
2844 ps->rows[last_row] = first_row; | |
2845 } | |
2846 | |
2847 ps->max_cycle -= start_cycle; | |
2848 ps->min_cycle -= start_cycle; | |
2849 } | |
2850 | |
2851 #endif /* INSN_SCHEDULING */ | |
2852 | |
2853 static bool | |
2854 gate_handle_sms (void) | |
2855 { | |
2856 return (optimize > 0 && flag_modulo_sched); | |
2857 } | |
2858 | |
2859 | |
/* Run instruction scheduler: perform SMS modulo scheduling on all
   suitable loops.  Entry point of the SMS pass.  */
static unsigned int
rest_of_handle_sms (void)
{
#ifdef INSN_SCHEDULING
  basic_block bb;

  /* Collect loop information to be used in SMS.  */
  cfg_layout_initialize (0);
  sms_schedule ();

  /* Update the life information, because we add pseudos.  */
  max_regno = max_reg_num ();

  /* Finalize layout changes: record the intended block order in each
     block's aux field before leaving cfglayout mode.  */
  FOR_EACH_BB (bb)
    if (bb->next_bb != EXIT_BLOCK_PTR)
      bb->aux = bb->next_bb;
  free_dominance_info (CDI_DOMINATORS);
  cfg_layout_finalize ();
#endif /* INSN_SCHEDULING */
  return 0;
}
2884 | |
/* The swing-modulo-scheduling RTL pass descriptor.  */
struct rtl_opt_pass pass_sms =
{
 {
  RTL_PASS,
  "sms",                                /* name */
  gate_handle_sms,                      /* gate */
  rest_of_handle_sms,                   /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_SMS,                               /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  TODO_dump_func,                       /* todo_flags_start */
  TODO_df_finish
    | TODO_verify_flow
    | TODO_verify_rtl_sharing
    | TODO_dump_func
    | TODO_ggc_collect                  /* todo_flags_finish */
 }
};
2907 |