0
|
1 /* Swing Modulo Scheduling implementation.
|
|
2 Copyright (C) 2004, 2005, 2006, 2007, 2008
|
|
3 Free Software Foundation, Inc.
|
|
4 Contributed by Ayal Zaks and Mustafa Hagog <zaks,mustafa@il.ibm.com>
|
|
5
|
|
6 This file is part of GCC.
|
|
7
|
|
8 GCC is free software; you can redistribute it and/or modify it under
|
|
9 the terms of the GNU General Public License as published by the Free
|
|
10 Software Foundation; either version 3, or (at your option) any later
|
|
11 version.
|
|
12
|
|
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
16 for more details.
|
|
17
|
|
18 You should have received a copy of the GNU General Public License
|
|
19 along with GCC; see the file COPYING3. If not see
|
|
20 <http://www.gnu.org/licenses/>. */
|
|
21
|
|
22
|
|
23 #include "config.h"
|
|
24 #include "system.h"
|
|
25 #include "coretypes.h"
|
|
26 #include "tm.h"
|
|
27 #include "toplev.h"
|
|
28 #include "rtl.h"
|
|
29 #include "tm_p.h"
|
|
30 #include "hard-reg-set.h"
|
|
31 #include "regs.h"
|
|
32 #include "function.h"
|
|
33 #include "flags.h"
|
|
34 #include "insn-config.h"
|
|
35 #include "insn-attr.h"
|
|
36 #include "except.h"
|
|
37 #include "toplev.h"
|
|
38 #include "recog.h"
|
|
39 #include "sched-int.h"
|
|
40 #include "target.h"
|
|
41 #include "cfglayout.h"
|
|
42 #include "cfgloop.h"
|
|
43 #include "cfghooks.h"
|
|
44 #include "expr.h"
|
|
45 #include "params.h"
|
|
46 #include "gcov-io.h"
|
|
47 #include "ddg.h"
|
|
48 #include "timevar.h"
|
|
49 #include "tree-pass.h"
|
|
50 #include "dbgcnt.h"
|
|
51
|
|
52 #ifdef INSN_SCHEDULING
|
|
53
|
|
54 /* This file contains the implementation of the Swing Modulo Scheduler,
|
|
55 described in the following references:
|
|
56 [1] J. Llosa, A. Gonzalez, E. Ayguade, M. Valero., and J. Eckhardt.
|
|
57 Lifetime--sensitive modulo scheduling in a production environment.
|
|
58 IEEE Trans. on Comps., 50(3), March 2001
|
|
59 [2] J. Llosa, A. Gonzalez, E. Ayguade, and M. Valero.
|
|
60 Swing Modulo Scheduling: A Lifetime Sensitive Approach.
|
|
61 PACT '96 , pages 80-87, October 1996 (Boston - Massachusetts - USA).
|
|
62
|
|
63 The basic structure is:
|
|
64 1. Build a data-dependence graph (DDG) for each loop.
|
|
65 2. Use the DDG to order the insns of a loop (not in topological order
|
|
66 necessarily, but rather) trying to place each insn after all its
|
|
67 predecessors _or_ after all its successors.
|
|
68 3. Compute MII: a lower bound on the number of cycles to schedule the loop.
|
|
69 4. Use the ordering to perform list-scheduling of the loop:
|
|
70 1. Set II = MII. We will try to schedule the loop within II cycles.
|
|
71 2. Try to schedule the insns one by one according to the ordering.
|
|
72 For each insn compute an interval of cycles by considering already-
|
|
73 scheduled preds and succs (and associated latencies); try to place
|
|
74 the insn in the cycles of this window checking for potential
|
|
75 resource conflicts (using the DFA interface).
|
|
76 Note: this is different from the cycle-scheduling of schedule_insns;
|
|
77 here the insns are not scheduled monotonically top-down (nor bottom-
|
|
78 up).
|
|
79 3. If failed in scheduling all insns - bump II++ and try again, unless
|
|
80 II reaches an upper bound MaxII, in which case report failure.
|
|
81 5. If we succeeded in scheduling the loop within II cycles, we now
|
|
82 generate prolog and epilog, decrease the counter of the loop, and
|
|
83 perform modulo variable expansion for live ranges that span more than
|
|
84 II cycles (i.e. use register copies to prevent a def from overwriting
|
|
85 itself before reaching the use).
|
|
86
|
|
87 SMS works with countable loops (1) whose control part can be easily
|
|
88 decoupled from the rest of the loop and (2) whose loop count can
|
|
89 be easily adjusted. This is because we peel a constant number of
|
|
90 iterations into a prologue and epilogue for which we want to avoid
|
|
91 emitting the control part, and a kernel which is to iterate that
|
|
92 constant number of iterations less than the original loop. So the
|
|
93 control part should be a set of insns clearly identified and having
|
|
94 its own iv, not otherwise used in the loop (at-least for now), which
|
|
95 initializes a register before the loop to the number of iterations.
|
|
96 Currently SMS relies on the do-loop pattern to recognize such loops,
|
|
97 where (1) the control part comprises of all insns defining and/or
|
|
98 using a certain 'count' register and (2) the loop count can be
|
|
99 adjusted by modifying this register prior to the loop.
|
|
100 TODO: Rely on cfgloop analysis instead. */
|
|
101
|
|
102 /* This page defines partial-schedule structures and functions for
|
|
103 modulo scheduling. */
|
|
104
|
|
105 typedef struct partial_schedule *partial_schedule_ptr;
|
|
106 typedef struct ps_insn *ps_insn_ptr;
|
|
107
|
|
108 /* The minimum (absolute) cycle that a node of ps was scheduled in. */
|
|
109 #define PS_MIN_CYCLE(ps) (((partial_schedule_ptr)(ps))->min_cycle)
|
|
110
|
|
111 /* The maximum (absolute) cycle that a node of ps was scheduled in. */
|
|
112 #define PS_MAX_CYCLE(ps) (((partial_schedule_ptr)(ps))->max_cycle)
|
|
113
|
|
114 /* Perform signed modulo, always returning a non-negative value. */
|
|
115 #define SMODULO(x,y) ((x) % (y) < 0 ? ((x) % (y) + (y)) : (x) % (y))
|
|
116
|
|
117 /* The number of different iterations the nodes in ps span, assuming
|
|
118 the stage boundaries are placed efficiently. */
|
|
119 #define PS_STAGE_COUNT(ps) ((PS_MAX_CYCLE (ps) - PS_MIN_CYCLE (ps) \
|
|
120 + 1 + (ps)->ii - 1) / (ps)->ii)
|
|
121
|
|
122 /* A single instruction in the partial schedule. */
|
|
123 struct ps_insn
|
|
124 {
|
|
125 /* The corresponding DDG_NODE. */
|
|
126 ddg_node_ptr node;
|
|
127
|
|
128 /* The (absolute) cycle in which the PS instruction is scheduled.
|
|
129 Same as SCHED_TIME (node). */
|
|
130 int cycle;
|
|
131
|
|
132 /* The next/prev PS_INSN in the same row. */
|
|
133 ps_insn_ptr next_in_row,
|
|
134 prev_in_row;
|
|
135
|
|
136 /* The number of nodes in the same row that come after this node. */
|
|
137 int row_rest_count;
|
|
138 };
|
|
139
|
|
140 /* Holds the partial schedule as an array of II rows. Each entry of the
|
|
141 array points to a linked list of PS_INSNs, which represents the
|
|
142 instructions that are scheduled for that row. */
|
|
143 struct partial_schedule
|
|
144 {
|
|
145 int ii; /* Number of rows in the partial schedule. */
|
|
146 int history; /* Threshold for conflict checking using DFA. */
|
|
147
|
|
148 /* rows[i] points to linked list of insns scheduled in row i (0<=i<ii). */
|
|
149 ps_insn_ptr *rows;
|
|
150
|
|
151 /* The earliest absolute cycle of an insn in the partial schedule. */
|
|
152 int min_cycle;
|
|
153
|
|
154 /* The latest absolute cycle of an insn in the partial schedule. */
|
|
155 int max_cycle;
|
|
156
|
|
157 ddg_ptr g; /* The DDG of the insns in the partial schedule. */
|
|
158 };
|
|
159
|
|
160 /* We use this to record all the register replacements we do in
|
|
161 the kernel so we can undo SMS if it is not profitable. */
|
|
162 struct undo_replace_buff_elem
|
|
163 {
|
|
164 rtx insn;
|
|
165 rtx orig_reg;
|
|
166 rtx new_reg;
|
|
167 struct undo_replace_buff_elem *next;
|
|
168 };
|
|
169
|
|
170
|
|
171
|
|
172 static partial_schedule_ptr create_partial_schedule (int ii, ddg_ptr, int history);
|
|
173 static void free_partial_schedule (partial_schedule_ptr);
|
|
174 static void reset_partial_schedule (partial_schedule_ptr, int new_ii);
|
|
175 void print_partial_schedule (partial_schedule_ptr, FILE *);
|
|
176 static void verify_partial_schedule (partial_schedule_ptr, sbitmap);
|
|
177 static ps_insn_ptr ps_add_node_check_conflicts (partial_schedule_ptr,
|
|
178 ddg_node_ptr node, int cycle,
|
|
179 sbitmap must_precede,
|
|
180 sbitmap must_follow);
|
|
181 static void rotate_partial_schedule (partial_schedule_ptr, int);
|
|
182 void set_row_column_for_ps (partial_schedule_ptr);
|
|
183 static void ps_insert_empty_row (partial_schedule_ptr, int, sbitmap);
|
|
184 static int compute_split_row (sbitmap, int, int, int, ddg_node_ptr);
|
|
185
|
|
186
|
|
187 /* This page defines constants and structures for the modulo scheduling
|
|
188 driver. */
|
|
189
|
|
190 static int sms_order_nodes (ddg_ptr, int, int *, int *);
|
|
191 static void set_node_sched_params (ddg_ptr);
|
|
192 static partial_schedule_ptr sms_schedule_by_order (ddg_ptr, int, int, int *);
|
|
193 static void permute_partial_schedule (partial_schedule_ptr, rtx);
|
|
194 static void generate_prolog_epilog (partial_schedule_ptr, struct loop *,
|
|
195 rtx, rtx);
|
|
196 static void duplicate_insns_of_cycles (partial_schedule_ptr,
|
|
197 int, int, int, rtx);
|
|
198
|
|
199 #define SCHED_ASAP(x) (((node_sched_params_ptr)(x)->aux.info)->asap)
|
|
200 #define SCHED_TIME(x) (((node_sched_params_ptr)(x)->aux.info)->time)
|
|
201 #define SCHED_FIRST_REG_MOVE(x) \
|
|
202 (((node_sched_params_ptr)(x)->aux.info)->first_reg_move)
|
|
203 #define SCHED_NREG_MOVES(x) \
|
|
204 (((node_sched_params_ptr)(x)->aux.info)->nreg_moves)
|
|
205 #define SCHED_ROW(x) (((node_sched_params_ptr)(x)->aux.info)->row)
|
|
206 #define SCHED_STAGE(x) (((node_sched_params_ptr)(x)->aux.info)->stage)
|
|
207 #define SCHED_COLUMN(x) (((node_sched_params_ptr)(x)->aux.info)->column)
|
|
208
|
|
209 /* The scheduling parameters held for each node. */
|
|
210 typedef struct node_sched_params
|
|
211 {
|
|
212 int asap; /* A lower-bound on the absolute scheduling cycle. */
|
|
213 int time; /* The absolute scheduling cycle (time >= asap). */
|
|
214
|
|
215 /* The following field (first_reg_move) is a pointer to the first
|
|
216 register-move instruction added to handle the modulo-variable-expansion
|
|
217 of the register defined by this node. This register-move copies the
|
|
218 original register defined by the node. */
|
|
219 rtx first_reg_move;
|
|
220
|
|
221 /* The number of register-move instructions added, immediately preceding
|
|
222 first_reg_move. */
|
|
223 int nreg_moves;
|
|
224
|
|
225 int row; /* Holds time % ii. */
|
|
226 int stage; /* Holds time / ii. */
|
|
227
|
|
228 /* The column of a node inside the ps. If nodes u, v are on the same row,
|
|
229 u will precede v if column (u) < column (v). */
|
|
230 int column;
|
|
231 } *node_sched_params_ptr;
|
|
232
|
|
233
|
|
234 /* The following three functions are copied from the current scheduler
|
|
235 code in order to use sched_analyze() for computing the dependencies.
|
|
236 They are used when initializing the sched_info structure. */
|
|
237 static const char *
|
|
238 sms_print_insn (const_rtx insn, int aligned ATTRIBUTE_UNUSED)
|
|
239 {
|
|
240 static char tmp[80];
|
|
241
|
|
242 sprintf (tmp, "i%4d", INSN_UID (insn));
|
|
243 return tmp;
|
|
244 }
|
|
245
|
|
/* Deliberately-empty implementation of the sched_deps hook for computing
   register dependencies of a jump.  SMS does not add any extra
   dependencies here; the stub only exists so sms_sched_deps_info has a
   valid function pointer for sched_analyze to call.  */
static void
compute_jump_reg_dependencies (rtx insn ATTRIBUTE_UNUSED,
			       regset cond_exec ATTRIBUTE_UNUSED,
			       regset used ATTRIBUTE_UNUSED,
			       regset set ATTRIBUTE_UNUSED)
{
}
|
|
253
|
|
/* Common scheduler info, initialized in setup_sched_infos from
   haifa_common_sched_info.  */
static struct common_sched_info_def sms_common_sched_info;

/* Dependence-analysis hooks for SMS.  Only the jump-dependency hook is
   provided; all other callbacks are unused (NULL) since SMS only needs
   sched_analyze to build the DDG.  */
static struct sched_deps_info_def sms_sched_deps_info =
  {
    compute_jump_reg_dependencies,
    NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
    NULL,
    0, 0, 0
  };
|
|
263
|
|
/* Haifa scheduler hooks for SMS.  Only the insn-printing callback is
   needed (for dumps); the list-scheduling callbacks are all NULL because
   SMS performs its own cycle placement via the partial schedule.  */
static struct haifa_sched_info sms_sched_info =
{
  NULL,
  NULL,
  NULL,
  NULL,
  NULL,
  sms_print_insn,
  NULL,
  NULL, NULL,
  NULL, NULL,
  0, 0,

  NULL, NULL, NULL,
  0
};
|
|
280
|
|
/* Given HEAD and TAIL which are the first and last insns in a loop;
   return the register which controls the loop.  Return zero if it has
   more than one occurrence in the loop besides the control part or the
   do-loop pattern is not of the form we expect.  */
static rtx
doloop_register_get (rtx head ATTRIBUTE_UNUSED, rtx tail ATTRIBUTE_UNUSED)
{
#ifdef HAVE_doloop_end
  rtx reg, condition, insn, first_insn_not_to_check;

  /* The control part must end in a branch.  */
  if (!JUMP_P (tail))
    return NULL_RTX;

  /* TODO: Free SMS's dependence on doloop_condition_get.  */
  condition = doloop_condition_get (tail);
  if (! condition)
    return NULL_RTX;

  /* The condition is either (ne (reg) ...) or (ne (plus (reg) ...) ...);
     doloop_condition_get is expected to return one of these two forms,
     hence the gcc_unreachable below.  */
  if (REG_P (XEXP (condition, 0)))
    reg = XEXP (condition, 0);
  else if (GET_CODE (XEXP (condition, 0)) == PLUS
	   && REG_P (XEXP (XEXP (condition, 0), 0)))
    reg = XEXP (XEXP (condition, 0), 0);
  else
    gcc_unreachable ();

  /* Check that the COUNT_REG has no other occurrences in the loop
     until the decrement.  We assume the control part consists of
     either a single (parallel) branch-on-count or a (non-parallel)
     branch immediately preceded by a single (decrement) insn.  */
  first_insn_not_to_check = (GET_CODE (PATTERN (tail)) == PARALLEL ? tail
			     : PREV_INSN (tail));

  for (insn = head; insn != first_insn_not_to_check; insn = NEXT_INSN (insn))
    if (reg_mentioned_p (reg, insn))
      {
	if (dump_file)
	  {
	    fprintf (dump_file, "SMS count_reg found ");
	    print_rtl_single (dump_file, reg);
	    fprintf (dump_file, " outside control in insn:\n");
	    print_rtl_single (dump_file, insn);
	  }

	/* COUNT_REG is used in the body: not a decoupled control part.  */
	return NULL_RTX;
      }

  return reg;
#else
  /* Without a doloop_end pattern we cannot recognize the control part.  */
  return NULL_RTX;
#endif
}
|
|
333
|
|
334 /* Check if COUNT_REG is set to a constant in the PRE_HEADER block, so
|
|
335 that the number of iterations is a compile-time constant. If so,
|
|
336 return the rtx that sets COUNT_REG to a constant, and set COUNT to
|
|
337 this constant. Otherwise return 0. */
|
|
338 static rtx
|
|
339 const_iteration_count (rtx count_reg, basic_block pre_header,
|
|
340 HOST_WIDEST_INT * count)
|
|
341 {
|
|
342 rtx insn;
|
|
343 rtx head, tail;
|
|
344
|
|
345 if (! pre_header)
|
|
346 return NULL_RTX;
|
|
347
|
|
348 get_ebb_head_tail (pre_header, pre_header, &head, &tail);
|
|
349
|
|
350 for (insn = tail; insn != PREV_INSN (head); insn = PREV_INSN (insn))
|
|
351 if (INSN_P (insn) && single_set (insn) &&
|
|
352 rtx_equal_p (count_reg, SET_DEST (single_set (insn))))
|
|
353 {
|
|
354 rtx pat = single_set (insn);
|
|
355
|
|
356 if (GET_CODE (SET_SRC (pat)) == CONST_INT)
|
|
357 {
|
|
358 *count = INTVAL (SET_SRC (pat));
|
|
359 return insn;
|
|
360 }
|
|
361
|
|
362 return NULL_RTX;
|
|
363 }
|
|
364
|
|
365 return NULL_RTX;
|
|
366 }
|
|
367
|
|
368 /* A very simple resource-based lower bound on the initiation interval.
|
|
369 ??? Improve the accuracy of this bound by considering the
|
|
370 utilization of various units. */
|
|
371 static int
|
|
372 res_MII (ddg_ptr g)
|
|
373 {
|
|
374 if (targetm.sched.sms_res_mii)
|
|
375 return targetm.sched.sms_res_mii (g);
|
|
376
|
|
377 return (g->num_nodes / issue_rate);
|
|
378 }
|
|
379
|
|
380
|
|
381 /* Points to the array that contains the sched data for each node. */
|
|
382 static node_sched_params_ptr node_sched_params;
|
|
383
|
|
384 /* Allocate sched_params for each node and initialize it. Assumes that
|
|
385 the aux field of each node contain the asap bound (computed earlier),
|
|
386 and copies it into the sched_params field. */
|
|
387 static void
|
|
388 set_node_sched_params (ddg_ptr g)
|
|
389 {
|
|
390 int i;
|
|
391
|
|
392 /* Allocate for each node in the DDG a place to hold the "sched_data". */
|
|
393 /* Initialize ASAP/ALAP/HIGHT to zero. */
|
|
394 node_sched_params = (node_sched_params_ptr)
|
|
395 xcalloc (g->num_nodes,
|
|
396 sizeof (struct node_sched_params));
|
|
397
|
|
398 /* Set the pointer of the general data of the node to point to the
|
|
399 appropriate sched_params structure. */
|
|
400 for (i = 0; i < g->num_nodes; i++)
|
|
401 {
|
|
402 /* Watch out for aliasing problems? */
|
|
403 node_sched_params[i].asap = g->nodes[i].aux.count;
|
|
404 g->nodes[i].aux.info = &node_sched_params[i];
|
|
405 }
|
|
406 }
|
|
407
|
|
408 static void
|
|
409 print_node_sched_params (FILE *file, int num_nodes, ddg_ptr g)
|
|
410 {
|
|
411 int i;
|
|
412
|
|
413 if (! file)
|
|
414 return;
|
|
415 for (i = 0; i < num_nodes; i++)
|
|
416 {
|
|
417 node_sched_params_ptr nsp = &node_sched_params[i];
|
|
418 rtx reg_move = nsp->first_reg_move;
|
|
419 int j;
|
|
420
|
|
421 fprintf (file, "Node = %d; INSN = %d\n", i,
|
|
422 (INSN_UID (g->nodes[i].insn)));
|
|
423 fprintf (file, " asap = %d:\n", nsp->asap);
|
|
424 fprintf (file, " time = %d:\n", nsp->time);
|
|
425 fprintf (file, " nreg_moves = %d:\n", nsp->nreg_moves);
|
|
426 for (j = 0; j < nsp->nreg_moves; j++)
|
|
427 {
|
|
428 fprintf (file, " reg_move = ");
|
|
429 print_rtl_single (file, reg_move);
|
|
430 reg_move = PREV_INSN (reg_move);
|
|
431 }
|
|
432 }
|
|
433 }
|
|
434
|
|
/*
   Breaking intra-loop register anti-dependences:
   Each intra-loop register anti-dependence implies a cross-iteration true
   dependence of distance 1. Therefore, we can remove such false dependencies
   and figure out if the partial schedule broke them by checking if (for a
   true-dependence of distance 1): SCHED_TIME (def) < SCHED_TIME (use) and
   if so generate a register move.   The number of such moves is equal to:
              SCHED_TIME (use) - SCHED_TIME (def)       { 0 broken
   nreg_moves = ----------------------------------- + 1 - {   dependence.
                            ii                          { 1 if not.

   Returns a list recording every register replacement performed, so the
   caller can undo the transformation if SMS turns out unprofitable.
   If RESCAN, notify the dataflow framework about each rewritten insn.  */
static struct undo_replace_buff_elem *
generate_reg_moves (partial_schedule_ptr ps, bool rescan)
{
  ddg_ptr g = ps->g;
  int ii = ps->ii;
  int i;
  struct undo_replace_buff_elem *reg_move_replaces = NULL;

  for (i = 0; i < g->num_nodes; i++)
    {
      ddg_node_ptr u = &g->nodes[i];
      ddg_edge_ptr e;
      int nreg_moves = 0, i_reg_move;
      sbitmap *uses_of_defs;
      rtx last_reg_move;
      rtx prev_reg, old_reg;

      /* Compute the number of reg_moves needed for u, by looking at life
	 ranges started at u (excluding self-loops).  */
      for (e = u->out; e; e = e->next_out)
	if (e->type == TRUE_DEP && e->dest != e->src)
	  {
	    /* Number of ii-sized stages the value must survive.  */
	    int nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;

	    /* A cross-iteration use reads the value produced one ii
	       earlier, hence the extra ii in the distance.  */
	    if (e->distance == 1)
	      nreg_moves4e = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;

	    /* If dest precedes src in the schedule of the kernel, then dest
	       will read before src writes and we can save one reg_copy.  */
	    if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
		&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
	      nreg_moves4e--;

	    nreg_moves = MAX (nreg_moves, nreg_moves4e);
	  }

      if (nreg_moves == 0)
	continue;

      /* Every use of the register defined by node may require a different
	 copy of this register, depending on the time the use is scheduled.
	 Set a bitmap vector, telling which nodes use each copy of this
	 register.  */
      uses_of_defs = sbitmap_vector_alloc (nreg_moves, g->num_nodes);
      sbitmap_vector_zero (uses_of_defs, nreg_moves);
      for (e = u->out; e; e = e->next_out)
	if (e->type == TRUE_DEP && e->dest != e->src)
	  {
	    /* Same computation as above: which copy (stage) this use
	       reads from.  Copy 0 is the original register itself.  */
	    int dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src)) / ii;

	    if (e->distance == 1)
	      dest_copy = (SCHED_TIME (e->dest) - SCHED_TIME (e->src) + ii) / ii;

	    if (SCHED_ROW (e->dest) == SCHED_ROW (e->src)
		&& SCHED_COLUMN (e->dest) < SCHED_COLUMN (e->src))
	      dest_copy--;

	    if (dest_copy)
	      SET_BIT (uses_of_defs[dest_copy - 1], e->dest->cuid);
	  }

      /* Now generate the reg_moves, attaching relevant uses to them.  */
      SCHED_NREG_MOVES (u) = nreg_moves;
      old_reg = prev_reg = copy_rtx (SET_DEST (single_set (u->insn)));
      /* Insert the reg-moves right before the notes which precede
	 the insn they relates to.  */
      last_reg_move = u->first_note;

      for (i_reg_move = 0; i_reg_move < nreg_moves; i_reg_move++)
	{
	  unsigned int i_use = 0;
	  /* Each move copies the previous stage's register into a fresh
	     pseudo, forming a chain new_reg <- prev_reg.  */
	  rtx new_reg = gen_reg_rtx (GET_MODE (prev_reg));
	  rtx reg_move = gen_move_insn (new_reg, prev_reg);
	  sbitmap_iterator sbi;

	  add_insn_before (reg_move, last_reg_move, NULL);
	  last_reg_move = reg_move;

	  if (!SCHED_FIRST_REG_MOVE (u))
	    SCHED_FIRST_REG_MOVE (u) = reg_move;

	  /* Rewrite every use assigned to this copy to read NEW_REG,
	     recording each replacement so it can be undone later.  */
	  EXECUTE_IF_SET_IN_SBITMAP (uses_of_defs[i_reg_move], 0, i_use, sbi)
	    {
	      struct undo_replace_buff_elem *rep;

	      rep = (struct undo_replace_buff_elem *)
		    xcalloc (1, sizeof (struct undo_replace_buff_elem));
	      rep->insn = g->nodes[i_use].insn;
	      rep->orig_reg = old_reg;
	      rep->new_reg = new_reg;

	      if (! reg_move_replaces)
		reg_move_replaces = rep;
	      else
		{
		  rep->next = reg_move_replaces;
		  reg_move_replaces = rep;
		}

	      replace_rtx (g->nodes[i_use].insn, old_reg, new_reg);
	      if (rescan)
		df_insn_rescan (g->nodes[i_use].insn);
	    }

	  prev_reg = new_reg;
	}
      sbitmap_vector_free (uses_of_defs);
    }
  return reg_move_replaces;
}
|
|
556
|
|
557 /* Free memory allocated for the undo buffer. */
|
|
558 static void
|
|
559 free_undo_replace_buff (struct undo_replace_buff_elem *reg_move_replaces)
|
|
560 {
|
|
561
|
|
562 while (reg_move_replaces)
|
|
563 {
|
|
564 struct undo_replace_buff_elem *rep = reg_move_replaces;
|
|
565
|
|
566 reg_move_replaces = reg_move_replaces->next;
|
|
567 free (rep);
|
|
568 }
|
|
569 }
|
|
570
|
|
/* Bump the SCHED_TIMEs of all nodes to start from zero.  Set the values
   of SCHED_ROW and SCHED_STAGE.  The shift amount is the minimum cycle
   over the whole partial schedule, so normalized times are >= 0.  */
static void
normalize_sched_times (partial_schedule_ptr ps)
{
  int row;
  int amount = PS_MIN_CYCLE (ps);
  int ii = ps->ii;
  ps_insn_ptr crr_insn;

  /* Visit every insn in every row of the partial schedule.  */
  for (row = 0; row < ii; row++)
    for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
      {
	ddg_node_ptr u = crr_insn->node;
	int normalized_time = SCHED_TIME (u) - amount;

	if (dump_file)
	  fprintf (dump_file, "crr_insn->node=%d, crr_insn->cycle=%d,\
 min_cycle=%d\n", crr_insn->node->cuid, SCHED_TIME
		   (u), ps->min_cycle);
	/* All scheduled times must lie within [min_cycle, max_cycle].  */
	gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
	gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
	SCHED_TIME (u) = normalized_time;
	/* Row is the position within the kernel; stage counts how many
	   iterations back the value belongs to.  */
	SCHED_ROW (u) = normalized_time % ii;
	SCHED_STAGE (u) = normalized_time / ii;
      }
}
|
|
598
|
|
599 /* Set SCHED_COLUMN of each node according to its position in PS. */
|
|
600 static void
|
|
601 set_columns_for_ps (partial_schedule_ptr ps)
|
|
602 {
|
|
603 int row;
|
|
604
|
|
605 for (row = 0; row < ps->ii; row++)
|
|
606 {
|
|
607 ps_insn_ptr cur_insn = ps->rows[row];
|
|
608 int column = 0;
|
|
609
|
|
610 for (; cur_insn; cur_insn = cur_insn->next_in_row)
|
|
611 SCHED_COLUMN (cur_insn->node) = column++;
|
|
612 }
|
|
613 }
|
|
614
|
|
615 /* Permute the insns according to their order in PS, from row 0 to
|
|
616 row ii-1, and position them right before LAST. This schedules
|
|
617 the insns of the loop kernel. */
|
|
618 static void
|
|
619 permute_partial_schedule (partial_schedule_ptr ps, rtx last)
|
|
620 {
|
|
621 int ii = ps->ii;
|
|
622 int row;
|
|
623 ps_insn_ptr ps_ij;
|
|
624
|
|
625 for (row = 0; row < ii ; row++)
|
|
626 for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
|
|
627 if (PREV_INSN (last) != ps_ij->node->insn)
|
|
628 reorder_insns_nobb (ps_ij->node->first_note, ps_ij->node->insn,
|
|
629 PREV_INSN (last));
|
|
630 }
|
|
631
|
|
/* Emit copies of the kernel insns (and the required subset of their
   reg-moves) for stages FROM_STAGE..TO_STAGE into the current sequence.
   FOR_PROLOG selects prolog vs. epilog reg-move selection.  Insns that
   mention COUNT_REG belong to the control part and are never copied.  */
static void
duplicate_insns_of_cycles (partial_schedule_ptr ps, int from_stage,
			   int to_stage, int for_prolog, rtx count_reg)
{
  int row;
  ps_insn_ptr ps_ij;

  for (row = 0; row < ps->ii; row++)
    for (ps_ij = ps->rows[row]; ps_ij; ps_ij = ps_ij->next_in_row)
      {
	ddg_node_ptr u_node = ps_ij->node;
	int j, i_reg_moves;
	rtx reg_move = NULL_RTX;

	/* Do not duplicate any insn which refers to count_reg as it
	   belongs to the control part.
	   TODO: This should be done by analyzing the control part of
	   the loop.  */
	if (reg_mentioned_p (count_reg, u_node->insn))
	  continue;

	if (for_prolog)
	  {
	    /* SCHED_STAGE (u_node) >= from_stage == 0.  Generate increasing
	       number of reg_moves starting with the second occurrence of
	       u_node, which is generated if its SCHED_STAGE <= to_stage.  */
	    i_reg_moves = to_stage - SCHED_STAGE (u_node) + 1;
	    i_reg_moves = MAX (i_reg_moves, 0);
	    i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));

	    /* The reg_moves start from the *first* reg_move backwards.  */
	    if (i_reg_moves)
	      {
		reg_move = SCHED_FIRST_REG_MOVE (u_node);
		for (j = 1; j < i_reg_moves; j++)
		  reg_move = PREV_INSN (reg_move);
	      }
	  }
	else /* It's for the epilog.  */
	  {
	    /* SCHED_STAGE (u_node) <= to_stage.  Generate all reg_moves,
	       starting to decrease one stage after u_node no longer occurs;
	       that is, generate all reg_moves until
	       SCHED_STAGE (u_node) == from_stage - 1.  */
	    i_reg_moves = SCHED_NREG_MOVES (u_node)
			  - (from_stage - SCHED_STAGE (u_node) - 1);
	    i_reg_moves = MAX (i_reg_moves, 0);
	    i_reg_moves = MIN (i_reg_moves, SCHED_NREG_MOVES (u_node));

	    /* The reg_moves start from the *last* reg_move forwards.
	       Walk back over the whole chain to reach it.  */
	    if (i_reg_moves)
	      {
		reg_move = SCHED_FIRST_REG_MOVE (u_node);
		for (j = 1; j < SCHED_NREG_MOVES (u_node); j++)
		  reg_move = PREV_INSN (reg_move);
	      }
	  }

	/* Emit the selected reg-moves; the chain was inserted with
	   add_insn_before, so NEXT_INSN walks it in program order.  */
	for (j = 0; j < i_reg_moves; j++, reg_move = NEXT_INSN (reg_move))
	  emit_insn (copy_rtx (PATTERN (reg_move)));
	/* Copy the insn itself if its stage falls in the window.  */
	if (SCHED_STAGE (u_node) >= from_stage
	    && SCHED_STAGE (u_node) <= to_stage)
	  duplicate_insn_chain (u_node->first_note, u_node->insn);
      }
}
|
|
697
|
|
698
|
|
/* Generate the instructions (including reg_moves) for prolog & epilog.
   The prolog is inserted on the loop-entry (preheader) edge and the
   epilog on the single exit edge.  COUNT_INIT, when non-null, is the
   insn that sets COUNT_REG to a compile-time constant; in that case the
   count adjustment is done elsewhere, so it is skipped here.  */
static void
generate_prolog_epilog (partial_schedule_ptr ps, struct loop *loop,
			rtx count_reg, rtx count_init)
{
  int i;
  int last_stage = PS_STAGE_COUNT (ps) - 1;
  edge e;

  /* Generate the prolog, inserting its insns on the loop-entry edge.  */
  start_sequence ();

  if (!count_init)
    {
      /* Generate instructions at the beginning of the prolog to
	 adjust the loop count by STAGE_COUNT.  If loop count is constant
	 (count_init), this constant is adjusted by STAGE_COUNT in
	 generate_prolog_epilog function.  */
      rtx sub_reg = NULL_RTX;

      sub_reg = expand_simple_binop (GET_MODE (count_reg), MINUS,
				     count_reg, GEN_INT (last_stage),
				     count_reg, 1, OPTAB_DIRECT);
      gcc_assert (REG_P (sub_reg));
      /* expand_simple_binop may have used a different target register;
	 copy the result back into COUNT_REG if so.  */
      if (REGNO (sub_reg) != REGNO (count_reg))
	emit_move_insn (count_reg, sub_reg);
    }

  /* Stage i of the prolog emits stages 0..i of the kernel.  */
  for (i = 0; i < last_stage; i++)
    duplicate_insns_of_cycles (ps, 0, i, 1, count_reg);

  /* Put the prolog on the entry edge.  */
  e = loop_preheader_edge (loop);
  split_edge_and_insert (e, get_insns ());

  end_sequence ();

  /* Generate the epilog, inserting its insns on the loop-exit edge.  */
  start_sequence ();

  /* Stage i of the epilog emits stages i+1..last_stage of the kernel.  */
  for (i = 0; i < last_stage; i++)
    duplicate_insns_of_cycles (ps, i + 1, last_stage, 0, count_reg);

  /* Put the epilogue on the exit edge.  */
  gcc_assert (single_exit (loop));
  e = single_exit (loop);
  split_edge_and_insert (e, get_insns ());
  end_sequence ();
}
|
|
748
|
|
749 /* Return true if all the BBs of the loop are empty except the
|
|
750 loop header. */
|
|
751 static bool
|
|
752 loop_single_full_bb_p (struct loop *loop)
|
|
753 {
|
|
754 unsigned i;
|
|
755 basic_block *bbs = get_loop_body (loop);
|
|
756
|
|
757 for (i = 0; i < loop->num_nodes ; i++)
|
|
758 {
|
|
759 rtx head, tail;
|
|
760 bool empty_bb = true;
|
|
761
|
|
762 if (bbs[i] == loop->header)
|
|
763 continue;
|
|
764
|
|
765 /* Make sure that basic blocks other than the header
|
|
766 have only notes labels or jumps. */
|
|
767 get_ebb_head_tail (bbs[i], bbs[i], &head, &tail);
|
|
768 for (; head != NEXT_INSN (tail); head = NEXT_INSN (head))
|
|
769 {
|
|
770 if (NOTE_P (head) || LABEL_P (head)
|
|
771 || (INSN_P (head) && JUMP_P (head)))
|
|
772 continue;
|
|
773 empty_bb = false;
|
|
774 break;
|
|
775 }
|
|
776
|
|
777 if (! empty_bb)
|
|
778 {
|
|
779 free (bbs);
|
|
780 return false;
|
|
781 }
|
|
782 }
|
|
783 free (bbs);
|
|
784 return true;
|
|
785 }
|
|
786
|
|
787 /* A simple loop from SMS point of view; it is a loop that is composed of
|
|
788 either a single basic block or two BBs - a header and a latch. */
|
|
789 #define SIMPLE_SMS_LOOP_P(loop) ((loop->num_nodes < 3 ) \
|
|
790 && (EDGE_COUNT (loop->latch->preds) == 1) \
|
|
791 && (EDGE_COUNT (loop->latch->succs) == 1))
|
|
792
|
|
/* Return true if the loop is in its canonical form and false if not.
   i.e. SIMPLE_SMS_LOOP_P and have one preheader block, and single exit.
   Each rejection reason is reported to the dump file when dumping.  */
static bool
loop_canon_p (struct loop *loop)
{

  /* Reject nested loops and the (pseudo) outermost loop.  */
  if (loop->inner || !loop_outer (loop))
  {
    if (dump_file)
      fprintf (dump_file, "SMS loop inner or !loop_outer\n");
    return false;
  }

  /* SMS can only handle a single exit edge (the epilog is placed on it).  */
  if (!single_exit (loop))
    {
      if (dump_file)
	{
	  rtx insn = BB_END (loop->header);

	  fprintf (dump_file, "SMS loop many exits ");
	  fprintf (dump_file, " %s %d (file, line)\n",
		   insn_file (insn), insn_line (insn));
	}
      return false;
    }

  /* The body must be a single BB, or header+latch with an empty latch.  */
  if (! SIMPLE_SMS_LOOP_P (loop) && ! loop_single_full_bb_p (loop))
    {
      if (dump_file)
	{
	  rtx insn = BB_END (loop->header);

	  fprintf (dump_file, "SMS loop many BBs. ");
	  fprintf (dump_file, " %s %d (file, line)\n",
		   insn_file (insn), insn_line (insn));
	}
      return false;
    }

  return true;
}
|
|
834
|
|
/* If there are more than one entry for the loop,
   make it one by splitting the first entry edge and
   redirecting the others to the new BB.  */
static void
canon_loop (struct loop *loop)
{
  edge e;
  edge_iterator i;

  /* Avoid annoying special cases of edges going to exit
     block.  */
  FOR_EACH_EDGE (e, i, EXIT_BLOCK_PTR->preds)
    if ((e->flags & EDGE_FALLTHRU) && (EDGE_COUNT (e->src->succs) > 1))
      split_edge (e);

  /* Ensure the loop has a dedicated latch block with a single
     successor, by splitting the latch->header back edge if needed.  */
  if (loop->latch == loop->header
      || EDGE_COUNT (loop->latch->succs) > 1)
    {
      /* Find the back edge coming from the latch; SIMPLE_SMS_LOOP_P
	 guarantees such an edge exists when this runs.
	 NOTE(review): E is assumed non-null after the search loop —
	 confirm callers only pass loops with a latch->header edge.  */
      FOR_EACH_EDGE (e, i, loop->header->preds)
	if (e->src == loop->latch)
	  break;
      split_edge (e);
    }
}
|
|
859
|
|
/* Setup infos.  Install the SMS-specific scheduler hook tables into the
   global scheduler state, starting from the generic haifa common info
   and overriding only the pass id.  */
static void
setup_sched_infos (void)
{
  memcpy (&sms_common_sched_info, &haifa_common_sched_info,
	  sizeof (sms_common_sched_info));
  sms_common_sched_info.sched_pass_id = SCHED_SMS_PASS;
  common_sched_info = &sms_common_sched_info;

  sched_deps_info = &sms_sched_deps_info;
  current_sched_info = &sms_sched_info;
}
|
|
872
|
|
873 /* Probability in % that the sms-ed loop rolls enough so that optimized
|
|
874 version may be entered. Just a guess. */
|
|
875 #define PROB_SMS_ENOUGH_ITERATIONS 80
|
|
876
|
|
877 /* Used to calculate the upper bound of ii. */
|
|
878 #define MAXII_FACTOR 2
|
|
879
|
|
/* Main entry point, perform SMS scheduling on the loops of the function
   that consist of single basic blocks.

   The pass runs in two phases over all loops:
     1. Analysis: for each candidate loop (canonical, single-BB doloop,
        hot enough, no problematic insns) build a data-dependence graph
        (DDG) and stash it in G_ARR indexed by loop->num.
     2. Transformation: for each loop with a DDG, compute MII bounds,
        run the modulo scheduler, and on success rewrite the loop into
        kernel + prolog + epilog (versioning the loop first when the
        iteration count is not a compile-time constant).  */
static void
sms_schedule (void)
{
  rtx insn;
  ddg_ptr *g_arr, g;
  int * node_order;
  int maxii, max_asap;
  loop_iterator li;
  partial_schedule_ptr ps;
  basic_block bb = NULL;
  struct loop *loop;
  basic_block condition_bb = NULL;
  edge latch_edge;
  gcov_type trip_count = 0;

  loop_optimizer_init (LOOPS_HAVE_PREHEADERS
		       | LOOPS_HAVE_RECORDED_EXITS);
  if (number_of_loops () <= 1)
    {
      loop_optimizer_finalize ();
      return; /* There are no loops to schedule.  */
    }

  /* Initialize issue_rate.  The issue_rate target hook may inspect
     reload_completed, so temporarily pretend reload has run.  */
  if (targetm.sched.issue_rate)
    {
      int temp = reload_completed;

      reload_completed = 1;
      issue_rate = targetm.sched.issue_rate ();
      reload_completed = temp;
    }
  else
    issue_rate = 1;

  /* Initialize the scheduler.  */
  setup_sched_infos ();
  haifa_sched_init ();

  /* Allocate memory to hold the DDG array one entry for each loop.
     We use loop->num as index into this array.  */
  g_arr = XCNEWVEC (ddg_ptr, number_of_loops ());

  if (dump_file)
    {
      fprintf (dump_file, "\n\nSMS analysis phase\n");
      fprintf (dump_file, "===================\n\n");
    }

  /* Build DDGs for all the relevant loops and hold them in G_ARR
     indexed by the loop index.  */
  FOR_EACH_LOOP (li, loop, 0)
    {
      rtx head, tail;
      rtx count_reg;

      /* For debugging.  */
      if (dbg_cnt (sms_sched_loop) == false)
        {
          if (dump_file)
            fprintf (dump_file, "SMS reached max limit... \n");

          break;
        }

      if (dump_file)
        {
          rtx insn = BB_END (loop->header);

          fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
                   loop->num, insn_file (insn), insn_line (insn));

        }

      if (! loop_canon_p (loop))
        continue;

      if (! loop_single_full_bb_p (loop))
        {
          if (dump_file)
            fprintf (dump_file, "SMS not loop_single_full_bb_p\n");
          continue;
        }

      bb = loop->header;

      get_ebb_head_tail (bb, bb, &head, &tail);
      latch_edge = loop_latch_edge (loop);
      gcc_assert (single_exit (loop));
      /* Average trip count = latch executions per loop entry.  */
      if (single_exit (loop)->count)
        trip_count = latch_edge->count / single_exit (loop)->count;

      /* Perform SMS only on loops whose average trip count is above
         the threshold.  */

      if ( latch_edge->count
          && (latch_edge->count < single_exit (loop)->count * SMS_LOOP_AVERAGE_COUNT_THRESHOLD))
        {
          if (dump_file)
            {
              fprintf (dump_file, " %s %d (file, line)\n",
                       insn_file (tail), insn_line (tail));
              fprintf (dump_file, "SMS single-bb-loop\n");
              if (profile_info && flag_branch_probabilities)
                {
                  fprintf (dump_file, "SMS loop-count ");
                  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
                           (HOST_WIDEST_INT) bb->count);
                  fprintf (dump_file, "\n");
                  fprintf (dump_file, "SMS trip-count ");
                  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
                           (HOST_WIDEST_INT) trip_count);
                  fprintf (dump_file, "\n");
                  fprintf (dump_file, "SMS profile-sum-max ");
                  fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
                           (HOST_WIDEST_INT) profile_info->sum_max);
                  fprintf (dump_file, "\n");
                }
            }
          continue;
        }

      /* Make sure this is a doloop.  */
      if ( !(count_reg = doloop_register_get (head, tail)))
        {
          if (dump_file)
            fprintf (dump_file, "SMS doloop_register_get failed\n");
          continue;
        }

      /* Don't handle BBs with calls or barriers, or !single_set insns,
         or auto-increment insns (to avoid creating invalid reg-moves
         for the auto-increment insns).
         ??? Should handle auto-increment insns.
         ??? Should handle insns defining subregs.  */
      for (insn = head; insn != NEXT_INSN (tail); insn = NEXT_INSN (insn))
        {
          rtx set;

          if (CALL_P (insn)
              || BARRIER_P (insn)
              || (INSN_P (insn) && !JUMP_P (insn)
                  && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE)
              || (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
              || (INSN_P (insn) && (set = single_set (insn))
                  && GET_CODE (SET_DEST (set)) == SUBREG))
            break;
        }

      /* The scan above bailed out early iff it found an offending
         insn; report which kind and skip the loop.  */
      if (insn != NEXT_INSN (tail))
        {
          if (dump_file)
            {
              if (CALL_P (insn))
                fprintf (dump_file, "SMS loop-with-call\n");
              else if (BARRIER_P (insn))
                fprintf (dump_file, "SMS loop-with-barrier\n");
              else if (FIND_REG_INC_NOTE (insn, NULL_RTX) != 0)
                fprintf (dump_file, "SMS reg inc\n");
              else if ((INSN_P (insn) && !JUMP_P (insn)
                && !single_set (insn) && GET_CODE (PATTERN (insn)) != USE))
                fprintf (dump_file, "SMS loop-with-not-single-set\n");
              else
                fprintf (dump_file, "SMS loop with subreg in lhs\n");
              print_rtl_single (dump_file, insn);
            }

          continue;
        }

      if (! (g = create_ddg (bb, 0)))
        {
          if (dump_file)
            fprintf (dump_file, "SMS create_ddg failed\n");
          continue;
        }

      g_arr[loop->num] = g;
      if (dump_file)
        fprintf (dump_file, "...OK\n");

    }
  if (dump_file)
    {
      fprintf (dump_file, "\nSMS transformation phase\n");
      fprintf (dump_file, "=========================\n\n");
    }

  /* We don't want to perform SMS on new loops - created by versioning.  */
  FOR_EACH_LOOP (li, loop, 0)
    {
      rtx head, tail;
      rtx count_reg, count_init;
      int mii, rec_mii;
      unsigned stage_count = 0;
      HOST_WIDEST_INT loop_count = 0;

      /* Only loops that survived the analysis phase have a DDG.  */
      if (! (g = g_arr[loop->num]))
        continue;

      if (dump_file)
        {
          rtx insn = BB_END (loop->header);

          fprintf (dump_file, "SMS loop num: %d, file: %s, line: %d\n",
                   loop->num, insn_file (insn), insn_line (insn));

          print_ddg (dump_file, g);
        }

      get_ebb_head_tail (loop->header, loop->header, &head, &tail);

      latch_edge = loop_latch_edge (loop);
      gcc_assert (single_exit (loop));
      if (single_exit (loop)->count)
        trip_count = latch_edge->count / single_exit (loop)->count;

      if (dump_file)
        {
          fprintf (dump_file, " %s %d (file, line)\n",
                   insn_file (tail), insn_line (tail));
          fprintf (dump_file, "SMS single-bb-loop\n");
          if (profile_info && flag_branch_probabilities)
            {
              fprintf (dump_file, "SMS loop-count ");
              fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
                       (HOST_WIDEST_INT) bb->count);
              fprintf (dump_file, "\n");
              fprintf (dump_file, "SMS profile-sum-max ");
              fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
                       (HOST_WIDEST_INT) profile_info->sum_max);
              fprintf (dump_file, "\n");
            }
          fprintf (dump_file, "SMS doloop\n");
          fprintf (dump_file, "SMS built-ddg %d\n", g->num_nodes);
          fprintf (dump_file, "SMS num-loads %d\n", g->num_loads);
          fprintf (dump_file, "SMS num-stores %d\n", g->num_stores);
        }


      /* If the loop has a doloop register, it gets special handling:
         try to extract a compile-time constant iteration count from
         the preheader into COUNT_INIT/LOOP_COUNT.  */
      count_init = NULL_RTX;
      if ((count_reg = doloop_register_get (head, tail)))
        {
          basic_block pre_header;

          pre_header = loop_preheader_edge (loop)->src;
          count_init = const_iteration_count (count_reg, pre_header,
                                              &loop_count);
        }
      gcc_assert (count_reg);

      if (dump_file && count_init)
        {
          fprintf (dump_file, "SMS const-doloop ");
          fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC,
                   loop_count);
          fprintf (dump_file, "\n");
        }

      node_order = XNEWVEC (int, g->num_nodes);

      mii = 1; /* Need to pass some estimate of mii.  */
      rec_mii = sms_order_nodes (g, mii, node_order, &max_asap);
      mii = MAX (res_MII (g), rec_mii);
      /* Upper bound for II; give up scheduling beyond this.  */
      maxii = MAX (max_asap, MAXII_FACTOR * mii);

      if (dump_file)
        fprintf (dump_file, "SMS iis %d %d %d (rec_mii, mii, maxii)\n",
                 rec_mii, mii, maxii);

      /* After sms_order_nodes and before sms_schedule_by_order, to copy over
         ASAP.  */
      set_node_sched_params (g);

      ps = sms_schedule_by_order (g, mii, maxii, node_order);

      if (ps)
        stage_count = PS_STAGE_COUNT (ps);

      /* Stage count of 1 means that there is no interleaving between
         iterations, let the scheduling passes do the job.  Also bail
         out when the (constant or profiled) trip count would not cover
         the prolog/epilog stages.  */
      if (stage_count < 1
          || (count_init && (loop_count <= stage_count))
          || (flag_branch_probabilities && (trip_count <= stage_count)))
        {
          if (dump_file)
            {
              fprintf (dump_file, "SMS failed... \n");
              fprintf (dump_file, "SMS sched-failed (stage-count=%d, loop-count=", stage_count);
              fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, loop_count);
              fprintf (dump_file, ", trip-count=");
              fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, trip_count);
              fprintf (dump_file, ")\n");
            }
          continue;
        }
      else
        {
          struct undo_replace_buff_elem *reg_move_replaces;

          if (dump_file)
            {
              fprintf (dump_file,
                       "SMS succeeded %d %d (with ii, sc)\n", ps->ii,
                       stage_count);
              print_partial_schedule (ps, dump_file);
              fprintf (dump_file,
                       "SMS Branch (%d) will later be scheduled at cycle %d.\n",
                       g->closing_branch->cuid, PS_MIN_CYCLE (ps) - 1);
            }

          /* Set the stage boundaries.  If the DDG is built with closing_branch_deps,
             the closing_branch was scheduled and should appear in the last (ii-1)
             row.  Otherwise, we are free to schedule the branch, and we let nodes
             that were scheduled at the first PS_MIN_CYCLE cycle appear in the first
             row; this should reduce stage_count to minimum.
             TODO: Revisit the issue of scheduling the insns of the
             control part relative to the branch when the control part
             has more than one insn.  */
          normalize_sched_times (ps);
          rotate_partial_schedule (ps, PS_MIN_CYCLE (ps));
          set_columns_for_ps (ps);

          canon_loop (loop);

          /* If the BCT count is not known at compile time, do
             loop-versioning: guard the SMS-ed kernel with a runtime
             test that the loop rolls at least STAGE_COUNT times.  */
          if (count_reg && ! count_init)
            {
              rtx comp_rtx = gen_rtx_fmt_ee (GT, VOIDmode, count_reg,
                                             GEN_INT(stage_count));
              unsigned prob = (PROB_SMS_ENOUGH_ITERATIONS
                               * REG_BR_PROB_BASE) / 100;

              loop_version (loop, comp_rtx, &condition_bb,
                            prob, prob, REG_BR_PROB_BASE - prob,
                            true);
            }

          /* Set new iteration count of loop kernel: the prolog and
             epilog absorb stage_count - 1 iterations between them.  */
          if (count_reg && count_init)
            SET_SRC (single_set (count_init)) = GEN_INT (loop_count
                                                       - stage_count + 1);

          /* Now apply the scheduled kernel to the RTL of the loop.  */
          permute_partial_schedule (ps, g->closing_branch->first_note);

          /* Mark this loop as software pipelined so the later
             scheduling passes don't touch it.  */
          if (! flag_resched_modulo_sched)
            g->bb->flags |= BB_DISABLE_SCHEDULE;
          /* The life-info is not valid any more.  */
          df_set_bb_dirty (g->bb);

          reg_move_replaces = generate_reg_moves (ps, true);
          if (dump_file)
            print_node_sched_params (dump_file, g->num_nodes, g);
          /* Generate prolog and epilog.  */
          generate_prolog_epilog (ps, loop, count_reg, count_init);

          free_undo_replace_buff (reg_move_replaces);
        }

      free_partial_schedule (ps);
      free (node_sched_params);
      free (node_order);
      free_ddg (g);
    }

  free (g_arr);

  /* Release scheduler data, needed until now because of DFA.  */
  haifa_sched_finish ();
  loop_optimizer_finalize ();
}
|
|
1257
|
|
1258 /* The SMS scheduling algorithm itself
|
|
1259 -----------------------------------
|
|
1260 Input: 'O' an ordered list of insns of a loop.
|
|
1261 Output: A scheduling of the loop - kernel, prolog, and epilogue.
|
|
1262
|
|
1263 'Q' is the empty Set
|
|
1264 'PS' is the partial schedule; it holds the currently scheduled nodes with
|
|
1265 their cycle/slot.
|
|
1266 'PSP' previously scheduled predecessors.
|
|
1267 'PSS' previously scheduled successors.
|
|
1268 't(u)' the cycle where u is scheduled.
|
|
1269 'l(u)' is the latency of u.
|
|
1270 'd(v,u)' is the dependence distance from v to u.
|
|
1271 'ASAP(u)' the earliest time at which u could be scheduled as computed in
|
|
1272 the node ordering phase.
|
|
1273 'check_hardware_resources_conflicts(u, PS, c)'
|
|
1274 run a trace around cycle/slot through DFA model
|
|
1275 to check resource conflicts involving instruction u
|
|
1276 at cycle c given the partial schedule PS.
|
|
1277 'add_to_partial_schedule_at_time(u, PS, c)'
|
|
1278 Add the node/instruction u to the partial schedule
|
|
1279 PS at time c.
|
|
1280 'calculate_register_pressure(PS)'
|
|
1281 Given a schedule of instructions, calculate the register
|
|
1282 pressure it implies. One implementation could be the
|
|
1283 maximum number of overlapping live ranges.
|
|
1284 'maxRP' The maximum allowed register pressure, it is usually derived from the number
|
|
1285 registers available in the hardware.
|
|
1286
|
|
1287 1. II = MII.
|
|
1288 2. PS = empty list
|
|
1289 3. for each node u in O in pre-computed order
|
|
1290 4. if (PSP(u) != Q && PSS(u) == Q) then
|
|
1291 5. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
|
|
1292 6. start = Early_start; end = Early_start + II - 1; step = 1
|
|
1293 11. else if (PSP(u) == Q && PSS(u) != Q) then
|
|
1294 12. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
|
|
1295 13. start = Late_start; end = Late_start - II + 1; step = -1
|
|
1296 14. else if (PSP(u) != Q && PSS(u) != Q) then
|
|
1297 15. Early_start(u) = max ( t(v) + l(v) - d(v,u)*II ) over all v in PSP(u).
|
|
1298 16. Late_start(u) = min ( t(v) - l(v) + d(v,u)*II ) over all v in PSS(u).
|
|
1299 17. start = Early_start;
|
|
1300 18. end = min(Early_start + II - 1 , Late_start);
|
|
1301 19. step = 1
|
|
1302 20. else "if (PSP(u) == Q && PSS(u) == Q)"
|
|
1303 21. start = ASAP(u); end = start + II - 1; step = 1
|
|
1304 22. endif
|
|
1305
|
|
1306 23. success = false
|
|
1307 24. for (c = start ; c != end ; c += step)
|
|
1308 25. if check_hardware_resources_conflicts(u, PS, c) then
|
|
1309 26. add_to_partial_schedule_at_time(u, PS, c)
|
|
1310 27. success = true
|
|
1311 28. break
|
|
1312 29. endif
|
|
1313 30. endfor
|
|
1314 31. if (success == false) then
|
|
1315 32. II = II + 1
|
|
1316 33. if (II > maxII) then
|
|
1317 34. finish - failed to schedule
|
|
1318 35. endif
|
|
1319 36. goto 2.
|
|
1320 37. endif
|
|
1321 38. endfor
|
|
1322 39. if (calculate_register_pressure(PS) > maxRP) then
|
|
1323 40. goto 32.
|
|
1324 41. endif
|
|
1325 42. compute epilogue & prologue
|
|
1326 43. finish - succeeded to schedule
|
|
1327 */
|
|
1328
|
|
1329 /* A limit on the number of cycles that resource conflicts can span. ??? Should
|
|
1330 be provided by DFA, and be dependent on the type of insn scheduled. Currently
|
|
1331 set to 0 to save compile time. */
|
|
1332 #define DFA_HISTORY SMS_DFA_HISTORY
|
|
1333
|
|
1334 /* A threshold for the number of repeated unsuccessful attempts to insert
|
|
1335 an empty row, before we flush the partial schedule and start over. */
|
|
1336 #define MAX_SPLIT_NUM 10
|
|
/* Given the partial schedule PS, this function calculates and returns the
   cycles in which we can schedule the node with the given index I.
   NOTE: Here we do the backtracking in SMS, in some special cases.  We have
   noticed that there are several cases in which we fail to SMS the loop
   because the sched window of a node is empty due to tight data-deps.  In
   such cases we want to unschedule some of the predecessors/successors
   until we get non-empty scheduling window.  It returns -1 if the
   scheduling window is empty and zero otherwise.

   On success *START_P, *STEP_P and *END_P describe a half-open traversal
   [start, end) with step +1 or -1, chosen so that the node lands close
   to whichever of its already-scheduled neighbors constrain it most.  */

static int
get_sched_window (partial_schedule_ptr ps, int *nodes_order, int i,
		  sbitmap sched_nodes, int ii, int *start_p, int *step_p, int *end_p)
{
  int start, step, end;
  ddg_edge_ptr e;
  int u = nodes_order [i];
  ddg_node_ptr u_node = &ps->g->nodes[u];
  /* PSP/PSS: the already-scheduled predecessors/successors of U.  */
  sbitmap psp = sbitmap_alloc (ps->g->num_nodes);
  sbitmap pss = sbitmap_alloc (ps->g->num_nodes);
  sbitmap u_node_preds = NODE_PREDECESSORS (u_node);
  sbitmap u_node_succs = NODE_SUCCESSORS (u_node);
  int psp_not_empty;
  int pss_not_empty;

  /* 1. compute sched window for u (start, end, step).  */
  sbitmap_zero (psp);
  sbitmap_zero (pss);
  psp_not_empty = sbitmap_a_and_b_cg (psp, u_node_preds, sched_nodes);
  pss_not_empty = sbitmap_a_and_b_cg (pss, u_node_succs, sched_nodes);

  if (psp_not_empty && !pss_not_empty)
    {
      /* Only predecessors are scheduled: the earliest cycle for U is
	 bounded below by each pred's finish time (modulo II-scaled
	 distance); walk the window forward.  */
      int early_start = INT_MIN;

      end = INT_MAX;
      for (e = u_node->in; e != 0; e = e->next_in)
	{
	  ddg_node_ptr v_node = e->src;

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nProcessing edge: ");
	      print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_not_empty,"
		       " checking p %d (%d): ", u_node->cuid,
		       INSN_UID (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      int p_st = SCHED_TIME (v_node);

	      early_start =
		MAX (early_start, p_st + e->latency - (e->distance * ii));

	      if (dump_file)
		fprintf (dump_file,
			 "pred st = %d; early_start = %d; latency: %d",
			 p_st, early_start, e->latency);

	      /* A memory dep must not be stretched across a full II.  */
	      if (e->data_type == MEM_DEP)
		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
	    }
	  else if (dump_file)
	    fprintf (dump_file, "the node is not scheduled\n");
	}
      start = early_start;
      end = MIN (end, early_start + ii);
      /* Schedule the node close to its predecessors.  */
      step = 1;

      if (dump_file)
	fprintf (dump_file,
		 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
		 u_node->cuid, INSN_UID (u_node->insn), start, end, step);
    }

  else if (!psp_not_empty && pss_not_empty)
    {
      /* Only successors are scheduled: derive a latest start time and
	 walk the window backwards (step == -1).  */
      int late_start = INT_MAX;

      end = INT_MIN;
      for (e = u_node->out; e != 0; e = e->next_out)
	{
	  ddg_node_ptr v_node = e->dest;

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nProcessing edge:");
	      print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in pss_not_empty,"
		       " checking s %d (%d): ", u_node->cuid,
		       INSN_UID (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      int s_st = SCHED_TIME (v_node);

	      late_start = MIN (late_start,
				s_st - e->latency + (e->distance * ii));

	      if (dump_file)
		fprintf (dump_file,
			 "succ st = %d; late_start = %d; latency = %d",
			 s_st, late_start, e->latency);

	      if (e->data_type == MEM_DEP)
		end = MAX (end, SCHED_TIME (v_node) - ii + 1);
	      if (dump_file)
		fprintf (dump_file, "end = %d\n", end);

	    }
	  else if (dump_file)
	    fprintf (dump_file, "the node is not scheduled\n");

	}
      start = late_start;
      end = MAX (end, late_start - ii);
      /* Schedule the node close to its successors.  */
      step = -1;

      if (dump_file)
	fprintf (dump_file,
		 "\nScheduling %d (%d) in a window (%d..%d) with step %d\n",
		 u_node->cuid, INSN_UID (u_node->insn), start, end, step);

    }

  else if (psp_not_empty && pss_not_empty)
    {
      /* Both predecessors and successors are scheduled: the window is
	 the intersection of both constraints.
	 NOTE(review): END combines early_start + ii and late_start + 1;
	 confirm the +1 asymmetry against the MEM_DEP adjustment of
	 START below — this window computation was reworked in later
	 GCC revisions.  */
      int early_start = INT_MIN;
      int late_start = INT_MAX;
      /* TRUE register deps are counted to decide which side U should
	 be scheduled closest to.  */
      int count_preds = 0;
      int count_succs = 0;

      start = INT_MIN;
      end = INT_MAX;
      for (e = u_node->in; e != 0; e = e->next_in)
	{
	  ddg_node_ptr v_node = e->src;

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nProcessing edge:");
	      print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_pss_not_empty,"
		       " checking p %d (%d): ", u_node->cuid, INSN_UID
		       (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      int p_st = SCHED_TIME (v_node);

	      early_start = MAX (early_start,
				 p_st + e->latency
				 - (e->distance * ii));

	      if (dump_file)
		fprintf (dump_file,
			 "pred st = %d; early_start = %d; latency = %d",
			 p_st, early_start, e->latency);

	      if (e->type == TRUE_DEP && e->data_type == REG_DEP)
		count_preds++;

	      if (e->data_type == MEM_DEP)
		end = MIN (end, SCHED_TIME (v_node) + ii - 1);
	    }
	  else if (dump_file)
	    fprintf (dump_file, "the node is not scheduled\n");

	}
      for (e = u_node->out; e != 0; e = e->next_out)
	{
	  ddg_node_ptr v_node = e->dest;

	  if (dump_file)
	    {
	      fprintf (dump_file, "\nProcessing edge:");
	      print_ddg_edge (dump_file, e);
	      fprintf (dump_file,
		       "\nScheduling %d (%d) in psp_pss_not_empty,"
		       " checking s %d (%d): ", u_node->cuid, INSN_UID
		       (u_node->insn), v_node->cuid, INSN_UID
		       (v_node->insn));
	    }

	  if (TEST_BIT (sched_nodes, v_node->cuid))
	    {
	      int s_st = SCHED_TIME (v_node);

	      late_start = MIN (late_start,
				s_st - e->latency
				+ (e->distance * ii));

	      if (dump_file)
		fprintf (dump_file,
			 "succ st = %d; late_start = %d; latency = %d",
			 s_st, late_start, e->latency);

	      if (e->type == TRUE_DEP && e->data_type == REG_DEP)
		count_succs++;

	      if (e->data_type == MEM_DEP)
		start = MAX (start, SCHED_TIME (v_node) - ii + 1);
	    }
	  else if (dump_file)
	    fprintf (dump_file, "the node is not scheduled\n");

	}
      start = MAX (start, early_start);
      end = MIN (end, MIN (early_start + ii, late_start + 1));
      step = 1;
      /* If there are more successors than predecessors schedule the
	 node close to its successors.  */
      if (count_succs >= count_preds)
	{
	  /* Reverse the traversal direction over the same window.  */
	  int old_start = start;

	  start = end - 1;
	  end = old_start - 1;
	  step = -1;
	}
    }
  else /* psp is empty && pss is empty.  */
    {
      /* No scheduled neighbors constrain U: start at its precomputed
	 ASAP time and try a full II worth of cycles.  */
      start = SCHED_ASAP (u_node);
      end = start + ii;
      step = 1;
    }

  *start_p = start;
  *step_p = step;
  *end_p = end;
  sbitmap_free (psp);
  sbitmap_free (pss);

  /* An empty traversal in either direction means no legal cycle.  */
  if ((start >= end && step == 1) || (start <= end && step == -1))
    {
      if (dump_file)
	fprintf (dump_file, "\nEmpty window: start=%d, end=%d, step=%d\n",
		 start, end, step);
      return -1;
    }

  return 0;
}
|
|
1592
|
|
1593 /* Calculate MUST_PRECEDE/MUST_FOLLOW bitmaps of U_NODE; which is the
|
|
1594 node currently been scheduled. At the end of the calculation
|
|
1595 MUST_PRECEDE/MUST_FOLLOW contains all predecessors/successors of
|
|
1596 U_NODE which are (1) already scheduled in the first/last row of
|
|
1597 U_NODE's scheduling window, (2) whose dependence inequality with U
|
|
1598 becomes an equality when U is scheduled in this same row, and (3)
|
|
1599 whose dependence latency is zero.
|
|
1600
|
|
1601 The first and last rows are calculated using the following parameters:
|
|
1602 START/END rows - The cycles that begins/ends the traversal on the window;
|
|
1603 searching for an empty cycle to schedule U_NODE.
|
|
1604 STEP - The direction in which we traverse the window.
|
|
1605 II - The initiation interval. */
|
|
1606
|
|
1607 static void
|
|
1608 calculate_must_precede_follow (ddg_node_ptr u_node, int start, int end,
|
|
1609 int step, int ii, sbitmap sched_nodes,
|
|
1610 sbitmap must_precede, sbitmap must_follow)
|
|
1611 {
|
|
1612 ddg_edge_ptr e;
|
|
1613 int first_cycle_in_window, last_cycle_in_window;
|
|
1614
|
|
1615 gcc_assert (must_precede && must_follow);
|
|
1616
|
|
1617 /* Consider the following scheduling window:
|
|
1618 {first_cycle_in_window, first_cycle_in_window+1, ...,
|
|
1619 last_cycle_in_window}. If step is 1 then the following will be
|
|
1620 the order we traverse the window: {start=first_cycle_in_window,
|
|
1621 first_cycle_in_window+1, ..., end=last_cycle_in_window+1},
|
|
1622 or {start=last_cycle_in_window, last_cycle_in_window-1, ...,
|
|
1623 end=first_cycle_in_window-1} if step is -1. */
|
|
1624 first_cycle_in_window = (step == 1) ? start : end - step;
|
|
1625 last_cycle_in_window = (step == 1) ? end - step : start;
|
|
1626
|
|
1627 sbitmap_zero (must_precede);
|
|
1628 sbitmap_zero (must_follow);
|
|
1629
|
|
1630 if (dump_file)
|
|
1631 fprintf (dump_file, "\nmust_precede: ");
|
|
1632
|
|
1633 /* Instead of checking if:
|
|
1634 (SMODULO (SCHED_TIME (e->src), ii) == first_row_in_window)
|
|
1635 && ((SCHED_TIME (e->src) + e->latency - (e->distance * ii)) ==
|
|
1636 first_cycle_in_window)
|
|
1637 && e->latency == 0
|
|
1638 we use the fact that latency is non-negative:
|
|
1639 SCHED_TIME (e->src) - (e->distance * ii) <=
|
|
1640 SCHED_TIME (e->src) + e->latency - (e->distance * ii)) <=
|
|
1641 first_cycle_in_window
|
|
1642 and check only if
|
|
1643 SCHED_TIME (e->src) - (e->distance * ii) == first_cycle_in_window */
|
|
1644 for (e = u_node->in; e != 0; e = e->next_in)
|
|
1645 if (TEST_BIT (sched_nodes, e->src->cuid)
|
|
1646 && ((SCHED_TIME (e->src) - (e->distance * ii)) ==
|
|
1647 first_cycle_in_window))
|
|
1648 {
|
|
1649 if (dump_file)
|
|
1650 fprintf (dump_file, "%d ", e->src->cuid);
|
|
1651
|
|
1652 SET_BIT (must_precede, e->src->cuid);
|
|
1653 }
|
|
1654
|
|
1655 if (dump_file)
|
|
1656 fprintf (dump_file, "\nmust_follow: ");
|
|
1657
|
|
1658 /* Instead of checking if:
|
|
1659 (SMODULO (SCHED_TIME (e->dest), ii) == last_row_in_window)
|
|
1660 && ((SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) ==
|
|
1661 last_cycle_in_window)
|
|
1662 && e->latency == 0
|
|
1663 we use the fact that latency is non-negative:
|
|
1664 SCHED_TIME (e->dest) + (e->distance * ii) >=
|
|
1665 SCHED_TIME (e->dest) - e->latency + (e->distance * ii)) >=
|
|
1666 last_cycle_in_window
|
|
1667 and check only if
|
|
1668 SCHED_TIME (e->dest) + (e->distance * ii) == last_cycle_in_window */
|
|
1669 for (e = u_node->out; e != 0; e = e->next_out)
|
|
1670 if (TEST_BIT (sched_nodes, e->dest->cuid)
|
|
1671 && ((SCHED_TIME (e->dest) + (e->distance * ii)) ==
|
|
1672 last_cycle_in_window))
|
|
1673 {
|
|
1674 if (dump_file)
|
|
1675 fprintf (dump_file, "%d ", e->dest->cuid);
|
|
1676
|
|
1677 SET_BIT (must_follow, e->dest->cuid);
|
|
1678 }
|
|
1679
|
|
1680 if (dump_file)
|
|
1681 fprintf (dump_file, "\n");
|
|
1682 }
|
|
1683
|
|
1684 /* Return 1 if U_NODE can be scheduled in CYCLE. Use the following
|
|
1685 parameters to decide if that's possible:
|
|
1686 PS - The partial schedule.
|
|
1687 U - The serial number of U_NODE.
|
|
1688 NUM_SPLITS - The number of row splits made so far.
|
|
1689 MUST_PRECEDE - The nodes that must precede U_NODE. (only valid at
|
|
1690 the first row of the scheduling window)
|
|
1691 MUST_FOLLOW - The nodes that must follow U_NODE. (only valid at the
|
|
1692 last row of the scheduling window) */
|
|
1693
|
|
1694 static bool
|
|
1695 try_scheduling_node_in_cycle (partial_schedule_ptr ps, ddg_node_ptr u_node,
|
|
1696 int u, int cycle, sbitmap sched_nodes,
|
|
1697 int *num_splits, sbitmap must_precede,
|
|
1698 sbitmap must_follow)
|
|
1699 {
|
|
1700 ps_insn_ptr psi;
|
|
1701 bool success = 0;
|
|
1702
|
|
1703 verify_partial_schedule (ps, sched_nodes);
|
|
1704 psi = ps_add_node_check_conflicts (ps, u_node, cycle,
|
|
1705 must_precede, must_follow);
|
|
1706 if (psi)
|
|
1707 {
|
|
1708 SCHED_TIME (u_node) = cycle;
|
|
1709 SET_BIT (sched_nodes, u);
|
|
1710 success = 1;
|
|
1711 *num_splits = 0;
|
|
1712 if (dump_file)
|
|
1713 fprintf (dump_file, "Scheduled w/o split in %d\n", cycle);
|
|
1714
|
|
1715 }
|
|
1716
|
|
1717 return success;
|
|
1718 }
|
|
1719
|
|
/* This function implements the scheduling algorithm for SMS according to the
   above algorithm.  */
static partial_schedule_ptr
sms_schedule_by_order (ddg_ptr g, int mii, int maxii, int *nodes_order)
{
  int ii = mii;
  int i, c, success, num_splits = 0;
  int flush_and_start_over = true;
  int num_nodes = g->num_nodes;
  int start, end, step; /* Place together into one struct?  */
  sbitmap sched_nodes = sbitmap_alloc (num_nodes);
  sbitmap must_precede = sbitmap_alloc (num_nodes);
  sbitmap must_follow = sbitmap_alloc (num_nodes);
  sbitmap tobe_scheduled = sbitmap_alloc (num_nodes);

  partial_schedule_ptr ps = create_partial_schedule (ii, g, DFA_HISTORY);

  sbitmap_ones (tobe_scheduled);
  sbitmap_zero (sched_nodes);

  /* Keep scheduling until all nodes are placed or II reaches MAXII.
     A "flush" restarts scheduling from scratch after too many row
     splits were needed at the current II.  */
  while (flush_and_start_over && (ii < maxii))
    {

      if (dump_file)
	fprintf (dump_file, "Starting with ii=%d\n", ii);
      flush_and_start_over = false;
      sbitmap_zero (sched_nodes);

      for (i = 0; i < num_nodes; i++)
	{
	  int u = nodes_order[i];
  	  ddg_node_ptr u_node = &ps->g->nodes[u];
	  rtx insn = u_node->insn;

	  /* Notes and other non-insns are never scheduled.  */
	  if (!INSN_P (insn))
	    {
	      RESET_BIT (tobe_scheduled, u);
	      continue;
	    }

	  if (JUMP_P (insn)) /* Closing branch handled later.  */
	    {
	      RESET_BIT (tobe_scheduled, u);
	      continue;
	    }

	  /* Already placed on a previous attempt at this II.  */
	  if (TEST_BIT (sched_nodes, u))
	    continue;

	  /* Try to get non-empty scheduling window.  */
	  success = 0;
	  if (get_sched_window (ps, nodes_order, i, sched_nodes, ii, &start,
				&step, &end) == 0)
	    {
	      if (dump_file)
		fprintf (dump_file, "\nTrying to schedule node %d \
INSN = %d in (%d .. %d) step %d\n", u, (INSN_UID
		(g->nodes[u].insn)), start, end, step);

	      gcc_assert ((step > 0 && start < end)
			  || (step < 0 && start > end));

	      /* Compute which already-scheduled neighbors constrain the
		 intra-cycle ordering of U_NODE at the window boundaries.  */
	      calculate_must_precede_follow (u_node, start, end, step, ii,
					     sched_nodes, must_precede,
					     must_follow);

	      for (c = start; c != end; c += step)
		{
		  sbitmap tmp_precede = NULL;
		  sbitmap tmp_follow = NULL;

		  /* The ordering constraints only bind at the first and
		     last cycles of the window.  */
		  if (c == start)
		    {
		      if (step == 1)
			tmp_precede = must_precede;
		      else /* step == -1.  */
			tmp_follow = must_follow;
		    }
		  if (c == end - step)
		    {
		      if (step == 1)
			tmp_follow = must_follow;
		      else /* step == -1.  */
			tmp_precede = must_precede;
		    }

		  success =
		    try_scheduling_node_in_cycle (ps, u_node, u, c,
						  sched_nodes,
						  &num_splits, tmp_precede,
						  tmp_follow);
		  if (success)
		    break;
		}

	      verify_partial_schedule (ps, sched_nodes);
	    }
	  if (!success)
	    {
	      int split_row;

	      /* Window exhausted (or none found): enlarge II, up to MAXII.  */
	      if (ii++ == maxii)
		break;

	      /* Too many splits at this II: flush everything and restart
		 the outer loop with the enlarged II.  */
	      if (num_splits >= MAX_SPLIT_NUM)
		{
		  num_splits = 0;
		  flush_and_start_over = true;
		  verify_partial_schedule (ps, sched_nodes);
		  reset_partial_schedule (ps, ii);
		  verify_partial_schedule (ps, sched_nodes);
		  break;
		}

	      /* Otherwise insert an empty row to widen the scheduling
		 window and retry the same node.
		 NOTE(review): if get_sched_window failed above, START, END
		 and STEP are used here without being set on this iteration
		 -- confirm get_sched_window's contract on failure.  */
	      num_splits++;
	      /* Pass the window bounds in increasing-cycle order.  */
	      if (step == 1)
		split_row = compute_split_row (sched_nodes, start, end,
					       ps->ii, u_node);
	      else
		split_row = compute_split_row (sched_nodes, end, start,
					       ps->ii, u_node);

	      ps_insert_empty_row (ps, split_row, sched_nodes);
	      i--;	/* Go back and retry node i.  */

	      if (dump_file)
		fprintf (dump_file, "num_splits=%d\n", num_splits);
	    }

	  /* ??? If (success), check register pressure estimates.  */
	}                       /* Continue with next node.  */
    }                           /* While flush_and_start_over.  */
  if (ii >= maxii)
    {
      /* Failed: could not fit all nodes within the allowed II range.  */
      free_partial_schedule (ps);
      ps = NULL;
    }
  else
    gcc_assert (sbitmap_equal (tobe_scheduled, sched_nodes));

  sbitmap_free (sched_nodes);
  sbitmap_free (must_precede);
  sbitmap_free (must_follow);
  sbitmap_free (tobe_scheduled);

  return ps;
}
|
|
1867
|
|
/* This function inserts a new empty row into PS at the position
   according to SPLITROW, keeping all already scheduled instructions
   intact and updating their SCHED_TIME and cycle accordingly.  */
static void
ps_insert_empty_row (partial_schedule_ptr ps, int split_row,
		     sbitmap sched_nodes)
{
  ps_insn_ptr crr_insn;
  ps_insn_ptr *rows_new;
  int ii = ps->ii;
  int new_ii = ii + 1;
  int row;

  verify_partial_schedule (ps, sched_nodes);

  /* We normalize sched_time and rotate ps to have only non-negative sched
     times, for simplicity of updating cycles after inserting new row.  */
  split_row -= ps->min_cycle;
  split_row = SMODULO (split_row, ii);
  if (dump_file)
    fprintf (dump_file, "split_row=%d\n", split_row);

  normalize_sched_times (ps);
  rotate_partial_schedule (ps, ps->min_cycle);

  /* Build a new rows array with II + 1 rows.  Rows below SPLIT_ROW keep
     their index; rows at or above it shift down by one.  */
  rows_new = (ps_insn_ptr *) xcalloc (new_ii, sizeof (ps_insn_ptr));
  for (row = 0; row < split_row; row++)
    {
      rows_new[row] = ps->rows[row];
      ps->rows[row] = NULL;
      for (crr_insn = rows_new[row];
	   crr_insn; crr_insn = crr_insn->next_in_row)
	{
	  ddg_node_ptr u = crr_insn->node;
	  /* One extra cycle for every full II already completed, since
	     each earlier iteration also gains the inserted row.  */
	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii);

	  SCHED_TIME (u) = new_time;
	  crr_insn->cycle = new_time;
	  SCHED_ROW (u) = new_time % new_ii;
	  SCHED_STAGE (u) = new_time / new_ii;
	}

    }

  /* The freshly inserted, empty row.  */
  rows_new[split_row] = NULL;

  for (row = split_row; row < ii; row++)
    {
      rows_new[row + 1] = ps->rows[row];
      ps->rows[row] = NULL;
      for (crr_insn = rows_new[row + 1];
	   crr_insn; crr_insn = crr_insn->next_in_row)
	{
	  ddg_node_ptr u = crr_insn->node;
	  /* As above, plus one more for the row inserted below this insn
	     within its own iteration.  */
	  int new_time = SCHED_TIME (u) + (SCHED_TIME (u) / ii) + 1;

	  SCHED_TIME (u) = new_time;
	  crr_insn->cycle = new_time;
	  SCHED_ROW (u) = new_time % new_ii;
	  SCHED_STAGE (u) = new_time / new_ii;
	}
    }

  /* Updating ps.  */
  ps->min_cycle = ps->min_cycle + ps->min_cycle / ii
    + (SMODULO (ps->min_cycle, ii) >= split_row ? 1 : 0);
  ps->max_cycle = ps->max_cycle + ps->max_cycle / ii
    + (SMODULO (ps->max_cycle, ii) >= split_row ? 1 : 0);
  free (ps->rows);
  ps->rows = rows_new;
  ps->ii = new_ii;
  gcc_assert (ps->min_cycle >= 0);

  verify_partial_schedule (ps, sched_nodes);

  if (dump_file)
    fprintf (dump_file, "min_cycle=%d, max_cycle=%d\n", ps->min_cycle,
	     ps->max_cycle);
}
|
|
1947
|
|
1948 /* Given U_NODE which is the node that failed to be scheduled; LOW and
|
|
1949 UP which are the boundaries of it's scheduling window; compute using
|
|
1950 SCHED_NODES and II a row in the partial schedule that can be split
|
|
1951 which will separate a critical predecessor from a critical successor
|
|
1952 thereby expanding the window, and return it. */
|
|
1953 static int
|
|
1954 compute_split_row (sbitmap sched_nodes, int low, int up, int ii,
|
|
1955 ddg_node_ptr u_node)
|
|
1956 {
|
|
1957 ddg_edge_ptr e;
|
|
1958 int lower = INT_MIN, upper = INT_MAX;
|
|
1959 ddg_node_ptr crit_pred = NULL;
|
|
1960 ddg_node_ptr crit_succ = NULL;
|
|
1961 int crit_cycle;
|
|
1962
|
|
1963 for (e = u_node->in; e != 0; e = e->next_in)
|
|
1964 {
|
|
1965 ddg_node_ptr v_node = e->src;
|
|
1966
|
|
1967 if (TEST_BIT (sched_nodes, v_node->cuid)
|
|
1968 && (low == SCHED_TIME (v_node) + e->latency - (e->distance * ii)))
|
|
1969 if (SCHED_TIME (v_node) > lower)
|
|
1970 {
|
|
1971 crit_pred = v_node;
|
|
1972 lower = SCHED_TIME (v_node);
|
|
1973 }
|
|
1974 }
|
|
1975
|
|
1976 if (crit_pred != NULL)
|
|
1977 {
|
|
1978 crit_cycle = SCHED_TIME (crit_pred) + 1;
|
|
1979 return SMODULO (crit_cycle, ii);
|
|
1980 }
|
|
1981
|
|
1982 for (e = u_node->out; e != 0; e = e->next_out)
|
|
1983 {
|
|
1984 ddg_node_ptr v_node = e->dest;
|
|
1985 if (TEST_BIT (sched_nodes, v_node->cuid)
|
|
1986 && (up == SCHED_TIME (v_node) - e->latency + (e->distance * ii)))
|
|
1987 if (SCHED_TIME (v_node) < upper)
|
|
1988 {
|
|
1989 crit_succ = v_node;
|
|
1990 upper = SCHED_TIME (v_node);
|
|
1991 }
|
|
1992 }
|
|
1993
|
|
1994 if (crit_succ != NULL)
|
|
1995 {
|
|
1996 crit_cycle = SCHED_TIME (crit_succ);
|
|
1997 return SMODULO (crit_cycle, ii);
|
|
1998 }
|
|
1999
|
|
2000 if (dump_file)
|
|
2001 fprintf (dump_file, "Both crit_pred and crit_succ are NULL\n");
|
|
2002
|
|
2003 return SMODULO ((low + up + 1) / 2, ii);
|
|
2004 }
|
|
2005
|
|
2006 static void
|
|
2007 verify_partial_schedule (partial_schedule_ptr ps, sbitmap sched_nodes)
|
|
2008 {
|
|
2009 int row;
|
|
2010 ps_insn_ptr crr_insn;
|
|
2011
|
|
2012 for (row = 0; row < ps->ii; row++)
|
|
2013 for (crr_insn = ps->rows[row]; crr_insn; crr_insn = crr_insn->next_in_row)
|
|
2014 {
|
|
2015 ddg_node_ptr u = crr_insn->node;
|
|
2016
|
|
2017 gcc_assert (TEST_BIT (sched_nodes, u->cuid));
|
|
2018 /* ??? Test also that all nodes of sched_nodes are in ps, perhaps by
|
|
2019 popcount (sched_nodes) == number of insns in ps. */
|
|
2020 gcc_assert (SCHED_TIME (u) >= ps->min_cycle);
|
|
2021 gcc_assert (SCHED_TIME (u) <= ps->max_cycle);
|
|
2022 }
|
|
2023 }
|
|
2024
|
|
2025
|
|
2026 /* This page implements the algorithm for ordering the nodes of a DDG
|
|
2027 for modulo scheduling, activated through the
|
|
2028 "int sms_order_nodes (ddg_ptr, int mii, int * result)" API. */
|
|
2029
|
|
/* Accessors for the per-node ordering parameters stored in each DDG
   node's AUX info field while ordering nodes.  */
#define ORDER_PARAMS(x) ((struct node_order_params *) (x)->aux.info)
#define ASAP(x) (ORDER_PARAMS ((x))->asap)
#define ALAP(x) (ORDER_PARAMS ((x))->alap)
#define HEIGHT(x) (ORDER_PARAMS ((x))->height)
/* Mobility: slack between the earliest and latest feasible cycle.  */
#define MOB(x) (ALAP ((x)) - ASAP ((x)))
#define DEPTH(x) (ASAP ((x)))

typedef struct node_order_params * nopa;

static void order_nodes_of_sccs (ddg_all_sccs_ptr, int * result);
static int order_nodes_in_scc (ddg_ptr, sbitmap, sbitmap, int*, int);
static nopa calculate_order_params (ddg_ptr, int, int *);
static int find_max_asap (ddg_ptr, sbitmap);
static int find_max_hv_min_mob (ddg_ptr, sbitmap);
static int find_max_dv_min_mob (ddg_ptr, sbitmap);

/* Direction in which the node ordering sweep currently proceeds.  */
enum sms_direction {BOTTOMUP, TOPDOWN};

/* Per-node scheduling parameters computed by calculate_order_params
   over the zero-distance (intra-iteration) edges of the DDG.  */
struct node_order_params
{
  int asap;    /* Earliest possible cycle.  */
  int alap;    /* Latest possible cycle.  */
  int height;  /* Longest latency path to a graph sink.  */
};
|
|
2054
|
|
2055 /* Check if NODE_ORDER contains a permutation of 0 .. NUM_NODES-1. */
|
|
2056 static void
|
|
2057 check_nodes_order (int *node_order, int num_nodes)
|
|
2058 {
|
|
2059 int i;
|
|
2060 sbitmap tmp = sbitmap_alloc (num_nodes);
|
|
2061
|
|
2062 sbitmap_zero (tmp);
|
|
2063
|
|
2064 if (dump_file)
|
|
2065 fprintf (dump_file, "SMS final nodes order: \n");
|
|
2066
|
|
2067 for (i = 0; i < num_nodes; i++)
|
|
2068 {
|
|
2069 int u = node_order[i];
|
|
2070
|
|
2071 if (dump_file)
|
|
2072 fprintf (dump_file, "%d ", u);
|
|
2073 gcc_assert (u < num_nodes && u >= 0 && !TEST_BIT (tmp, u));
|
|
2074
|
|
2075 SET_BIT (tmp, u);
|
|
2076 }
|
|
2077
|
|
2078 if (dump_file)
|
|
2079 fprintf (dump_file, "\n");
|
|
2080
|
|
2081 sbitmap_free (tmp);
|
|
2082 }
|
|
2083
|
|
/* Order the nodes of G for scheduling and pass the result in
   NODE_ORDER.  Also set aux.count of each node to ASAP.
   Put maximal ASAP to PMAX_ASAP.  Return the recMII for the given DDG.  */
static int
sms_order_nodes (ddg_ptr g, int mii, int * node_order, int *pmax_asap)
{
  int i;
  int rec_mii = 0;
  ddg_all_sccs_ptr sccs = create_ddg_all_sccs (g);

  /* Computes ASAP/ALAP/HEIGHT for every node and hangs them off each
     node's aux.info; also sets *PMAX_ASAP.  */
  nopa nops = calculate_order_params (g, mii, pmax_asap);

  if (dump_file)
    print_sccs (dump_file, sccs, g);

  order_nodes_of_sccs (sccs, node_order);

  if (sccs->num_sccs > 0)
    /* First SCC has the largest recurrence_length.  */
    rec_mii = sccs->sccs[0]->recurrence_length;

  /* Save ASAP before destroying node_order_params.  */
  for (i = 0; i < g->num_nodes; i++)
    {
      ddg_node_ptr v = &g->nodes[i];
      v->aux.count = ASAP (v);
    }

  free (nops);
  free_ddg_all_sccs (sccs);
  /* Verify the ordering is a permutation of all node indices.  */
  check_nodes_order (node_order, g->num_nodes);

  return rec_mii;
}
|
|
2118
|
|
/* Fill NODE_ORDER with an ordering of all nodes of ALL_SCCS's DDG,
   handling one (extended) SCC at a time, from highest recMII down,
   then any remaining nodes outside all SCCs.  */
static void
order_nodes_of_sccs (ddg_all_sccs_ptr all_sccs, int * node_order)
{
  int i, pos = 0;
  ddg_ptr g = all_sccs->ddg;
  int num_nodes = g->num_nodes;
  sbitmap prev_sccs = sbitmap_alloc (num_nodes);
  sbitmap on_path = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);
  sbitmap ones = sbitmap_alloc (num_nodes);

  sbitmap_zero (prev_sccs);
  sbitmap_ones (ones);

  /* Perform the node ordering starting from the SCC with the highest recMII.
     For each SCC order the nodes according to their ASAP/ALAP/HEIGHT etc.  */
  for (i = 0; i < all_sccs->num_sccs; i++)
    {
      ddg_scc_ptr scc = all_sccs->sccs[i];

      /* Add nodes on paths from previous SCCs to the current SCC.  */
      find_nodes_on_paths (on_path, g, prev_sccs, scc->nodes);
      sbitmap_a_or_b (tmp, scc->nodes, on_path);

      /* Add nodes on paths from the current SCC to previous SCCs.  */
      find_nodes_on_paths (on_path, g, scc->nodes, prev_sccs);
      sbitmap_a_or_b (tmp, tmp, on_path);

      /* Remove nodes of previous SCCs from current extended SCC.  */
      sbitmap_difference (tmp, tmp, prev_sccs);

      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
      /* Above call to order_nodes_in_scc updated prev_sccs |= tmp.  */
    }

  /* Handle the remaining nodes that do not belong to any scc.  Each call
     to order_nodes_in_scc handles a single connected component.  */
  while (pos < g->num_nodes)
    {
      /* TMP = all nodes not yet ordered.  */
      sbitmap_difference (tmp, ones, prev_sccs);
      pos = order_nodes_in_scc (g, prev_sccs, tmp, node_order, pos);
    }
  sbitmap_free (prev_sccs);
  sbitmap_free (on_path);
  sbitmap_free (tmp);
  sbitmap_free (ones);
}
|
|
2166
|
|
/* MII is needed if we consider backarcs (that do not close recursive cycles).  */
static struct node_order_params *
calculate_order_params (ddg_ptr g, int mii ATTRIBUTE_UNUSED, int *pmax_asap)
{
  int u;
  int max_asap;
  int num_nodes = g->num_nodes;
  ddg_edge_ptr e;
  /* Allocate a place to hold ordering params for each node in the DDG.  */
  nopa node_order_params_arr;

  /* Initialize of ASAP/ALAP/HEIGHT to zero.  */
  node_order_params_arr = (nopa) xcalloc (num_nodes,
					  sizeof (struct node_order_params));

  /* Set the aux pointer of each node to point to its order_params structure.  */
  for (u = 0; u < num_nodes; u++)
    g->nodes[u].aux.info = &node_order_params_arr[u];

  /* Disregarding a backarc from each recursive cycle to obtain a DAG,
     calculate ASAP, ALAP, mobility, distance, and height for each node
     in the dependence (direct acyclic) graph.  */

  /* We assume that the nodes in the array are in topological order.  */

  /* Forward pass over zero-distance edges: ASAP of a node is the latest
     finish time of its intra-iteration predecessors.  */
  max_asap = 0;
  for (u = 0; u < num_nodes; u++)
    {
      ddg_node_ptr u_node = &g->nodes[u];

      ASAP (u_node) = 0;
      for (e = u_node->in; e; e = e->next_in)
	if (e->distance == 0)
	  ASAP (u_node) = MAX (ASAP (u_node),
			       ASAP (e->src) + e->latency);
      max_asap = MAX (max_asap, ASAP (u_node));
    }

  /* Backward pass: ALAP shrinks from MAX_ASAP toward each successor's
     ALAP minus latency; HEIGHT grows from the sinks upward.  */
  for (u = num_nodes - 1; u > -1; u--)
    {
      ddg_node_ptr u_node = &g->nodes[u];

      ALAP (u_node) = max_asap;
      HEIGHT (u_node) = 0;
      for (e = u_node->out; e; e = e->next_out)
	if (e->distance == 0)
	  {
	    ALAP (u_node) = MIN (ALAP (u_node),
				 ALAP (e->dest) - e->latency);
	    HEIGHT (u_node) = MAX (HEIGHT (u_node),
				   HEIGHT (e->dest) + e->latency);
	  }
    }
  if (dump_file)
    {
      fprintf (dump_file, "\nOrder params\n");
      for (u = 0; u < num_nodes; u++)
	{
	  ddg_node_ptr u_node = &g->nodes[u];

	  fprintf (dump_file, "node %d, ASAP: %d, ALAP: %d, HEIGHT: %d\n", u,
		   ASAP (u_node), ALAP (u_node), HEIGHT (u_node));
	}
    }

  *pmax_asap = max_asap;
  return node_order_params_arr;
}
|
|
2235
|
|
2236 static int
|
|
2237 find_max_asap (ddg_ptr g, sbitmap nodes)
|
|
2238 {
|
|
2239 unsigned int u = 0;
|
|
2240 int max_asap = -1;
|
|
2241 int result = -1;
|
|
2242 sbitmap_iterator sbi;
|
|
2243
|
|
2244 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
|
|
2245 {
|
|
2246 ddg_node_ptr u_node = &g->nodes[u];
|
|
2247
|
|
2248 if (max_asap < ASAP (u_node))
|
|
2249 {
|
|
2250 max_asap = ASAP (u_node);
|
|
2251 result = u;
|
|
2252 }
|
|
2253 }
|
|
2254 return result;
|
|
2255 }
|
|
2256
|
|
2257 static int
|
|
2258 find_max_hv_min_mob (ddg_ptr g, sbitmap nodes)
|
|
2259 {
|
|
2260 unsigned int u = 0;
|
|
2261 int max_hv = -1;
|
|
2262 int min_mob = INT_MAX;
|
|
2263 int result = -1;
|
|
2264 sbitmap_iterator sbi;
|
|
2265
|
|
2266 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
|
|
2267 {
|
|
2268 ddg_node_ptr u_node = &g->nodes[u];
|
|
2269
|
|
2270 if (max_hv < HEIGHT (u_node))
|
|
2271 {
|
|
2272 max_hv = HEIGHT (u_node);
|
|
2273 min_mob = MOB (u_node);
|
|
2274 result = u;
|
|
2275 }
|
|
2276 else if ((max_hv == HEIGHT (u_node))
|
|
2277 && (min_mob > MOB (u_node)))
|
|
2278 {
|
|
2279 min_mob = MOB (u_node);
|
|
2280 result = u;
|
|
2281 }
|
|
2282 }
|
|
2283 return result;
|
|
2284 }
|
|
2285
|
|
2286 static int
|
|
2287 find_max_dv_min_mob (ddg_ptr g, sbitmap nodes)
|
|
2288 {
|
|
2289 unsigned int u = 0;
|
|
2290 int max_dv = -1;
|
|
2291 int min_mob = INT_MAX;
|
|
2292 int result = -1;
|
|
2293 sbitmap_iterator sbi;
|
|
2294
|
|
2295 EXECUTE_IF_SET_IN_SBITMAP (nodes, 0, u, sbi)
|
|
2296 {
|
|
2297 ddg_node_ptr u_node = &g->nodes[u];
|
|
2298
|
|
2299 if (max_dv < DEPTH (u_node))
|
|
2300 {
|
|
2301 max_dv = DEPTH (u_node);
|
|
2302 min_mob = MOB (u_node);
|
|
2303 result = u;
|
|
2304 }
|
|
2305 else if ((max_dv == DEPTH (u_node))
|
|
2306 && (min_mob > MOB (u_node)))
|
|
2307 {
|
|
2308 min_mob = MOB (u_node);
|
|
2309 result = u;
|
|
2310 }
|
|
2311 }
|
|
2312 return result;
|
|
2313 }
|
|
2314
|
|
/* Places the nodes of SCC into the NODE_ORDER array starting
   at position POS, according to the SMS ordering algorithm.
   NODES_ORDERED (in&out parameter) holds the bitset of all nodes in
   the NODE_ORDER array, starting from position zero.  */
static int
order_nodes_in_scc (ddg_ptr g, sbitmap nodes_ordered, sbitmap scc,
		    int * node_order, int pos)
{
  enum sms_direction dir;
  int num_nodes = g->num_nodes;
  sbitmap workset = sbitmap_alloc (num_nodes);
  sbitmap tmp = sbitmap_alloc (num_nodes);
  sbitmap zero_bitmap = sbitmap_alloc (num_nodes);
  sbitmap predecessors = sbitmap_alloc (num_nodes);
  sbitmap successors = sbitmap_alloc (num_nodes);

  sbitmap_zero (predecessors);
  find_predecessors (predecessors, g, nodes_ordered);

  sbitmap_zero (successors);
  find_successors (successors, g, nodes_ordered);

  /* Seed the workset: prefer SCC nodes that are predecessors of what
     was already ordered (go bottom-up), else successors (top-down),
     else start from the SCC node with maximal ASAP.  */
  sbitmap_zero (tmp);
  if (sbitmap_a_and_b_cg (tmp, predecessors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = BOTTOMUP;
    }
  else if (sbitmap_a_and_b_cg (tmp, successors, scc))
    {
      sbitmap_copy (workset, tmp);
      dir = TOPDOWN;
    }
  else
    {
      int u;

      sbitmap_zero (workset);
      if ((u = find_max_asap (g, scc)) >= 0)
	SET_BIT (workset, u);
      dir = BOTTOMUP;
    }

  /* Alternate sweeps: drain the workset in one direction, then refill
     it from the opposite direction, until nothing is left.  */
  sbitmap_zero (zero_bitmap);
  while (!sbitmap_equal (workset, zero_bitmap))
    {
      int v;
      ddg_node_ptr v_node;
      sbitmap v_node_preds;
      sbitmap v_node_succs;

      if (dir == TOPDOWN)
	{
	  while (!sbitmap_equal (workset, zero_bitmap))
	    {
	      /* Pick by maximal height, minimal mobility.  */
	      v = find_max_hv_min_mob (g, workset);
	      v_node = &g->nodes[v];
	      node_order[pos++] = v;
	      v_node_succs = NODE_SUCCESSORS (v_node);
	      sbitmap_a_and_b (tmp, v_node_succs, scc);

	      /* Don't consider the already ordered successors again.  */
	      sbitmap_difference (tmp, tmp, nodes_ordered);
	      sbitmap_a_or_b (workset, workset, tmp);
	      RESET_BIT (workset, v);
	      SET_BIT (nodes_ordered, v);
	    }
	  /* Switch direction; refill from predecessors of the ordered set.  */
	  dir = BOTTOMUP;
	  sbitmap_zero (predecessors);
	  find_predecessors (predecessors, g, nodes_ordered);
	  sbitmap_a_and_b (workset, predecessors, scc);
	}
      else
	{
	  while (!sbitmap_equal (workset, zero_bitmap))
	    {
	      /* Pick by maximal depth, minimal mobility.  */
	      v = find_max_dv_min_mob (g, workset);
	      v_node = &g->nodes[v];
	      node_order[pos++] = v;
	      v_node_preds = NODE_PREDECESSORS (v_node);
	      sbitmap_a_and_b (tmp, v_node_preds, scc);

	      /* Don't consider the already ordered predecessors again.  */
	      sbitmap_difference (tmp, tmp, nodes_ordered);
	      sbitmap_a_or_b (workset, workset, tmp);
	      RESET_BIT (workset, v);
	      SET_BIT (nodes_ordered, v);
	    }
	  /* Switch direction; refill from successors of the ordered set.  */
	  dir = TOPDOWN;
	  sbitmap_zero (successors);
	  find_successors (successors, g, nodes_ordered);
	  sbitmap_a_and_b (workset, successors, scc);
	}
    }
  sbitmap_free (tmp);
  sbitmap_free (workset);
  sbitmap_free (zero_bitmap);
  sbitmap_free (predecessors);
  sbitmap_free (successors);
  return pos;
}
|
|
2416
|
|
2417
|
|
2418 /* This page contains functions for manipulating partial-schedules during
|
|
2419 modulo scheduling. */
|
|
2420
|
|
2421 /* Create a partial schedule and allocate a memory to hold II rows. */
|
|
2422
|
|
2423 static partial_schedule_ptr
|
|
2424 create_partial_schedule (int ii, ddg_ptr g, int history)
|
|
2425 {
|
|
2426 partial_schedule_ptr ps = XNEW (struct partial_schedule);
|
|
2427 ps->rows = (ps_insn_ptr *) xcalloc (ii, sizeof (ps_insn_ptr));
|
|
2428 ps->ii = ii;
|
|
2429 ps->history = history;
|
|
2430 ps->min_cycle = INT_MAX;
|
|
2431 ps->max_cycle = INT_MIN;
|
|
2432 ps->g = g;
|
|
2433
|
|
2434 return ps;
|
|
2435 }
|
|
2436
|
|
2437 /* Free the PS_INSNs in rows array of the given partial schedule.
|
|
2438 ??? Consider caching the PS_INSN's. */
|
|
2439 static void
|
|
2440 free_ps_insns (partial_schedule_ptr ps)
|
|
2441 {
|
|
2442 int i;
|
|
2443
|
|
2444 for (i = 0; i < ps->ii; i++)
|
|
2445 {
|
|
2446 while (ps->rows[i])
|
|
2447 {
|
|
2448 ps_insn_ptr ps_insn = ps->rows[i]->next_in_row;
|
|
2449
|
|
2450 free (ps->rows[i]);
|
|
2451 ps->rows[i] = ps_insn;
|
|
2452 }
|
|
2453 ps->rows[i] = NULL;
|
|
2454 }
|
|
2455 }
|
|
2456
|
|
2457 /* Free all the memory allocated to the partial schedule. */
|
|
2458
|
|
2459 static void
|
|
2460 free_partial_schedule (partial_schedule_ptr ps)
|
|
2461 {
|
|
2462 if (!ps)
|
|
2463 return;
|
|
2464 free_ps_insns (ps);
|
|
2465 free (ps->rows);
|
|
2466 free (ps);
|
|
2467 }
|
|
2468
|
|
2469 /* Clear the rows array with its PS_INSNs, and create a new one with
|
|
2470 NEW_II rows. */
|
|
2471
|
|
2472 static void
|
|
2473 reset_partial_schedule (partial_schedule_ptr ps, int new_ii)
|
|
2474 {
|
|
2475 if (!ps)
|
|
2476 return;
|
|
2477 free_ps_insns (ps);
|
|
2478 if (new_ii == ps->ii)
|
|
2479 return;
|
|
2480 ps->rows = (ps_insn_ptr *) xrealloc (ps->rows, new_ii
|
|
2481 * sizeof (ps_insn_ptr));
|
|
2482 memset (ps->rows, 0, new_ii * sizeof (ps_insn_ptr));
|
|
2483 ps->ii = new_ii;
|
|
2484 ps->min_cycle = INT_MAX;
|
|
2485 ps->max_cycle = INT_MIN;
|
|
2486 }
|
|
2487
|
|
2488 /* Prints the partial schedule as an ii rows array, for each rows
|
|
2489 print the ids of the insns in it. */
|
|
2490 void
|
|
2491 print_partial_schedule (partial_schedule_ptr ps, FILE *dump)
|
|
2492 {
|
|
2493 int i;
|
|
2494
|
|
2495 for (i = 0; i < ps->ii; i++)
|
|
2496 {
|
|
2497 ps_insn_ptr ps_i = ps->rows[i];
|
|
2498
|
|
2499 fprintf (dump, "\n[ROW %d ]: ", i);
|
|
2500 while (ps_i)
|
|
2501 {
|
|
2502 fprintf (dump, "%d, ",
|
|
2503 INSN_UID (ps_i->node->insn));
|
|
2504 ps_i = ps_i->next_in_row;
|
|
2505 }
|
|
2506 }
|
|
2507 }
|
|
2508
|
|
2509 /* Creates an object of PS_INSN and initializes it to the given parameters. */
|
|
2510 static ps_insn_ptr
|
|
2511 create_ps_insn (ddg_node_ptr node, int rest_count, int cycle)
|
|
2512 {
|
|
2513 ps_insn_ptr ps_i = XNEW (struct ps_insn);
|
|
2514
|
|
2515 ps_i->node = node;
|
|
2516 ps_i->next_in_row = NULL;
|
|
2517 ps_i->prev_in_row = NULL;
|
|
2518 ps_i->row_rest_count = rest_count;
|
|
2519 ps_i->cycle = cycle;
|
|
2520
|
|
2521 return ps_i;
|
|
2522 }
|
|
2523
|
|
2524
|
|
2525 /* Removes the given PS_INSN from the partial schedule. Returns false if the
|
|
2526 node is not found in the partial schedule, else returns true. */
|
|
2527 static bool
|
|
2528 remove_node_from_ps (partial_schedule_ptr ps, ps_insn_ptr ps_i)
|
|
2529 {
|
|
2530 int row;
|
|
2531
|
|
2532 if (!ps || !ps_i)
|
|
2533 return false;
|
|
2534
|
|
2535 row = SMODULO (ps_i->cycle, ps->ii);
|
|
2536 if (! ps_i->prev_in_row)
|
|
2537 {
|
|
2538 if (ps_i != ps->rows[row])
|
|
2539 return false;
|
|
2540
|
|
2541 ps->rows[row] = ps_i->next_in_row;
|
|
2542 if (ps->rows[row])
|
|
2543 ps->rows[row]->prev_in_row = NULL;
|
|
2544 }
|
|
2545 else
|
|
2546 {
|
|
2547 ps_i->prev_in_row->next_in_row = ps_i->next_in_row;
|
|
2548 if (ps_i->next_in_row)
|
|
2549 ps_i->next_in_row->prev_in_row = ps_i->prev_in_row;
|
|
2550 }
|
|
2551 free (ps_i);
|
|
2552 return true;
|
|
2553 }
|
|
2554
|
|
/* Unlike what literature describes for modulo scheduling (which focuses
   on VLIW machines) the order of the instructions inside a cycle is
   important.  Given the bitmaps MUST_FOLLOW and MUST_PRECEDE we know
   where the current instruction should go relative to the already
   scheduled instructions in the given cycle.  Go over these
   instructions and find the first possible column to put it in.  */
static bool
ps_insn_find_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
		     sbitmap must_precede, sbitmap must_follow)
{
  ps_insn_ptr next_ps_i;
  ps_insn_ptr first_must_follow = NULL;
  ps_insn_ptr last_must_precede = NULL;
  int row;

  if (! ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Find the first must follow and the last must precede
     and insert the node immediately after the must precede
     but make sure that there is no must follow after it.  */
  for (next_ps_i = ps->rows[row];
       next_ps_i;
       next_ps_i = next_ps_i->next_in_row)
    {
      if (must_follow && TEST_BIT (must_follow, next_ps_i->node->cuid)
	  && ! first_must_follow)
	first_must_follow = next_ps_i;
      if (must_precede && TEST_BIT (must_precede, next_ps_i->node->cuid))
	{
	  /* If we have already met a node that must follow, then
	     there is no possible column.  */
	  if (first_must_follow)
	    return false;
	  else
	    last_must_precede = next_ps_i;
	}
    }

  /* Now insert the node after INSERT_AFTER_PSI.  */

  if (! last_must_precede)
    {
      /* No ordering constraint from the left: prepend at the row head.  */
      ps_i->next_in_row = ps->rows[row];
      ps_i->prev_in_row = NULL;
      if (ps_i->next_in_row)
	ps_i->next_in_row->prev_in_row = ps_i;
      ps->rows[row] = ps_i;
    }
  else
    {
      /* Splice PS_I in right after the last must-precede insn.  */
      ps_i->next_in_row = last_must_precede->next_in_row;
      last_must_precede->next_in_row = ps_i;
      ps_i->prev_in_row = last_must_precede;
      if (ps_i->next_in_row)
	ps_i->next_in_row->prev_in_row = ps_i;
    }

  return true;
}
|
|
2617
|
|
/* Advances the PS_INSN one column in its current row; returns false
   in failure and true in success.  Bit N is set in MUST_FOLLOW if
   the node with cuid N must be come after the node pointed to by
   PS_I when scheduled in the same cycle.  */
static int
ps_insn_advance_column (partial_schedule_ptr ps, ps_insn_ptr ps_i,
			sbitmap must_follow)
{
  ps_insn_ptr prev, next;
  int row;
  ddg_node_ptr next_node;

  if (!ps || !ps_i)
    return false;

  row = SMODULO (ps_i->cycle, ps->ii);

  /* Nothing to swap with: PS_I is already last in its row.  */
  if (! ps_i->next_in_row)
    return false;

  next_node = ps_i->next_in_row->node;

  /* Check if next_in_row is dependent on ps_i, both having same sched
     times (typically ANTI_DEP).  If so, ps_i cannot skip over it.  */
  if (must_follow && TEST_BIT (must_follow, next_node->cuid))
    return false;

  /* Advance PS_I over its next_in_row in the doubly linked list.
     After this, the order PREV -> PS_I -> NEXT becomes
     PREV -> NEXT -> PS_I.  */
  prev = ps_i->prev_in_row;
  next = ps_i->next_in_row;

  /* If PS_I was the row head, NEXT becomes the new head.  */
  if (ps_i == ps->rows[row])
    ps->rows[row] = next;

  ps_i->next_in_row = next->next_in_row;

  if (next->next_in_row)
    next->next_in_row->prev_in_row = ps_i;

  next->next_in_row = ps_i;
  ps_i->prev_in_row = next;

  next->prev_in_row = prev;
  if (prev)
    prev->next_in_row = next;

  return true;
}
|
|
2666
|
|
2667 /* Inserts a DDG_NODE to the given partial schedule at the given cycle.
|
|
2668 Returns 0 if this is not possible and a PS_INSN otherwise. Bit N is
|
|
2669 set in MUST_PRECEDE/MUST_FOLLOW if the node with cuid N must be come
|
|
2670 before/after (respectively) the node pointed to by PS_I when scheduled
|
|
2671 in the same cycle. */
|
|
2672 static ps_insn_ptr
|
|
2673 add_node_to_ps (partial_schedule_ptr ps, ddg_node_ptr node, int cycle,
|
|
2674 sbitmap must_precede, sbitmap must_follow)
|
|
2675 {
|
|
2676 ps_insn_ptr ps_i;
|
|
2677 int rest_count = 1;
|
|
2678 int row = SMODULO (cycle, ps->ii);
|
|
2679
|
|
2680 if (ps->rows[row]
|
|
2681 && ps->rows[row]->row_rest_count >= issue_rate)
|
|
2682 return NULL;
|
|
2683
|
|
2684 if (ps->rows[row])
|
|
2685 rest_count += ps->rows[row]->row_rest_count;
|
|
2686
|
|
2687 ps_i = create_ps_insn (node, rest_count, cycle);
|
|
2688
|
|
2689 /* Finds and inserts PS_I according to MUST_FOLLOW and
|
|
2690 MUST_PRECEDE. */
|
|
2691 if (! ps_insn_find_column (ps, ps_i, must_precede, must_follow))
|
|
2692 {
|
|
2693 free (ps_i);
|
|
2694 return NULL;
|
|
2695 }
|
|
2696
|
|
2697 return ps_i;
|
|
2698 }
|
|
2699
|
|
2700 /* Advance time one cycle. Assumes DFA is being used. */
|
|
2701 static void
|
|
2702 advance_one_cycle (void)
|
|
2703 {
|
|
2704 if (targetm.sched.dfa_pre_cycle_insn)
|
|
2705 state_transition (curr_state,
|
|
2706 targetm.sched.dfa_pre_cycle_insn ());
|
|
2707
|
|
2708 state_transition (curr_state, NULL);
|
|
2709
|
|
2710 if (targetm.sched.dfa_post_cycle_insn)
|
|
2711 state_transition (curr_state,
|
|
2712 targetm.sched.dfa_post_cycle_insn ());
|
|
2713 }
|
|
2714
|
|
2715
|
|
2716
|
|
/* Checks if PS has resource conflicts according to DFA, starting from
   FROM cycle to TO cycle; returns true if there are conflicts and false
   if there are no conflicts.  Assumes DFA is being used.  */
static int
ps_has_conflicts (partial_schedule_ptr ps, int from, int to)
{
  int cycle;

  state_reset (curr_state);

  for (cycle = from; cycle <= to; cycle++)
    {
      ps_insn_ptr crr_insn;
      /* Holds the remaining issue slots in the current row.  */
      int can_issue_more = issue_rate;

      /* Walk through the DFA for the current row.  */
      for (crr_insn = ps->rows[SMODULO (cycle, ps->ii)];
	   crr_insn;
	   crr_insn = crr_insn->next_in_row)
	{
	  rtx insn = crr_insn->node->insn;

	  if (!INSN_P (insn))
	    continue;

	  /* Check if there is room for the current insn.  */
	  if (!can_issue_more || state_dead_lock_p (curr_state))
	    return true;

	  /* Update the DFA state and return with failure if the DFA found
	     resource conflicts.  */
	  if (state_transition (curr_state, insn) >= 0)
	    return true;

	  if (targetm.sched.variable_issue)
	    can_issue_more =
	      targetm.sched.variable_issue (sched_dump, sched_verbose,
					    insn, can_issue_more);
	  /* A naked CLOBBER or USE generates no instruction, so don't
	     let them consume issue slots.  */
	  else if (GET_CODE (PATTERN (insn)) != USE
		   && GET_CODE (PATTERN (insn)) != CLOBBER)
	    can_issue_more--;
	}

      /* Advance the DFA to the next cycle.  */
      advance_one_cycle ();
    }
  return false;
}
|
|
2768
|
|
2769 /* Checks if the given node causes resource conflicts when added to PS at
|
|
2770 cycle C. If not the node is added to PS and returned; otherwise zero
|
|
2771 is returned. Bit N is set in MUST_PRECEDE/MUST_FOLLOW if the node with
|
|
2772 cuid N must be come before/after (respectively) the node pointed to by
|
|
2773 PS_I when scheduled in the same cycle. */
|
|
2774 ps_insn_ptr
|
|
2775 ps_add_node_check_conflicts (partial_schedule_ptr ps, ddg_node_ptr n,
|
|
2776 int c, sbitmap must_precede,
|
|
2777 sbitmap must_follow)
|
|
2778 {
|
|
2779 int has_conflicts = 0;
|
|
2780 ps_insn_ptr ps_i;
|
|
2781
|
|
2782 /* First add the node to the PS, if this succeeds check for
|
|
2783 conflicts, trying different issue slots in the same row. */
|
|
2784 if (! (ps_i = add_node_to_ps (ps, n, c, must_precede, must_follow)))
|
|
2785 return NULL; /* Failed to insert the node at the given cycle. */
|
|
2786
|
|
2787 has_conflicts = ps_has_conflicts (ps, c, c)
|
|
2788 || (ps->history > 0
|
|
2789 && ps_has_conflicts (ps,
|
|
2790 c - ps->history,
|
|
2791 c + ps->history));
|
|
2792
|
|
2793 /* Try different issue slots to find one that the given node can be
|
|
2794 scheduled in without conflicts. */
|
|
2795 while (has_conflicts)
|
|
2796 {
|
|
2797 if (! ps_insn_advance_column (ps, ps_i, must_follow))
|
|
2798 break;
|
|
2799 has_conflicts = ps_has_conflicts (ps, c, c)
|
|
2800 || (ps->history > 0
|
|
2801 && ps_has_conflicts (ps,
|
|
2802 c - ps->history,
|
|
2803 c + ps->history));
|
|
2804 }
|
|
2805
|
|
2806 if (has_conflicts)
|
|
2807 {
|
|
2808 remove_node_from_ps (ps, ps_i);
|
|
2809 return NULL;
|
|
2810 }
|
|
2811
|
|
2812 ps->min_cycle = MIN (ps->min_cycle, c);
|
|
2813 ps->max_cycle = MAX (ps->max_cycle, c);
|
|
2814 return ps_i;
|
|
2815 }
|
|
2816
|
|
2817 /* Rotate the rows of PS such that insns scheduled at time
|
|
2818 START_CYCLE will appear in row 0. Updates max/min_cycles. */
|
|
2819 void
|
|
2820 rotate_partial_schedule (partial_schedule_ptr ps, int start_cycle)
|
|
2821 {
|
|
2822 int i, row, backward_rotates;
|
|
2823 int last_row = ps->ii - 1;
|
|
2824
|
|
2825 if (start_cycle == 0)
|
|
2826 return;
|
|
2827
|
|
2828 backward_rotates = SMODULO (start_cycle, ps->ii);
|
|
2829
|
|
2830 /* Revisit later and optimize this into a single loop. */
|
|
2831 for (i = 0; i < backward_rotates; i++)
|
|
2832 {
|
|
2833 ps_insn_ptr first_row = ps->rows[0];
|
|
2834
|
|
2835 for (row = 0; row < last_row; row++)
|
|
2836 ps->rows[row] = ps->rows[row+1];
|
|
2837
|
|
2838 ps->rows[last_row] = first_row;
|
|
2839 }
|
|
2840
|
|
2841 ps->max_cycle -= start_cycle;
|
|
2842 ps->min_cycle -= start_cycle;
|
|
2843 }
|
|
2844
|
|
2845 #endif /* INSN_SCHEDULING */
|
|
2846
|
|
2847 static bool
|
|
2848 gate_handle_sms (void)
|
|
2849 {
|
|
2850 return (optimize > 0 && flag_modulo_sched);
|
|
2851 }
|
|
2852
|
|
2853
|
|
/* Run instruction scheduler.  */
/* Perform SMS module scheduling.  Entry point of the pass: switches the
   function into cfglayout mode, runs the modulo scheduler, then commits
   the layout back.  Always returns 0 (no further TODO flags).  */
static unsigned int
rest_of_handle_sms (void)
{
#ifdef INSN_SCHEDULING
  basic_block bb;

  /* Collect loop information to be used in SMS.  */
  cfg_layout_initialize (0);
  sms_schedule ();

  /* Update the life information, because we add pseudos.  */
  max_regno = max_reg_num ();

  /* Finalize layout changes.  Chain the blocks in their present order
     through bb->aux, as cfg_layout_finalize expects; the dominator info
     is freed since the scheduler may have restructured loops.  */
  FOR_EACH_BB (bb)
    if (bb->next_bb != EXIT_BLOCK_PTR)
      bb->aux = bb->next_bb;
  free_dominance_info (CDI_DOMINATORS);
  cfg_layout_finalize ();
#endif /* INSN_SCHEDULING */
  return 0;
}
|
|
2878
|
|
/* Pass descriptor for the SMS pass: gated by gate_handle_sms (i.e.
   optimizing with -fmodulo-sched), timed under TV_SMS, dumping both
   before and after execution.  */
struct rtl_opt_pass pass_sms =
{
 {
  RTL_PASS,
  "sms",                                /* name */
  gate_handle_sms,                      /* gate */
  rest_of_handle_sms,                   /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  TV_SMS,                               /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  TODO_dump_func,                       /* todo_flags_start */
  TODO_df_finish | TODO_verify_rtl_sharing |
  TODO_dump_func |
  TODO_ggc_collect                      /* todo_flags_finish */
 }
};
|
|
2899
|