Mercurial > hg > CbC > CbC_gcc
comparison gcc/cfgloopanal.c @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children | 77e2b8dfacca |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 /* Natural loop analysis code for GNU compiler. | |
2 Copyright (C) 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 | |
3 Free Software Foundation, Inc. | |
4 | |
5 This file is part of GCC. | |
6 | |
7 GCC is free software; you can redistribute it and/or modify it under | |
8 the terms of the GNU General Public License as published by the Free | |
9 Software Foundation; either version 3, or (at your option) any later | |
10 version. | |
11 | |
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with GCC; see the file COPYING3. If not see | |
19 <http://www.gnu.org/licenses/>. */ | |
20 | |
21 #include "config.h" | |
22 #include "system.h" | |
23 #include "coretypes.h" | |
24 #include "tm.h" | |
25 #include "rtl.h" | |
26 #include "hard-reg-set.h" | |
27 #include "obstack.h" | |
28 #include "basic-block.h" | |
29 #include "cfgloop.h" | |
30 #include "expr.h" | |
31 #include "output.h" | |
32 #include "graphds.h" | |
33 #include "params.h" | |
34 | |
35 /* Checks whether BB is executed exactly once in each LOOP iteration. */ | |
36 | |
37 bool | |
38 just_once_each_iteration_p (const struct loop *loop, const_basic_block bb) | |
39 { | |
40 /* It must be executed at least once each iteration. */ | |
41 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) | |
42 return false; | |
43 | |
44 /* And just once. */ | |
45 if (bb->loop_father != loop) | |
46 return false; | |
47 | |
48 /* But this was not enough. We might have some irreducible loop here. */ | |
49 if (bb->flags & BB_IRREDUCIBLE_LOOP) | |
50 return false; | |
51 | |
52 return true; | |
53 } | |
54 | |
55 /* Marks the edge E in graph G irreducible if it connects two vertices in the | |
56 same scc. */ | |
57 | |
58 static void | |
59 check_irred (struct graph *g, struct graph_edge *e) | |
60 { | |
61 edge real = (edge) e->data; | |
62 | |
63 /* All edges should lead from a component with higher number to the | |
64 one with lower one. */ | |
65 gcc_assert (g->vertices[e->src].component >= g->vertices[e->dest].component); | |
66 | |
67 if (g->vertices[e->src].component != g->vertices[e->dest].component) | |
68 return; | |
69 | |
70 real->flags |= EDGE_IRREDUCIBLE_LOOP; | |
71 if (flow_bb_inside_loop_p (real->src->loop_father, real->dest)) | |
72 real->src->flags |= BB_IRREDUCIBLE_LOOP; | |
73 } | |
74 | |
75 /* Marks blocks and edges that are part of non-recognized loops; i.e. we | |
76 throw away all latch edges and mark blocks inside any remaining cycle. | |
77 Everything is a bit complicated due to fact we do not want to do this | |
78 for parts of cycles that only "pass" through some loop -- i.e. for | |
79 each cycle, we want to mark blocks that belong directly to innermost | |
80 loop containing the whole cycle. | |
81 | |
82 LOOPS is the loop tree. */ | |
83 | |
84 #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block) | |
85 #define BB_REPR(BB) ((BB)->index + 1) | |
86 | |
87 void | |
88 mark_irreducible_loops (void) | |
89 { | |
90 basic_block act; | |
91 edge e; | |
92 edge_iterator ei; | |
93 int src, dest; | |
94 unsigned depth; | |
95 struct graph *g; | |
96 int num = number_of_loops (); | |
97 struct loop *cloop; | |
98 | |
99 gcc_assert (current_loops != NULL); | |
100 | |
101 /* Reset the flags. */ | |
102 FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb) | |
103 { | |
104 act->flags &= ~BB_IRREDUCIBLE_LOOP; | |
105 FOR_EACH_EDGE (e, ei, act->succs) | |
106 e->flags &= ~EDGE_IRREDUCIBLE_LOOP; | |
107 } | |
108 | |
109 /* Create the edge lists. */ | |
110 g = new_graph (last_basic_block + num); | |
111 | |
112 FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR, EXIT_BLOCK_PTR, next_bb) | |
113 FOR_EACH_EDGE (e, ei, act->succs) | |
114 { | |
115 /* Ignore edges to exit. */ | |
116 if (e->dest == EXIT_BLOCK_PTR) | |
117 continue; | |
118 | |
119 src = BB_REPR (act); | |
120 dest = BB_REPR (e->dest); | |
121 | |
122 /* Ignore latch edges. */ | |
123 if (e->dest->loop_father->header == e->dest | |
124 && e->dest->loop_father->latch == act) | |
125 continue; | |
126 | |
127 /* Edges inside a single loop should be left where they are. Edges | |
128 to subloop headers should lead to representative of the subloop, | |
129 but from the same place. | |
130 | |
131 Edges exiting loops should lead from representative | |
132 of the son of nearest common ancestor of the loops in that | |
133 act lays. */ | |
134 | |
135 if (e->dest->loop_father->header == e->dest) | |
136 dest = LOOP_REPR (e->dest->loop_father); | |
137 | |
138 if (!flow_bb_inside_loop_p (act->loop_father, e->dest)) | |
139 { | |
140 depth = 1 + loop_depth (find_common_loop (act->loop_father, | |
141 e->dest->loop_father)); | |
142 if (depth == loop_depth (act->loop_father)) | |
143 cloop = act->loop_father; | |
144 else | |
145 cloop = VEC_index (loop_p, act->loop_father->superloops, depth); | |
146 | |
147 src = LOOP_REPR (cloop); | |
148 } | |
149 | |
150 add_edge (g, src, dest)->data = e; | |
151 } | |
152 | |
153 /* Find the strongly connected components. */ | |
154 graphds_scc (g, NULL); | |
155 | |
156 /* Mark the irreducible loops. */ | |
157 for_each_edge (g, check_irred); | |
158 | |
159 free_graph (g); | |
160 | |
161 loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS); | |
162 } | |
163 | |
164 /* Counts number of insns inside LOOP. */ | |
165 int | |
166 num_loop_insns (const struct loop *loop) | |
167 { | |
168 basic_block *bbs, bb; | |
169 unsigned i, ninsns = 0; | |
170 rtx insn; | |
171 | |
172 bbs = get_loop_body (loop); | |
173 for (i = 0; i < loop->num_nodes; i++) | |
174 { | |
175 bb = bbs[i]; | |
176 ninsns++; | |
177 for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn)) | |
178 if (INSN_P (insn)) | |
179 ninsns++; | |
180 } | |
181 free(bbs); | |
182 | |
183 return ninsns; | |
184 } | |
185 | |
186 /* Counts number of insns executed on average per iteration LOOP. */ | |
187 int | |
188 average_num_loop_insns (const struct loop *loop) | |
189 { | |
190 basic_block *bbs, bb; | |
191 unsigned i, binsns, ninsns, ratio; | |
192 rtx insn; | |
193 | |
194 ninsns = 0; | |
195 bbs = get_loop_body (loop); | |
196 for (i = 0; i < loop->num_nodes; i++) | |
197 { | |
198 bb = bbs[i]; | |
199 | |
200 binsns = 1; | |
201 for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = NEXT_INSN (insn)) | |
202 if (INSN_P (insn)) | |
203 binsns++; | |
204 | |
205 ratio = loop->header->frequency == 0 | |
206 ? BB_FREQ_MAX | |
207 : (bb->frequency * BB_FREQ_MAX) / loop->header->frequency; | |
208 ninsns += binsns * ratio; | |
209 } | |
210 free(bbs); | |
211 | |
212 ninsns /= BB_FREQ_MAX; | |
213 if (!ninsns) | |
214 ninsns = 1; /* To avoid division by zero. */ | |
215 | |
216 return ninsns; | |
217 } | |
218 | |
219 /* Returns expected number of iterations of LOOP, according to | |
220 measured or guessed profile. No bounding is done on the | |
221 value. */ | |
222 | |
223 gcov_type | |
224 expected_loop_iterations_unbounded (const struct loop *loop) | |
225 { | |
226 edge e; | |
227 edge_iterator ei; | |
228 | |
229 if (loop->latch->count || loop->header->count) | |
230 { | |
231 gcov_type count_in, count_latch, expected; | |
232 | |
233 count_in = 0; | |
234 count_latch = 0; | |
235 | |
236 FOR_EACH_EDGE (e, ei, loop->header->preds) | |
237 if (e->src == loop->latch) | |
238 count_latch = e->count; | |
239 else | |
240 count_in += e->count; | |
241 | |
242 if (count_in == 0) | |
243 expected = count_latch * 2; | |
244 else | |
245 expected = (count_latch + count_in - 1) / count_in; | |
246 | |
247 return expected; | |
248 } | |
249 else | |
250 { | |
251 int freq_in, freq_latch; | |
252 | |
253 freq_in = 0; | |
254 freq_latch = 0; | |
255 | |
256 FOR_EACH_EDGE (e, ei, loop->header->preds) | |
257 if (e->src == loop->latch) | |
258 freq_latch = EDGE_FREQUENCY (e); | |
259 else | |
260 freq_in += EDGE_FREQUENCY (e); | |
261 | |
262 if (freq_in == 0) | |
263 return freq_latch * 2; | |
264 | |
265 return (freq_latch + freq_in - 1) / freq_in; | |
266 } | |
267 } | |
268 | |
269 /* Returns expected number of LOOP iterations. The returned value is bounded | |
270 by REG_BR_PROB_BASE. */ | |
271 | |
272 unsigned | |
273 expected_loop_iterations (const struct loop *loop) | |
274 { | |
275 gcov_type expected = expected_loop_iterations_unbounded (loop); | |
276 return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected); | |
277 } | |
278 | |
279 /* Returns the maximum level of nesting of subloops of LOOP. */ | |
280 | |
281 unsigned | |
282 get_loop_level (const struct loop *loop) | |
283 { | |
284 const struct loop *ploop; | |
285 unsigned mx = 0, l; | |
286 | |
287 for (ploop = loop->inner; ploop; ploop = ploop->next) | |
288 { | |
289 l = get_loop_level (ploop); | |
290 if (l >= mx) | |
291 mx = l + 1; | |
292 } | |
293 return mx; | |
294 } | |
295 | |
296 /* Returns estimate on cost of computing SEQ. */ | |
297 | |
298 static unsigned | |
299 seq_cost (const_rtx seq, bool speed) | |
300 { | |
301 unsigned cost = 0; | |
302 rtx set; | |
303 | |
304 for (; seq; seq = NEXT_INSN (seq)) | |
305 { | |
306 set = single_set (seq); | |
307 if (set) | |
308 cost += rtx_cost (set, SET, speed); | |
309 else | |
310 cost++; | |
311 } | |
312 | |
313 return cost; | |
314 } | |
315 | |
316 /* The properties of the target. */ | |
317 | |
318 unsigned target_avail_regs; /* Number of available registers. */ | |
319 unsigned target_res_regs; /* Number of registers reserved for temporary | |
320 expressions. */ | |
321 unsigned target_reg_cost[2]; /* The cost for register when there still | |
322 is some reserve, but we are approaching | |
323 the number of available registers. */ | |
324 unsigned target_spill_cost[2]; /* The cost for register when we need | |
325 to spill. */ | |
326 | |
327 /* Initialize the constants for computing set costs. */ | |
328 | |
329 void | |
330 init_set_costs (void) | |
331 { | |
332 int speed; | |
333 rtx seq; | |
334 rtx reg1 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER); | |
335 rtx reg2 = gen_raw_REG (SImode, FIRST_PSEUDO_REGISTER + 1); | |
336 rtx addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 2); | |
337 rtx mem = validize_mem (gen_rtx_MEM (SImode, addr)); | |
338 unsigned i; | |
339 | |
340 target_avail_regs = 0; | |
341 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) | |
342 if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i) | |
343 && !fixed_regs[i]) | |
344 target_avail_regs++; | |
345 | |
346 target_res_regs = 3; | |
347 | |
348 for (speed = 0; speed < 2; speed++) | |
349 { | |
350 crtl->maybe_hot_insn_p = speed; | |
351 /* Set up the costs for using extra registers: | |
352 | |
353 1) If not many free registers remain, we should prefer having an | |
354 additional move to decreasing the number of available registers. | |
355 (TARGET_REG_COST). | |
356 2) If no registers are available, we need to spill, which may require | |
357 storing the old value to memory and loading it back | |
358 (TARGET_SPILL_COST). */ | |
359 | |
360 start_sequence (); | |
361 emit_move_insn (reg1, reg2); | |
362 seq = get_insns (); | |
363 end_sequence (); | |
364 target_reg_cost [speed] = seq_cost (seq, speed); | |
365 | |
366 start_sequence (); | |
367 emit_move_insn (mem, reg1); | |
368 emit_move_insn (reg2, mem); | |
369 seq = get_insns (); | |
370 end_sequence (); | |
371 target_spill_cost [speed] = seq_cost (seq, speed); | |
372 } | |
373 default_rtl_profile (); | |
374 } | |
375 | |
376 /* Estimates cost of increased register pressure caused by making N_NEW new | |
377 registers live around the loop. N_OLD is the number of registers live | |
378 around the loop. */ | |
379 | |
380 unsigned | |
381 estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed) | |
382 { | |
383 unsigned cost; | |
384 unsigned regs_needed = n_new + n_old; | |
385 | |
386 /* If we have enough registers, we should use them and not restrict | |
387 the transformations unnecessarily. */ | |
388 if (regs_needed + target_res_regs <= target_avail_regs) | |
389 return 0; | |
390 | |
391 if (regs_needed <= target_avail_regs) | |
392 /* If we are close to running out of registers, try to preserve | |
393 them. */ | |
394 cost = target_reg_cost [speed] * n_new; | |
395 else | |
396 /* If we run out of registers, it is very expensive to add another | |
397 one. */ | |
398 cost = target_spill_cost [speed] * n_new; | |
399 | |
400 if (optimize && (flag_ira_region == IRA_REGION_ALL | |
401 || flag_ira_region == IRA_REGION_MIXED) | |
402 && number_of_loops () <= (unsigned) IRA_MAX_LOOPS_NUM) | |
403 /* IRA regional allocation deals with high register pressure | |
404 better. So decrease the cost (to do more accurate the cost | |
405 calculation for IRA, we need to know how many registers lives | |
406 through the loop transparently). */ | |
407 cost /= 2; | |
408 | |
409 return cost; | |
410 } | |
411 | |
412 /* Sets EDGE_LOOP_EXIT flag for all loop exits. */ | |
413 | |
414 void | |
415 mark_loop_exit_edges (void) | |
416 { | |
417 basic_block bb; | |
418 edge e; | |
419 | |
420 if (number_of_loops () <= 1) | |
421 return; | |
422 | |
423 FOR_EACH_BB (bb) | |
424 { | |
425 edge_iterator ei; | |
426 | |
427 FOR_EACH_EDGE (e, ei, bb->succs) | |
428 { | |
429 if (loop_outer (bb->loop_father) | |
430 && loop_exit_edge_p (bb->loop_father, e)) | |
431 e->flags |= EDGE_LOOP_EXIT; | |
432 else | |
433 e->flags &= ~EDGE_LOOP_EXIT; | |
434 } | |
435 } | |
436 } | |
437 |