Mercurial > hg > CbC > CbC_gcc
annotate gcc/cfgloopanal.c @ 158:494b0b89df80 default tip
...
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 25 May 2020 18:13:55 +0900 |
parents | 1830386684a0 |
children |
rev | line source |
---|---|
0 | 1 /* Natural loop analysis code for GNU compiler. |
145 | 2 Copyright (C) 2002-2020 Free Software Foundation, Inc. |
0 | 3 |
4 This file is part of GCC. | |
5 | |
6 GCC is free software; you can redistribute it and/or modify it under | |
7 the terms of the GNU General Public License as published by the Free | |
8 Software Foundation; either version 3, or (at your option) any later | |
9 version. | |
10 | |
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 for more details. | |
15 | |
16 You should have received a copy of the GNU General Public License | |
17 along with GCC; see the file COPYING3. If not see | |
18 <http://www.gnu.org/licenses/>. */ | |
19 | |
20 #include "config.h" | |
21 #include "system.h" | |
22 #include "coretypes.h" | |
111 | 23 #include "backend.h" |
0 | 24 #include "rtl.h" |
111 | 25 #include "tree.h" |
26 #include "predict.h" | |
27 #include "memmodel.h" | |
28 #include "emit-rtl.h" | |
0 | 29 #include "cfgloop.h" |
111 | 30 #include "explow.h" |
0 | 31 #include "expr.h" |
32 #include "graphds.h" | |
131 | 33 #include "sreal.h" |
145 | 34 #include "regs.h" |
35 #include "function-abi.h" | |
0 | 36 |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
37 struct target_cfgloop default_target_cfgloop; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
38 #if SWITCHABLE_TARGET |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
39 struct target_cfgloop *this_target_cfgloop = &default_target_cfgloop; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
40 #endif |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
41 |
0 | 42 /* Checks whether BB is executed exactly once in each LOOP iteration. */ |
43 | |
44 bool | |
145 | 45 just_once_each_iteration_p (const class loop *loop, const_basic_block bb) |
0 | 46 { |
47 /* It must be executed at least once each iteration. */ | |
48 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) | |
49 return false; | |
50 | |
51 /* And just once. */ | |
52 if (bb->loop_father != loop) | |
53 return false; | |
54 | |
55 /* But this was not enough. We might have some irreducible loop here. */ | |
56 if (bb->flags & BB_IRREDUCIBLE_LOOP) | |
57 return false; | |
58 | |
59 return true; | |
60 } | |
61 | |
62 /* Marks blocks and edges that are part of non-recognized loops; i.e. we | |
63 throw away all latch edges and mark blocks inside any remaining cycle. | |
64 Everything is a bit complicated due to fact we do not want to do this | |
65 for parts of cycles that only "pass" through some loop -- i.e. for | |
66 each cycle, we want to mark blocks that belong directly to innermost | |
67 loop containing the whole cycle. | |
68 | |
69 LOOPS is the loop tree. */ | |
70 | |
111 | 71 #define LOOP_REPR(LOOP) ((LOOP)->num + last_basic_block_for_fn (cfun)) |
0 | 72 #define BB_REPR(BB) ((BB)->index + 1) |
73 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
74 bool |
0 | 75 mark_irreducible_loops (void) |
76 { | |
77 basic_block act; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
78 struct graph_edge *ge; |
0 | 79 edge e; |
80 edge_iterator ei; | |
81 int src, dest; | |
82 unsigned depth; | |
83 struct graph *g; | |
111 | 84 int num = number_of_loops (cfun); |
145 | 85 class loop *cloop; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
86 bool irred_loop_found = false; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
87 int i; |
0 | 88 |
89 gcc_assert (current_loops != NULL); | |
90 | |
91 /* Reset the flags. */ | |
111 | 92 FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun), |
93 EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb) | |
0 | 94 { |
95 act->flags &= ~BB_IRREDUCIBLE_LOOP; | |
96 FOR_EACH_EDGE (e, ei, act->succs) | |
97 e->flags &= ~EDGE_IRREDUCIBLE_LOOP; | |
98 } | |
99 | |
100 /* Create the edge lists. */ | |
111 | 101 g = new_graph (last_basic_block_for_fn (cfun) + num); |
0 | 102 |
111 | 103 FOR_BB_BETWEEN (act, ENTRY_BLOCK_PTR_FOR_FN (cfun), |
104 EXIT_BLOCK_PTR_FOR_FN (cfun), next_bb) | |
0 | 105 FOR_EACH_EDGE (e, ei, act->succs) |
106 { | |
107 /* Ignore edges to exit. */ | |
111 | 108 if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun)) |
0 | 109 continue; |
110 | |
111 src = BB_REPR (act); | |
112 dest = BB_REPR (e->dest); | |
113 | |
114 /* Ignore latch edges. */ | |
115 if (e->dest->loop_father->header == e->dest | |
116 && e->dest->loop_father->latch == act) | |
117 continue; | |
118 | |
119 /* Edges inside a single loop should be left where they are. Edges | |
120 to subloop headers should lead to representative of the subloop, | |
121 but from the same place. | |
122 | |
123 Edges exiting loops should lead from representative | |
124 of the son of nearest common ancestor of the loops in that | |
125 act lays. */ | |
126 | |
127 if (e->dest->loop_father->header == e->dest) | |
128 dest = LOOP_REPR (e->dest->loop_father); | |
129 | |
130 if (!flow_bb_inside_loop_p (act->loop_father, e->dest)) | |
131 { | |
132 depth = 1 + loop_depth (find_common_loop (act->loop_father, | |
133 e->dest->loop_father)); | |
134 if (depth == loop_depth (act->loop_father)) | |
135 cloop = act->loop_father; | |
136 else | |
111 | 137 cloop = (*act->loop_father->superloops)[depth]; |
0 | 138 |
139 src = LOOP_REPR (cloop); | |
140 } | |
141 | |
142 add_edge (g, src, dest)->data = e; | |
143 } | |
144 | |
145 /* Find the strongly connected components. */ | |
146 graphds_scc (g, NULL); | |
147 | |
148 /* Mark the irreducible loops. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
149 for (i = 0; i < g->n_vertices; i++) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
150 for (ge = g->vertices[i].succ; ge; ge = ge->succ_next) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
151 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
152 edge real = (edge) ge->data; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
153 /* edge E in graph G is irreducible if it connects two vertices in the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
154 same scc. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
155 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
156 /* All edges should lead from a component with higher number to the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
157 one with lower one. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
158 gcc_assert (g->vertices[ge->src].component >= g->vertices[ge->dest].component); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
159 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
160 if (g->vertices[ge->src].component != g->vertices[ge->dest].component) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
161 continue; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
162 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
163 real->flags |= EDGE_IRREDUCIBLE_LOOP; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
164 irred_loop_found = true; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
165 if (flow_bb_inside_loop_p (real->src->loop_father, real->dest)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
166 real->src->flags |= BB_IRREDUCIBLE_LOOP; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
167 } |
0 | 168 |
169 free_graph (g); | |
170 | |
171 loops_state_set (LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
172 return irred_loop_found; |
0 | 173 } |
174 | |
175 /* Counts number of insns inside LOOP. */ | |
176 int | |
145 | 177 num_loop_insns (const class loop *loop) |
0 | 178 { |
179 basic_block *bbs, bb; | |
180 unsigned i, ninsns = 0; | |
111 | 181 rtx_insn *insn; |
0 | 182 |
183 bbs = get_loop_body (loop); | |
184 for (i = 0; i < loop->num_nodes; i++) | |
185 { | |
186 bb = bbs[i]; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
187 FOR_BB_INSNS (bb, insn) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
188 if (NONDEBUG_INSN_P (insn)) |
0 | 189 ninsns++; |
190 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
191 free (bbs); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
192 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
193 if (!ninsns) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
194 ninsns = 1; /* To avoid division by zero. */ |
0 | 195 |
196 return ninsns; | |
197 } | |
198 | |
199 /* Counts number of insns executed on average per iteration LOOP. */ | |
200 int | |
145 | 201 average_num_loop_insns (const class loop *loop) |
0 | 202 { |
203 basic_block *bbs, bb; | |
131 | 204 unsigned i, binsns; |
205 sreal ninsns; | |
111 | 206 rtx_insn *insn; |
0 | 207 |
208 ninsns = 0; | |
209 bbs = get_loop_body (loop); | |
210 for (i = 0; i < loop->num_nodes; i++) | |
211 { | |
212 bb = bbs[i]; | |
213 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
214 binsns = 0; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
215 FOR_BB_INSNS (bb, insn) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
216 if (NONDEBUG_INSN_P (insn)) |
0 | 217 binsns++; |
218 | |
131 | 219 ninsns += (sreal)binsns * bb->count.to_sreal_scale (loop->header->count); |
220 /* Avoid overflows. */ | |
221 if (ninsns > 1000000) | |
145 | 222 { |
223 free (bbs); | |
224 return 1000000; | |
225 } | |
0 | 226 } |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
227 free (bbs); |
0 | 228 |
131 | 229 int64_t ret = ninsns.to_int (); |
230 if (!ret) | |
231 ret = 1; /* To avoid division by zero. */ | |
0 | 232 |
131 | 233 return ret; |
0 | 234 } |
235 | |
236 /* Returns expected number of iterations of LOOP, according to | |
131 | 237 measured or guessed profile. |
238 | |
239 This functions attempts to return "sane" value even if profile | |
240 information is not good enough to derive osmething. | |
241 If BY_PROFILE_ONLY is set, this logic is bypassed and function | |
242 return -1 in those scenarios. */ | |
0 | 243 |
244 gcov_type | |
145 | 245 expected_loop_iterations_unbounded (const class loop *loop, |
131 | 246 bool *read_profile_p, |
247 bool by_profile_only) | |
0 | 248 { |
249 edge e; | |
250 edge_iterator ei; | |
111 | 251 gcov_type expected = -1; |
252 | |
253 if (read_profile_p) | |
254 *read_profile_p = false; | |
0 | 255 |
111 | 256 /* If we have no profile at all, use AVG_LOOP_NITER. */ |
257 if (profile_status_for_fn (cfun) == PROFILE_ABSENT) | |
131 | 258 { |
259 if (by_profile_only) | |
260 return -1; | |
145 | 261 expected = param_avg_loop_niter; |
131 | 262 } |
263 else if (loop->latch && (loop->latch->count.initialized_p () | |
264 || loop->header->count.initialized_p ())) | |
0 | 265 { |
111 | 266 profile_count count_in = profile_count::zero (), |
267 count_latch = profile_count::zero (); | |
0 | 268 |
269 FOR_EACH_EDGE (e, ei, loop->header->preds) | |
270 if (e->src == loop->latch) | |
111 | 271 count_latch = e->count (); |
0 | 272 else |
111 | 273 count_in += e->count (); |
0 | 274 |
111 | 275 if (!count_latch.initialized_p ()) |
131 | 276 { |
277 if (by_profile_only) | |
278 return -1; | |
145 | 279 expected = param_avg_loop_niter; |
131 | 280 } |
281 else if (!count_in.nonzero_p ()) | |
282 { | |
283 if (by_profile_only) | |
284 return -1; | |
285 expected = count_latch.to_gcov_type () * 2; | |
286 } | |
0 | 287 else |
111 | 288 { |
289 expected = (count_latch.to_gcov_type () + count_in.to_gcov_type () | |
290 - 1) / count_in.to_gcov_type (); | |
131 | 291 if (read_profile_p |
292 && count_latch.reliable_p () && count_in.reliable_p ()) | |
111 | 293 *read_profile_p = true; |
294 } | |
0 | 295 } |
131 | 296 else |
0 | 297 { |
131 | 298 if (by_profile_only) |
299 return -1; | |
145 | 300 expected = param_avg_loop_niter; |
111 | 301 } |
0 | 302 |
131 | 303 if (!by_profile_only) |
304 { | |
305 HOST_WIDE_INT max = get_max_loop_iterations_int (loop); | |
306 if (max != -1 && max < expected) | |
307 return max; | |
308 } | |
309 | |
111 | 310 return expected; |
0 | 311 } |
312 | |
313 /* Returns expected number of LOOP iterations. The returned value is bounded | |
314 by REG_BR_PROB_BASE. */ | |
315 | |
316 unsigned | |
145 | 317 expected_loop_iterations (class loop *loop) |
0 | 318 { |
319 gcov_type expected = expected_loop_iterations_unbounded (loop); | |
320 return (expected > REG_BR_PROB_BASE ? REG_BR_PROB_BASE : expected); | |
321 } | |
322 | |
323 /* Returns the maximum level of nesting of subloops of LOOP. */ | |
324 | |
325 unsigned | |
145 | 326 get_loop_level (const class loop *loop) |
0 | 327 { |
145 | 328 const class loop *ploop; |
0 | 329 unsigned mx = 0, l; |
330 | |
331 for (ploop = loop->inner; ploop; ploop = ploop->next) | |
332 { | |
333 l = get_loop_level (ploop); | |
334 if (l >= mx) | |
335 mx = l + 1; | |
336 } | |
337 return mx; | |
338 } | |
339 | |
340 /* Initialize the constants for computing set costs. */ | |
341 | |
342 void | |
343 init_set_costs (void) | |
344 { | |
345 int speed; | |
111 | 346 rtx_insn *seq; |
347 rtx reg1 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 1); | |
348 rtx reg2 = gen_raw_REG (SImode, LAST_VIRTUAL_REGISTER + 2); | |
349 rtx addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 3); | |
0 | 350 rtx mem = validize_mem (gen_rtx_MEM (SImode, addr)); |
351 unsigned i; | |
352 | |
353 target_avail_regs = 0; | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
354 target_clobbered_regs = 0; |
0 | 355 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) |
356 if (TEST_HARD_REG_BIT (reg_class_contents[GENERAL_REGS], i) | |
357 && !fixed_regs[i]) | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
358 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
359 target_avail_regs++; |
145 | 360 /* ??? This is only a rough heuristic. It doesn't cope well |
361 with alternative ABIs, but that's an optimization rather than | |
362 correctness issue. */ | |
363 if (default_function_abi.clobbers_full_reg_p (i)) | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
364 target_clobbered_regs++; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
365 } |
0 | 366 |
367 target_res_regs = 3; | |
368 | |
369 for (speed = 0; speed < 2; speed++) | |
370 { | |
371 crtl->maybe_hot_insn_p = speed; | |
372 /* Set up the costs for using extra registers: | |
373 | |
374 1) If not many free registers remain, we should prefer having an | |
375 additional move to decreasing the number of available registers. | |
376 (TARGET_REG_COST). | |
377 2) If no registers are available, we need to spill, which may require | |
378 storing the old value to memory and loading it back | |
379 (TARGET_SPILL_COST). */ | |
380 | |
381 start_sequence (); | |
382 emit_move_insn (reg1, reg2); | |
383 seq = get_insns (); | |
384 end_sequence (); | |
385 target_reg_cost [speed] = seq_cost (seq, speed); | |
386 | |
387 start_sequence (); | |
388 emit_move_insn (mem, reg1); | |
389 emit_move_insn (reg2, mem); | |
390 seq = get_insns (); | |
391 end_sequence (); | |
392 target_spill_cost [speed] = seq_cost (seq, speed); | |
393 } | |
394 default_rtl_profile (); | |
395 } | |
396 | |
397 /* Estimates cost of increased register pressure caused by making N_NEW new | |
398 registers live around the loop. N_OLD is the number of registers live | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
399 around the loop. If CALL_P is true, also take into account that |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
400 call-used registers may be clobbered in the loop body, reducing the |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
401 number of available registers before we spill. */ |
0 | 402 |
403 unsigned | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
404 estimate_reg_pressure_cost (unsigned n_new, unsigned n_old, bool speed, |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
405 bool call_p) |
0 | 406 { |
407 unsigned cost; | |
408 unsigned regs_needed = n_new + n_old; | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
409 unsigned available_regs = target_avail_regs; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
410 |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
411 /* If there is a call in the loop body, the call-clobbered registers |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
412 are not available for loop invariants. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
413 if (call_p) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
414 available_regs = available_regs - target_clobbered_regs; |
0 | 415 |
416 /* If we have enough registers, we should use them and not restrict | |
417 the transformations unnecessarily. */ | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
418 if (regs_needed + target_res_regs <= available_regs) |
0 | 419 return 0; |
420 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
421 if (regs_needed <= available_regs) |
0 | 422 /* If we are close to running out of registers, try to preserve |
423 them. */ | |
424 cost = target_reg_cost [speed] * n_new; | |
425 else | |
426 /* If we run out of registers, it is very expensive to add another | |
427 one. */ | |
428 cost = target_spill_cost [speed] * n_new; | |
429 | |
430 if (optimize && (flag_ira_region == IRA_REGION_ALL | |
431 || flag_ira_region == IRA_REGION_MIXED) | |
145 | 432 && number_of_loops (cfun) <= (unsigned) param_ira_max_loops_num) |
0 | 433 /* IRA regional allocation deals with high register pressure |
434 better. So decrease the cost (to do more accurate the cost | |
435 calculation for IRA, we need to know how many registers lives | |
436 through the loop transparently). */ | |
437 cost /= 2; | |
438 | |
439 return cost; | |
440 } | |
441 | |
442 /* Sets EDGE_LOOP_EXIT flag for all loop exits. */ | |
443 | |
444 void | |
445 mark_loop_exit_edges (void) | |
446 { | |
447 basic_block bb; | |
448 edge e; | |
449 | |
111 | 450 if (number_of_loops (cfun) <= 1) |
0 | 451 return; |
452 | |
111 | 453 FOR_EACH_BB_FN (bb, cfun) |
0 | 454 { |
455 edge_iterator ei; | |
456 | |
457 FOR_EACH_EDGE (e, ei, bb->succs) | |
458 { | |
459 if (loop_outer (bb->loop_father) | |
460 && loop_exit_edge_p (bb->loop_father, e)) | |
461 e->flags |= EDGE_LOOP_EXIT; | |
462 else | |
463 e->flags &= ~EDGE_LOOP_EXIT; | |
464 } | |
465 } | |
466 } | |
467 | |
111 | 468 /* Return exit edge if loop has only one exit that is likely |
469 to be executed on runtime (i.e. it is not EH or leading | |
470 to noreturn call. */ | |
471 | |
472 edge | |
145 | 473 single_likely_exit (class loop *loop, vec<edge> exits) |
111 | 474 { |
475 edge found = single_exit (loop); | |
476 unsigned i; | |
477 edge ex; | |
478 | |
479 if (found) | |
480 return found; | |
481 FOR_EACH_VEC_ELT (exits, i, ex) | |
482 { | |
483 if (probably_never_executed_edge_p (cfun, ex) | |
484 /* We want to rule out paths to noreturns but not low probabilities | |
485 resulting from adjustments or combining. | |
486 FIXME: once we have better quality tracking, make this more | |
487 robust. */ | |
488 || ex->probability <= profile_probability::very_unlikely ()) | |
489 continue; | |
490 if (!found) | |
491 found = ex; | |
492 else | |
145 | 493 return NULL; |
111 | 494 } |
495 return found; | |
496 } | |
497 | |
498 | |
499 /* Gets basic blocks of a LOOP. Header is the 0-th block, rest is in dfs | |
500 order against direction of edges from latch. Specially, if | |
501 header != latch, latch is the 1-st block. */ | |
502 | |
503 vec<basic_block> | |
145 | 504 get_loop_hot_path (const class loop *loop) |
111 | 505 { |
506 basic_block bb = loop->header; | |
507 vec<basic_block> path = vNULL; | |
508 bitmap visited = BITMAP_ALLOC (NULL); | |
509 | |
510 while (true) | |
511 { | |
512 edge_iterator ei; | |
513 edge e; | |
514 edge best = NULL; | |
515 | |
516 path.safe_push (bb); | |
517 bitmap_set_bit (visited, bb->index); | |
518 FOR_EACH_EDGE (e, ei, bb->succs) | |
519 if ((!best || e->probability > best->probability) | |
520 && !loop_exit_edge_p (loop, e) | |
521 && !bitmap_bit_p (visited, e->dest->index)) | |
522 best = e; | |
523 if (!best || best->dest == loop->header) | |
524 break; | |
525 bb = best->dest; | |
526 } | |
527 BITMAP_FREE (visited); | |
528 return path; | |
529 } |