Mercurial > hg > CbC > CbC_gcc
annotate gcc/loop-unroll.c @ 63:b7f97abdc517 gcc-4.6-20100522
update gcc from gcc-4.5.0 to gcc-4.6
author | ryoma <e075725@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 24 May 2010 12:47:05 +0900 |
parents | 77e2b8dfacca |
children | f6334be47118 |
rev | line source |
---|---|
0 | 1 /* Loop unrolling and peeling. |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
2 Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2010 |
0 | 3 Free Software Foundation, Inc. |
4 | |
5 This file is part of GCC. | |
6 | |
7 GCC is free software; you can redistribute it and/or modify it under | |
8 the terms of the GNU General Public License as published by the Free | |
9 Software Foundation; either version 3, or (at your option) any later | |
10 version. | |
11 | |
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with GCC; see the file COPYING3. If not see | |
19 <http://www.gnu.org/licenses/>. */ | |
20 | |
21 #include "config.h" | |
22 #include "system.h" | |
23 #include "coretypes.h" | |
24 #include "tm.h" | |
25 #include "rtl.h" | |
26 #include "hard-reg-set.h" | |
27 #include "obstack.h" | |
28 #include "basic-block.h" | |
29 #include "cfgloop.h" | |
30 #include "cfglayout.h" | |
31 #include "params.h" | |
32 #include "output.h" | |
33 #include "expr.h" | |
34 #include "hashtab.h" | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
35 #include "recog.h" |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
36 #include "target.h" |
0 | 37 |
38 /* This pass performs loop unrolling and peeling. We only perform these | |
39 optimizations on innermost loops (with single exception) because | |
40 the impact on performance is greatest here, and we want to avoid | |
41 unnecessary code size growth. The gain comes from more sequential | |
42 code, code that is easier for later passes to optimize, and in some cases | |
43 fewer tests of exit conditions. The main problem is code growth, | |
44 which hurts performance because of cache effects. | |
45 | |
46 What we do: | |
47 | |
48 -- complete peeling of once-rolling loops; this is the above mentioned | |
49 exception, as this causes loop to be cancelled completely and | |
50 does not cause code growth | |
51 -- complete peeling of loops that roll (small) constant times. | |
52 -- simple peeling of first iterations of loops that do not roll much | |
53 (according to profile feedback) | |
54 -- unrolling of loops that roll constant times; this is almost always | |
55 win, as we get rid of exit condition tests. | |
56 -- unrolling of loops that roll number of times that we can compute | |
57 in runtime; we also get rid of exit condition tests here, but there | |
58 is the extra expense for calculating the number of iterations | |
59 -- simple unrolling of remaining loops; this is performed only if we | |
60 are asked to, as the gain is questionable in this case and often | |
61 it may even slow down the code | |
62 For more detailed descriptions of each of those, see comments at | |
63 appropriate function below. | |
64 | |
65 There are many parameters (defined and described in params.def) that | |
66 control how much we unroll/peel. | |
67 | |
68 ??? A great problem is that we don't have a good way to determine | |
69 how many times we should unroll the loop; the experiments I have made | |
70 showed that this choice may affect performance on the order of several %. | |
71 */ | |
72 | |
73 /* Information about induction variables to split. */ | |
74 | |
75 struct iv_to_split | |
76 { | |
77 rtx insn; /* The insn in that the induction variable occurs. */ | |
78 rtx base_var; /* The variable on that the values in the further | |
79 iterations are based. */ | |
80 rtx step; /* Step of the induction variable. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
81 struct iv_to_split *next; /* Next entry in walking order. */ |
0 | 82 unsigned n_loc; |
83 unsigned loc[3]; /* Location where the definition of the induction | |
84 variable occurs in the insn. For example if | |
85 N_LOC is 2, the expression is located at | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
86 XEXP (XEXP (single_set, loc[0]), loc[1]). */ |
0 | 87 }; |
88 | |
89 /* Information about accumulators to expand. */ | |
90 | |
91 struct var_to_expand | |
92 { | |
93 rtx insn; /* The insn in that the variable expansion occurs. */ | |
94 rtx reg; /* The accumulator which is expanded. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
95 VEC(rtx,heap) *var_expansions; /* The copies of the accumulator which is expanded. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
96 struct var_to_expand *next; /* Next entry in walking order. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
97 enum rtx_code op; /* The type of the accumulation - addition, subtraction |
0 | 98 or multiplication. */ |
99 int expansion_count; /* Count the number of expansions generated so far. */ | |
100 int reuse_expansion; /* The expansion we intend to reuse to expand | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
101 the accumulator. If REUSE_EXPANSION is 0 reuse |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
102 the original accumulator. Else use |
0 | 103 var_expansions[REUSE_EXPANSION - 1]. */ |
104 unsigned accum_pos; /* The position in which the accumulator is placed in | |
105 the insn src. For example in x = x + something | |
106 accum_pos is 0 while in x = something + x accum_pos | |
107 is 1. */ | |
108 }; | |
109 | |
110 /* Information about optimization applied in | |
111 the unrolled loop. */ | |
112 | |
113 struct opt_info | |
114 { | |
115 htab_t insns_to_split; /* A hashtable of insns to split. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
116 struct iv_to_split *iv_to_split_head; /* The first iv to split. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
117 struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list. */ |
0 | 118 htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators |
119 to expand. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
120 struct var_to_expand *var_to_expand_head; /* The first var to expand. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
121 struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list. */ |
0 | 122 unsigned first_new_block; /* The first basic block that was |
123 duplicated. */ | |
124 basic_block loop_exit; /* The loop exit basic block. */ | |
125 basic_block loop_preheader; /* The loop preheader basic block. */ | |
126 }; | |
127 | |
128 static void decide_unrolling_and_peeling (int); | |
129 static void peel_loops_completely (int); | |
130 static void decide_peel_simple (struct loop *, int); | |
131 static void decide_peel_once_rolling (struct loop *, int); | |
132 static void decide_peel_completely (struct loop *, int); | |
133 static void decide_unroll_stupid (struct loop *, int); | |
134 static void decide_unroll_constant_iterations (struct loop *, int); | |
135 static void decide_unroll_runtime_iterations (struct loop *, int); | |
136 static void peel_loop_simple (struct loop *); | |
137 static void peel_loop_completely (struct loop *); | |
138 static void unroll_loop_stupid (struct loop *); | |
139 static void unroll_loop_constant_iterations (struct loop *); | |
140 static void unroll_loop_runtime_iterations (struct loop *); | |
141 static struct opt_info *analyze_insns_in_loop (struct loop *); | |
142 static void opt_info_start_duplication (struct opt_info *); | |
143 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool); | |
144 static void free_opt_info (struct opt_info *); | |
145 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx); | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
146 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *); |
0 | 147 static struct iv_to_split *analyze_iv_to_split_insn (rtx); |
148 static void expand_var_during_unrolling (struct var_to_expand *, rtx); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
149 static void insert_var_expansion_initialization (struct var_to_expand *, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
150 basic_block); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
151 static void combine_var_copies_in_loop_exit (struct var_to_expand *, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
152 basic_block); |
0 | 153 static rtx get_expansion (struct var_to_expand *); |
154 | |
155 /* Unroll and/or peel (depending on FLAGS) LOOPS. */ | |
156 void | |
157 unroll_and_peel_loops (int flags) | |
158 { | |
159 struct loop *loop; | |
160 bool check; | |
161 loop_iterator li; | |
162 | |
163 /* First perform complete loop peeling (it is almost surely a win, | |
164 and affects parameters for further decision a lot). */ | |
165 peel_loops_completely (flags); | |
166 | |
167 /* Now decide rest of unrolling and peeling. */ | |
168 decide_unrolling_and_peeling (flags); | |
169 | |
170 /* Scan the loops, inner ones first. */ | |
171 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) | |
172 { | |
173 check = true; | |
174 /* And perform the appropriate transformations. */ | |
175 switch (loop->lpt_decision.decision) | |
176 { | |
177 case LPT_PEEL_COMPLETELY: | |
178 /* Already done. */ | |
179 gcc_unreachable (); | |
180 case LPT_PEEL_SIMPLE: | |
181 peel_loop_simple (loop); | |
182 break; | |
183 case LPT_UNROLL_CONSTANT: | |
184 unroll_loop_constant_iterations (loop); | |
185 break; | |
186 case LPT_UNROLL_RUNTIME: | |
187 unroll_loop_runtime_iterations (loop); | |
188 break; | |
189 case LPT_UNROLL_STUPID: | |
190 unroll_loop_stupid (loop); | |
191 break; | |
192 case LPT_NONE: | |
193 check = false; | |
194 break; | |
195 default: | |
196 gcc_unreachable (); | |
197 } | |
198 if (check) | |
199 { | |
200 #ifdef ENABLE_CHECKING | |
201 verify_dominators (CDI_DOMINATORS); | |
202 verify_loop_structure (); | |
203 #endif | |
204 } | |
205 } | |
206 | |
207 iv_analysis_done (); | |
208 } | |
209 | |
210 /* Check whether exit of the LOOP is at the end of loop body. */ | |
211 | |
212 static bool | |
213 loop_exit_at_end_p (struct loop *loop) | |
214 { | |
215 struct niter_desc *desc = get_simple_loop_desc (loop); | |
216 rtx insn; | |
217 | |
218 if (desc->in_edge->dest != loop->latch) | |
219 return false; | |
220 | |
221 /* Check that the latch is empty. */ | |
222 FOR_BB_INSNS (loop->latch, insn) | |
223 { | |
224 if (INSN_P (insn)) | |
225 return false; | |
226 } | |
227 | |
228 return true; | |
229 } | |
230 | |
231 /* Depending on FLAGS, check whether to peel loops completely and do so. */ | |
232 static void | |
233 peel_loops_completely (int flags) | |
234 { | |
235 struct loop *loop; | |
236 loop_iterator li; | |
237 | |
238 /* Scan the loops, the inner ones first. */ | |
239 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) | |
240 { | |
241 loop->lpt_decision.decision = LPT_NONE; | |
242 | |
243 if (dump_file) | |
244 fprintf (dump_file, | |
245 "\n;; *** Considering loop %d for complete peeling ***\n", | |
246 loop->num); | |
247 | |
248 loop->ninsns = num_loop_insns (loop); | |
249 | |
250 decide_peel_once_rolling (loop, flags); | |
251 if (loop->lpt_decision.decision == LPT_NONE) | |
252 decide_peel_completely (loop, flags); | |
253 | |
254 if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY) | |
255 { | |
256 peel_loop_completely (loop); | |
257 #ifdef ENABLE_CHECKING | |
258 verify_dominators (CDI_DOMINATORS); | |
259 verify_loop_structure (); | |
260 #endif | |
261 } | |
262 } | |
263 } | |
264 | |
265 /* Decide whether unroll or peel loops (depending on FLAGS) and how much. */ | |
266 static void | |
267 decide_unrolling_and_peeling (int flags) | |
268 { | |
269 struct loop *loop; | |
270 loop_iterator li; | |
271 | |
272 /* Scan the loops, inner ones first. */ | |
273 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) | |
274 { | |
275 loop->lpt_decision.decision = LPT_NONE; | |
276 | |
277 if (dump_file) | |
278 fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num); | |
279 | |
280 /* Do not peel cold areas. */ | |
281 if (optimize_loop_for_size_p (loop)) | |
282 { | |
283 if (dump_file) | |
284 fprintf (dump_file, ";; Not considering loop, cold area\n"); | |
285 continue; | |
286 } | |
287 | |
288 /* Can the loop be manipulated? */ | |
289 if (!can_duplicate_loop_p (loop)) | |
290 { | |
291 if (dump_file) | |
292 fprintf (dump_file, | |
293 ";; Not considering loop, cannot duplicate\n"); | |
294 continue; | |
295 } | |
296 | |
297 /* Skip non-innermost loops. */ | |
298 if (loop->inner) | |
299 { | |
300 if (dump_file) | |
301 fprintf (dump_file, ";; Not considering loop, is not innermost\n"); | |
302 continue; | |
303 } | |
304 | |
305 loop->ninsns = num_loop_insns (loop); | |
306 loop->av_ninsns = average_num_loop_insns (loop); | |
307 | |
308 /* Try transformations one by one in decreasing order of | |
309 priority. */ | |
310 | |
311 decide_unroll_constant_iterations (loop, flags); | |
312 if (loop->lpt_decision.decision == LPT_NONE) | |
313 decide_unroll_runtime_iterations (loop, flags); | |
314 if (loop->lpt_decision.decision == LPT_NONE) | |
315 decide_unroll_stupid (loop, flags); | |
316 if (loop->lpt_decision.decision == LPT_NONE) | |
317 decide_peel_simple (loop, flags); | |
318 } | |
319 } | |
320 | |
321 /* Decide whether the LOOP is once rolling and suitable for complete | |
322 peeling. */ | |
323 static void | |
324 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED) | |
325 { | |
326 struct niter_desc *desc; | |
327 | |
328 if (dump_file) | |
329 fprintf (dump_file, "\n;; Considering peeling once rolling loop\n"); | |
330 | |
331 /* Is the loop small enough? */ | |
332 if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns) | |
333 { | |
334 if (dump_file) | |
335 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
336 return; | |
337 } | |
338 | |
339 /* Check for simple loops. */ | |
340 desc = get_simple_loop_desc (loop); | |
341 | |
342 /* Check number of iterations. */ | |
343 if (!desc->simple_p | |
344 || desc->assumptions | |
345 || desc->infinite | |
346 || !desc->const_iter | |
347 || desc->niter != 0) | |
348 { | |
349 if (dump_file) | |
350 fprintf (dump_file, | |
351 ";; Unable to prove that the loop rolls exactly once\n"); | |
352 return; | |
353 } | |
354 | |
355 /* Success. */ | |
356 if (dump_file) | |
357 fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n"); | |
358 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY; | |
359 } | |
360 | |
361 /* Decide whether the LOOP is suitable for complete peeling. */ | |
362 static void | |
363 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED) | |
364 { | |
365 unsigned npeel; | |
366 struct niter_desc *desc; | |
367 | |
368 if (dump_file) | |
369 fprintf (dump_file, "\n;; Considering peeling completely\n"); | |
370 | |
371 /* Skip non-innermost loops. */ | |
372 if (loop->inner) | |
373 { | |
374 if (dump_file) | |
375 fprintf (dump_file, ";; Not considering loop, is not innermost\n"); | |
376 return; | |
377 } | |
378 | |
379 /* Do not peel cold areas. */ | |
380 if (optimize_loop_for_size_p (loop)) | |
381 { | |
382 if (dump_file) | |
383 fprintf (dump_file, ";; Not considering loop, cold area\n"); | |
384 return; | |
385 } | |
386 | |
387 /* Can the loop be manipulated? */ | |
388 if (!can_duplicate_loop_p (loop)) | |
389 { | |
390 if (dump_file) | |
391 fprintf (dump_file, | |
392 ";; Not considering loop, cannot duplicate\n"); | |
393 return; | |
394 } | |
395 | |
396 /* npeel = number of iterations to peel. */ | |
397 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns; | |
398 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) | |
399 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES); | |
400 | |
401 /* Is the loop small enough? */ | |
402 if (!npeel) | |
403 { | |
404 if (dump_file) | |
405 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
406 return; | |
407 } | |
408 | |
409 /* Check for simple loops. */ | |
410 desc = get_simple_loop_desc (loop); | |
411 | |
412 /* Check number of iterations. */ | |
413 if (!desc->simple_p | |
414 || desc->assumptions | |
415 || !desc->const_iter | |
416 || desc->infinite) | |
417 { | |
418 if (dump_file) | |
419 fprintf (dump_file, | |
420 ";; Unable to prove that the loop iterates constant times\n"); | |
421 return; | |
422 } | |
423 | |
424 if (desc->niter > npeel - 1) | |
425 { | |
426 if (dump_file) | |
427 { | |
428 fprintf (dump_file, | |
429 ";; Not peeling loop completely, rolls too much ("); | |
430 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter); | |
431 fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel); | |
432 } | |
433 return; | |
434 } | |
435 | |
436 /* Success. */ | |
437 if (dump_file) | |
438 fprintf (dump_file, ";; Decided to peel loop completely\n"); | |
439 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY; | |
440 } | |
441 | |
442 /* Peel all iterations of LOOP, remove exit edges and cancel the loop | |
443 completely. The transformation done: | |
444 | |
445 for (i = 0; i < 4; i++) | |
446 body; | |
447 | |
448 ==> | |
449 | |
450 i = 0; | |
451 body; i++; | |
452 body; i++; | |
453 body; i++; | |
454 body; i++; | |
455 */ | |
456 static void | |
457 peel_loop_completely (struct loop *loop) | |
458 { | |
459 sbitmap wont_exit; | |
460 unsigned HOST_WIDE_INT npeel; | |
461 unsigned i; | |
462 VEC (edge, heap) *remove_edges; | |
463 edge ein; | |
464 struct niter_desc *desc = get_simple_loop_desc (loop); | |
465 struct opt_info *opt_info = NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
466 |
0 | 467 npeel = desc->niter; |
468 | |
469 if (npeel) | |
470 { | |
471 bool ok; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
472 |
0 | 473 wont_exit = sbitmap_alloc (npeel + 1); |
474 sbitmap_ones (wont_exit); | |
475 RESET_BIT (wont_exit, 0); | |
476 if (desc->noloop_assumptions) | |
477 RESET_BIT (wont_exit, 1); | |
478 | |
479 remove_edges = NULL; | |
480 | |
481 if (flag_split_ivs_in_unroller) | |
482 opt_info = analyze_insns_in_loop (loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
483 |
0 | 484 opt_info_start_duplication (opt_info); |
485 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
486 npeel, | |
487 wont_exit, desc->out_edge, | |
488 &remove_edges, | |
489 DLTHE_FLAG_UPDATE_FREQ | |
490 | DLTHE_FLAG_COMPLETTE_PEEL | |
491 | (opt_info | |
492 ? DLTHE_RECORD_COPY_NUMBER : 0)); | |
493 gcc_assert (ok); | |
494 | |
495 free (wont_exit); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
496 |
0 | 497 if (opt_info) |
498 { | |
499 apply_opt_in_copies (opt_info, npeel, false, true); | |
500 free_opt_info (opt_info); | |
501 } | |
502 | |
503 /* Remove the exit edges. */ | |
504 for (i = 0; VEC_iterate (edge, remove_edges, i, ein); i++) | |
505 remove_path (ein); | |
506 VEC_free (edge, heap, remove_edges); | |
507 } | |
508 | |
509 ein = desc->in_edge; | |
510 free_simple_loop_desc (loop); | |
511 | |
512 /* Now remove the unreachable part of the last iteration and cancel | |
513 the loop. */ | |
514 remove_path (ein); | |
515 | |
516 if (dump_file) | |
517 fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel); | |
518 } | |
519 | |
520 /* Decide whether to unroll LOOP iterating constant number of times | |
521 and how much. */ | |
522 | |
523 static void | |
524 decide_unroll_constant_iterations (struct loop *loop, int flags) | |
525 { | |
526 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i; | |
527 struct niter_desc *desc; | |
528 | |
529 if (!(flags & UAP_UNROLL)) | |
530 { | |
531 /* We were not asked to, just return back silently. */ | |
532 return; | |
533 } | |
534 | |
535 if (dump_file) | |
536 fprintf (dump_file, | |
537 "\n;; Considering unrolling loop with constant " | |
538 "number of iterations\n"); | |
539 | |
540 /* nunroll = total number of copies of the original loop body in | |
541 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ | |
542 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns; | |
543 nunroll_by_av | |
544 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns; | |
545 if (nunroll > nunroll_by_av) | |
546 nunroll = nunroll_by_av; | |
547 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) | |
548 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); | |
549 | |
550 /* Skip big loops. */ | |
551 if (nunroll <= 1) | |
552 { | |
553 if (dump_file) | |
554 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
555 return; | |
556 } | |
557 | |
558 /* Check for simple loops. */ | |
559 desc = get_simple_loop_desc (loop); | |
560 | |
561 /* Check number of iterations. */ | |
562 if (!desc->simple_p || !desc->const_iter || desc->assumptions) | |
563 { | |
564 if (dump_file) | |
565 fprintf (dump_file, | |
566 ";; Unable to prove that the loop iterates constant times\n"); | |
567 return; | |
568 } | |
569 | |
570 /* Check whether the loop rolls enough to consider. */ | |
571 if (desc->niter < 2 * nunroll) | |
572 { | |
573 if (dump_file) | |
574 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); | |
575 return; | |
576 } | |
577 | |
578 /* Success; now compute number of iterations to unroll. We alter | |
579 nunroll so that as few as possible copies of loop body are | |
580 necessary, while still not decreasing the number of unrollings | |
581 too much (at most by 1). */ | |
582 best_copies = 2 * nunroll + 10; | |
583 | |
584 i = 2 * nunroll + 2; | |
585 if (i - 1 >= desc->niter) | |
586 i = desc->niter - 2; | |
587 | |
588 for (; i >= nunroll - 1; i--) | |
589 { | |
590 unsigned exit_mod = desc->niter % (i + 1); | |
591 | |
592 if (!loop_exit_at_end_p (loop)) | |
593 n_copies = exit_mod + i + 1; | |
594 else if (exit_mod != (unsigned) i | |
595 || desc->noloop_assumptions != NULL_RTX) | |
596 n_copies = exit_mod + i + 2; | |
597 else | |
598 n_copies = i + 1; | |
599 | |
600 if (n_copies < best_copies) | |
601 { | |
602 best_copies = n_copies; | |
603 best_unroll = i; | |
604 } | |
605 } | |
606 | |
607 if (dump_file) | |
608 fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n", | |
609 best_unroll + 1, best_copies, nunroll); | |
610 | |
611 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT; | |
612 loop->lpt_decision.times = best_unroll; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
613 |
0 | 614 if (dump_file) |
615 fprintf (dump_file, | |
616 ";; Decided to unroll the constant times rolling loop, %d times.\n", | |
617 loop->lpt_decision.times); | |
618 } | |
619 | |
620 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1 | |
621 times. The transformation does this: | |
622 | |
623 for (i = 0; i < 102; i++) | |
624 body; | |
625 | |
626 ==> | |
627 | |
628 i = 0; | |
629 body; i++; | |
630 body; i++; | |
631 while (i < 102) | |
632 { | |
633 body; i++; | |
634 body; i++; | |
635 body; i++; | |
636 body; i++; | |
637 } | |
638 */ | |
639 static void | |
640 unroll_loop_constant_iterations (struct loop *loop) | |
641 { | |
642 unsigned HOST_WIDE_INT niter; | |
643 unsigned exit_mod; | |
644 sbitmap wont_exit; | |
645 unsigned i; | |
646 VEC (edge, heap) *remove_edges; | |
647 edge e; | |
648 unsigned max_unroll = loop->lpt_decision.times; | |
649 struct niter_desc *desc = get_simple_loop_desc (loop); | |
650 bool exit_at_end = loop_exit_at_end_p (loop); | |
651 struct opt_info *opt_info = NULL; | |
652 bool ok; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
653 |
0 | 654 niter = desc->niter; |
655 | |
656 /* Should not get here (such loop should be peeled instead). */ | |
657 gcc_assert (niter > max_unroll + 1); | |
658 | |
659 exit_mod = niter % (max_unroll + 1); | |
660 | |
661 wont_exit = sbitmap_alloc (max_unroll + 1); | |
662 sbitmap_ones (wont_exit); | |
663 | |
664 remove_edges = NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
665 if (flag_split_ivs_in_unroller |
0 | 666 || flag_variable_expansion_in_unroller) |
667 opt_info = analyze_insns_in_loop (loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
668 |
0 | 669 if (!exit_at_end) |
670 { | |
671 /* The exit is not at the end of the loop; leave exit test | |
672 in the first copy, so that the loops that start with test | |
673 of exit condition have continuous body after unrolling. */ | |
674 | |
675 if (dump_file) | |
676 fprintf (dump_file, ";; Condition on beginning of loop.\n"); | |
677 | |
678 /* Peel exit_mod iterations. */ | |
679 RESET_BIT (wont_exit, 0); | |
680 if (desc->noloop_assumptions) | |
681 RESET_BIT (wont_exit, 1); | |
682 | |
683 if (exit_mod) | |
684 { | |
685 opt_info_start_duplication (opt_info); | |
686 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
687 exit_mod, | |
688 wont_exit, desc->out_edge, | |
689 &remove_edges, | |
690 DLTHE_FLAG_UPDATE_FREQ | |
691 | (opt_info && exit_mod > 1 | |
692 ? DLTHE_RECORD_COPY_NUMBER | |
693 : 0)); | |
694 gcc_assert (ok); | |
695 | |
696 if (opt_info && exit_mod > 1) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
697 apply_opt_in_copies (opt_info, exit_mod, false, false); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
698 |
0 | 699 desc->noloop_assumptions = NULL_RTX; |
700 desc->niter -= exit_mod; | |
701 desc->niter_max -= exit_mod; | |
702 } | |
703 | |
704 SET_BIT (wont_exit, 1); | |
705 } | |
706 else | |
707 { | |
708 /* Leave exit test in last copy, for the same reason as above if | |
709 the loop tests the condition at the end of loop body. */ | |
710 | |
711 if (dump_file) | |
712 fprintf (dump_file, ";; Condition on end of loop.\n"); | |
713 | |
714 /* We know that niter >= max_unroll + 2; so we do not need to care of | |
715 case when we would exit before reaching the loop. So just peel | |
716 exit_mod + 1 iterations. */ | |
717 if (exit_mod != max_unroll | |
718 || desc->noloop_assumptions) | |
719 { | |
720 RESET_BIT (wont_exit, 0); | |
721 if (desc->noloop_assumptions) | |
722 RESET_BIT (wont_exit, 1); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
723 |
0 | 724 opt_info_start_duplication (opt_info); |
725 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
726 exit_mod + 1, | |
727 wont_exit, desc->out_edge, | |
728 &remove_edges, | |
729 DLTHE_FLAG_UPDATE_FREQ | |
730 | (opt_info && exit_mod > 0 | |
731 ? DLTHE_RECORD_COPY_NUMBER | |
732 : 0)); | |
733 gcc_assert (ok); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
734 |
0 | 735 if (opt_info && exit_mod > 0) |
736 apply_opt_in_copies (opt_info, exit_mod + 1, false, false); | |
737 | |
738 desc->niter -= exit_mod + 1; | |
739 desc->niter_max -= exit_mod + 1; | |
740 desc->noloop_assumptions = NULL_RTX; | |
741 | |
742 SET_BIT (wont_exit, 0); | |
743 SET_BIT (wont_exit, 1); | |
744 } | |
745 | |
746 RESET_BIT (wont_exit, max_unroll); | |
747 } | |
748 | |
749 /* Now unroll the loop. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
750 |
0 | 751 opt_info_start_duplication (opt_info); |
752 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), | |
753 max_unroll, | |
754 wont_exit, desc->out_edge, | |
755 &remove_edges, | |
756 DLTHE_FLAG_UPDATE_FREQ | |
757 | (opt_info | |
758 ? DLTHE_RECORD_COPY_NUMBER | |
759 : 0)); | |
760 gcc_assert (ok); | |
761 | |
762 if (opt_info) | |
763 { | |
764 apply_opt_in_copies (opt_info, max_unroll, true, true); | |
765 free_opt_info (opt_info); | |
766 } | |
767 | |
768 free (wont_exit); | |
769 | |
770 if (exit_at_end) | |
771 { | |
772 basic_block exit_block = get_bb_copy (desc->in_edge->src); | |
773 /* Find a new in and out edge; they are in the last copy we have made. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
774 |
0 | 775 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest) |
776 { | |
777 desc->out_edge = EDGE_SUCC (exit_block, 0); | |
778 desc->in_edge = EDGE_SUCC (exit_block, 1); | |
779 } | |
780 else | |
781 { | |
782 desc->out_edge = EDGE_SUCC (exit_block, 1); | |
783 desc->in_edge = EDGE_SUCC (exit_block, 0); | |
784 } | |
785 } | |
786 | |
787 desc->niter /= max_unroll + 1; | |
788 desc->niter_max /= max_unroll + 1; | |
789 desc->niter_expr = GEN_INT (desc->niter); | |
790 | |
791 /* Remove the edges. */ | |
792 for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++) | |
793 remove_path (e); | |
794 VEC_free (edge, heap, remove_edges); | |
795 | |
796 if (dump_file) | |
797 fprintf (dump_file, | |
798 ";; Unrolled loop %d times, constant # of iterations %i insns\n", | |
799 max_unroll, num_loop_insns (loop)); | |
800 } | |
801 | |
802 /* Decide whether to unroll LOOP iterating runtime computable number of times | |
803 and how much. */ | |
804 static void | |
805 decide_unroll_runtime_iterations (struct loop *loop, int flags) | |
806 { | |
807 unsigned nunroll, nunroll_by_av, i; | |
808 struct niter_desc *desc; | |
809 | |
810 if (!(flags & UAP_UNROLL)) | |
811 { | |
812 /* We were not asked to, just return back silently. */ | |
813 return; | |
814 } | |
815 | |
816 if (dump_file) | |
817 fprintf (dump_file, | |
818 "\n;; Considering unrolling loop with runtime " | |
819 "computable number of iterations\n"); | |
820 | |
821 /* nunroll = total number of copies of the original loop body in | |
822 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ | |
823 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns; | |
824 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns; | |
825 if (nunroll > nunroll_by_av) | |
826 nunroll = nunroll_by_av; | |
827 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) | |
828 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); | |
829 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
830 if (targetm.loop_unroll_adjust) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
831 nunroll = targetm.loop_unroll_adjust (nunroll, loop); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
832 |
0 | 833 /* Skip big loops. */ |
834 if (nunroll <= 1) | |
835 { | |
836 if (dump_file) | |
837 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
838 return; | |
839 } | |
840 | |
841 /* Check for simple loops. */ | |
842 desc = get_simple_loop_desc (loop); | |
843 | |
844 /* Check simpleness. */ | |
845 if (!desc->simple_p || desc->assumptions) | |
846 { | |
847 if (dump_file) | |
848 fprintf (dump_file, | |
849 ";; Unable to prove that the number of iterations " | |
850 "can be counted in runtime\n"); | |
851 return; | |
852 } | |
853 | |
854 if (desc->const_iter) | |
855 { | |
856 if (dump_file) | |
857 fprintf (dump_file, ";; Loop iterates constant times\n"); | |
858 return; | |
859 } | |
860 | |
861 /* If we have profile feedback, check whether the loop rolls. */ | |
862 if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll) | |
863 { | |
864 if (dump_file) | |
865 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); | |
866 return; | |
867 } | |
868 | |
869 /* Success; now force nunroll to be power of 2, as we are unable to | |
870 cope with overflows in computation of number of iterations. */ | |
871 for (i = 1; 2 * i <= nunroll; i *= 2) | |
872 continue; | |
873 | |
874 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME; | |
875 loop->lpt_decision.times = i - 1; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
876 |
0 | 877 if (dump_file) |
878 fprintf (dump_file, | |
879 ";; Decided to unroll the runtime computable " | |
880 "times rolling loop, %d times.\n", | |
881 loop->lpt_decision.times); | |
882 } | |
883 | |
884 /* Splits edge E and inserts the sequence of instructions INSNS on it, and | |
885 returns the newly created block. If INSNS is NULL_RTX, nothing is changed | |
886 and NULL is returned instead. */ | |
887 | |
888 basic_block | |
889 split_edge_and_insert (edge e, rtx insns) | |
890 { | |
891 basic_block bb; | |
892 | |
893 if (!insns) | |
894 return NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
895 bb = split_edge (e); |
0 | 896 emit_insn_after (insns, BB_END (bb)); |
897 | |
898 /* ??? We used to assume that INSNS can contain control flow insns, and | |
899 that we had to try to find sub basic blocks in BB to maintain a valid | |
900 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB | |
901 and call break_superblocks when going out of cfglayout mode. But it | |
902 turns out that this never happens; and that if it does ever happen, | |
903 the verify_flow_info call in loop_optimizer_finalize would fail. | |
904 | |
905 There are two reasons why we expected we could have control flow insns | |
906 in INSNS. The first is when a comparison has to be done in parts, and | |
907 the second is when the number of iterations is computed for loops with | |
908 the number of iterations known at runtime. In both cases, test cases | |
909 to get control flow in INSNS appear to be impossible to construct: | |
910 | |
911 * If do_compare_rtx_and_jump needs several branches to do comparison | |
912 in a mode that needs comparison by parts, we cannot analyze the | |
913 number of iterations of the loop, and we never get to unrolling it. | |
914 | |
915 * The code in expand_divmod that was suspected to cause creation of | |
916 branching code seems to be only accessed for signed division. The | |
917 divisions used by # of iterations analysis are always unsigned. | |
918 Problems might arise on architectures that emits branching code | |
919 for some operations that may appear in the unroller (especially | |
920 for division), but we have no such architectures. | |
921 | |
922 Considering all this, it was decided that we should for now assume | |
923 that INSNS can in theory contain control flow insns, but in practice | |
924 it never does. So we don't handle the theoretical case, and should | |
925 a real failure ever show up, we have a pretty good clue for how to | |
926 fix it. */ | |
927 | |
928 return bb; | |
929 } | |
930 | |
931 /* Unroll LOOP for that we are able to count number of iterations in runtime | |
932 LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this (with some | |
933 extra care for case n < 0): | |
934 | |
935 for (i = 0; i < n; i++) | |
936 body; | |
937 | |
938 ==> | |
939 | |
940 i = 0; | |
941 mod = n % 4; | |
942 | |
943 switch (mod) | |
944 { | |
945 case 3: | |
946 body; i++; | |
947 case 2: | |
948 body; i++; | |
949 case 1: | |
950 body; i++; | |
951 case 0: ; | |
952 } | |
953 | |
954 while (i < n) | |
955 { | |
956 body; i++; | |
957 body; i++; | |
958 body; i++; | |
959 body; i++; | |
960 } | |
961 */ | |
962 static void | |
963 unroll_loop_runtime_iterations (struct loop *loop) | |
964 { | |
965 rtx old_niter, niter, init_code, branch_code, tmp; | |
966 unsigned i, j, p; | |
967 basic_block preheader, *body, swtch, ezc_swtch; | |
968 VEC (basic_block, heap) *dom_bbs; | |
969 sbitmap wont_exit; | |
970 int may_exit_copy; | |
971 unsigned n_peel; | |
972 VEC (edge, heap) *remove_edges; | |
973 edge e; | |
974 bool extra_zero_check, last_may_exit; | |
975 unsigned max_unroll = loop->lpt_decision.times; | |
976 struct niter_desc *desc = get_simple_loop_desc (loop); | |
977 bool exit_at_end = loop_exit_at_end_p (loop); | |
978 struct opt_info *opt_info = NULL; | |
979 bool ok; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
980 |
0 | 981 if (flag_split_ivs_in_unroller |
982 || flag_variable_expansion_in_unroller) | |
983 opt_info = analyze_insns_in_loop (loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
984 |
0 | 985 /* Remember blocks whose dominators will have to be updated. */ |
986 dom_bbs = NULL; | |
987 | |
988 body = get_loop_body (loop); | |
989 for (i = 0; i < loop->num_nodes; i++) | |
990 { | |
991 VEC (basic_block, heap) *ldom; | |
992 basic_block bb; | |
993 | |
994 ldom = get_dominated_by (CDI_DOMINATORS, body[i]); | |
995 for (j = 0; VEC_iterate (basic_block, ldom, j, bb); j++) | |
996 if (!flow_bb_inside_loop_p (loop, bb)) | |
997 VEC_safe_push (basic_block, heap, dom_bbs, bb); | |
998 | |
999 VEC_free (basic_block, heap, ldom); | |
1000 } | |
1001 free (body); | |
1002 | |
1003 if (!exit_at_end) | |
1004 { | |
1005 /* Leave exit in first copy (for explanation why see comment in | |
1006 unroll_loop_constant_iterations). */ | |
1007 may_exit_copy = 0; | |
1008 n_peel = max_unroll - 1; | |
1009 extra_zero_check = true; | |
1010 last_may_exit = false; | |
1011 } | |
1012 else | |
1013 { | |
1014 /* Leave exit in last copy (for explanation why see comment in | |
1015 unroll_loop_constant_iterations). */ | |
1016 may_exit_copy = max_unroll; | |
1017 n_peel = max_unroll; | |
1018 extra_zero_check = false; | |
1019 last_may_exit = true; | |
1020 } | |
1021 | |
1022 /* Get expression for number of iterations. */ | |
1023 start_sequence (); | |
1024 old_niter = niter = gen_reg_rtx (desc->mode); | |
1025 tmp = force_operand (copy_rtx (desc->niter_expr), niter); | |
1026 if (tmp != niter) | |
1027 emit_move_insn (niter, tmp); | |
1028 | |
1029 /* Count modulo by ANDing it with max_unroll; we use the fact that | |
1030 the number of unrollings is a power of two, and thus this is correct | |
1031 even if there is overflow in the computation. */ | |
1032 niter = expand_simple_binop (desc->mode, AND, | |
1033 niter, | |
1034 GEN_INT (max_unroll), | |
1035 NULL_RTX, 0, OPTAB_LIB_WIDEN); | |
1036 | |
1037 init_code = get_insns (); | |
1038 end_sequence (); | |
1039 unshare_all_rtl_in_chain (init_code); | |
1040 | |
1041 /* Precondition the loop. */ | |
1042 split_edge_and_insert (loop_preheader_edge (loop), init_code); | |
1043 | |
1044 remove_edges = NULL; | |
1045 | |
1046 wont_exit = sbitmap_alloc (max_unroll + 2); | |
1047 | |
1048 /* Peel the first copy of loop body (almost always we must leave exit test | |
1049 here; the only exception is when we have extra zero check and the number | |
1050 of iterations is reliable. Also record the place of (possible) extra | |
1051 zero check. */ | |
1052 sbitmap_zero (wont_exit); | |
1053 if (extra_zero_check | |
1054 && !desc->noloop_assumptions) | |
1055 SET_BIT (wont_exit, 1); | |
1056 ezc_swtch = loop_preheader_edge (loop)->src; | |
1057 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
1058 1, wont_exit, desc->out_edge, | |
1059 &remove_edges, | |
1060 DLTHE_FLAG_UPDATE_FREQ); | |
1061 gcc_assert (ok); | |
1062 | |
1063 /* Record the place where switch will be built for preconditioning. */ | |
1064 swtch = split_edge (loop_preheader_edge (loop)); | |
1065 | |
1066 for (i = 0; i < n_peel; i++) | |
1067 { | |
1068 /* Peel the copy. */ | |
1069 sbitmap_zero (wont_exit); | |
1070 if (i != n_peel - 1 || !last_may_exit) | |
1071 SET_BIT (wont_exit, 1); | |
1072 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
1073 1, wont_exit, desc->out_edge, | |
1074 &remove_edges, | |
1075 DLTHE_FLAG_UPDATE_FREQ); | |
1076 gcc_assert (ok); | |
1077 | |
1078 /* Create item for switch. */ | |
1079 j = n_peel - i - (extra_zero_check ? 0 : 1); | |
1080 p = REG_BR_PROB_BASE / (i + 2); | |
1081 | |
1082 preheader = split_edge (loop_preheader_edge (loop)); | |
1083 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ, | |
1084 block_label (preheader), p, | |
1085 NULL_RTX); | |
1086 | |
1087 /* We rely on the fact that the compare and jump cannot be optimized out, | |
1088 and hence the cfg we create is correct. */ | |
1089 gcc_assert (branch_code != NULL_RTX); | |
1090 | |
1091 swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code); | |
1092 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); | |
1093 single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p; | |
1094 e = make_edge (swtch, preheader, | |
1095 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); | |
1096 e->probability = p; | |
1097 } | |
1098 | |
1099 if (extra_zero_check) | |
1100 { | |
1101 /* Add branch for zero iterations. */ | |
1102 p = REG_BR_PROB_BASE / (max_unroll + 1); | |
1103 swtch = ezc_swtch; | |
1104 preheader = split_edge (loop_preheader_edge (loop)); | |
1105 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ, | |
1106 block_label (preheader), p, | |
1107 NULL_RTX); | |
1108 gcc_assert (branch_code != NULL_RTX); | |
1109 | |
1110 swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code); | |
1111 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); | |
1112 single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p; | |
1113 e = make_edge (swtch, preheader, | |
1114 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); | |
1115 e->probability = p; | |
1116 } | |
1117 | |
1118 /* Recount dominators for outer blocks. */ | |
1119 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false); | |
1120 | |
1121 /* And unroll loop. */ | |
1122 | |
1123 sbitmap_ones (wont_exit); | |
1124 RESET_BIT (wont_exit, may_exit_copy); | |
1125 opt_info_start_duplication (opt_info); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1126 |
0 | 1127 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), |
1128 max_unroll, | |
1129 wont_exit, desc->out_edge, | |
1130 &remove_edges, | |
1131 DLTHE_FLAG_UPDATE_FREQ | |
1132 | (opt_info | |
1133 ? DLTHE_RECORD_COPY_NUMBER | |
1134 : 0)); | |
1135 gcc_assert (ok); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1136 |
0 | 1137 if (opt_info) |
1138 { | |
1139 apply_opt_in_copies (opt_info, max_unroll, true, true); | |
1140 free_opt_info (opt_info); | |
1141 } | |
1142 | |
1143 free (wont_exit); | |
1144 | |
1145 if (exit_at_end) | |
1146 { | |
1147 basic_block exit_block = get_bb_copy (desc->in_edge->src); | |
1148 /* Find a new in and out edge; they are in the last copy we have | |
1149 made. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1150 |
0 | 1151 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest) |
1152 { | |
1153 desc->out_edge = EDGE_SUCC (exit_block, 0); | |
1154 desc->in_edge = EDGE_SUCC (exit_block, 1); | |
1155 } | |
1156 else | |
1157 { | |
1158 desc->out_edge = EDGE_SUCC (exit_block, 1); | |
1159 desc->in_edge = EDGE_SUCC (exit_block, 0); | |
1160 } | |
1161 } | |
1162 | |
1163 /* Remove the edges. */ | |
1164 for (i = 0; VEC_iterate (edge, remove_edges, i, e); i++) | |
1165 remove_path (e); | |
1166 VEC_free (edge, heap, remove_edges); | |
1167 | |
1168 /* We must be careful when updating the number of iterations due to | |
1169 preconditioning and the fact that the value must be valid at entry | |
1170 of the loop. After passing through the above code, we see that | |
1171 the correct new number of iterations is this: */ | |
1172 gcc_assert (!desc->const_iter); | |
1173 desc->niter_expr = | |
1174 simplify_gen_binary (UDIV, desc->mode, old_niter, | |
1175 GEN_INT (max_unroll + 1)); | |
1176 desc->niter_max /= max_unroll + 1; | |
1177 if (exit_at_end) | |
1178 { | |
1179 desc->niter_expr = | |
1180 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx); | |
1181 desc->noloop_assumptions = NULL_RTX; | |
1182 desc->niter_max--; | |
1183 } | |
1184 | |
1185 if (dump_file) | |
1186 fprintf (dump_file, | |
1187 ";; Unrolled loop %d times, counting # of iterations " | |
1188 "in runtime, %i insns\n", | |
1189 max_unroll, num_loop_insns (loop)); | |
1190 | |
1191 VEC_free (basic_block, heap, dom_bbs); | |
1192 } | |
1193 | |
1194 /* Decide whether to simply peel LOOP and how much. */ | |
1195 static void | |
1196 decide_peel_simple (struct loop *loop, int flags) | |
1197 { | |
1198 unsigned npeel; | |
1199 struct niter_desc *desc; | |
1200 | |
1201 if (!(flags & UAP_PEEL)) | |
1202 { | |
1203 /* We were not asked to, just return back silently. */ | |
1204 return; | |
1205 } | |
1206 | |
1207 if (dump_file) | |
1208 fprintf (dump_file, "\n;; Considering simply peeling loop\n"); | |
1209 | |
1210 /* npeel = number of iterations to peel. */ | |
1211 npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns; | |
1212 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES)) | |
1213 npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES); | |
1214 | |
1215 /* Skip big loops. */ | |
1216 if (!npeel) | |
1217 { | |
1218 if (dump_file) | |
1219 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
1220 return; | |
1221 } | |
1222 | |
1223 /* Check for simple loops. */ | |
1224 desc = get_simple_loop_desc (loop); | |
1225 | |
1226 /* Check number of iterations. */ | |
1227 if (desc->simple_p && !desc->assumptions && desc->const_iter) | |
1228 { | |
1229 if (dump_file) | |
1230 fprintf (dump_file, ";; Loop iterates constant times\n"); | |
1231 return; | |
1232 } | |
1233 | |
1234 /* Do not simply peel loops with branches inside -- it increases number | |
1235 of mispredicts. */ | |
1236 if (num_loop_branches (loop) > 1) | |
1237 { | |
1238 if (dump_file) | |
1239 fprintf (dump_file, ";; Not peeling, contains branches\n"); | |
1240 return; | |
1241 } | |
1242 | |
1243 if (loop->header->count) | |
1244 { | |
1245 unsigned niter = expected_loop_iterations (loop); | |
1246 if (niter + 1 > npeel) | |
1247 { | |
1248 if (dump_file) | |
1249 { | |
1250 fprintf (dump_file, ";; Not peeling loop, rolls too much ("); | |
1251 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, | |
1252 (HOST_WIDEST_INT) (niter + 1)); | |
1253 fprintf (dump_file, " iterations > %d [maximum peelings])\n", | |
1254 npeel); | |
1255 } | |
1256 return; | |
1257 } | |
1258 npeel = niter + 1; | |
1259 } | |
1260 else | |
1261 { | |
1262 /* For now we have no good heuristics to decide whether loop peeling | |
1263 will be effective, so disable it. */ | |
1264 if (dump_file) | |
1265 fprintf (dump_file, | |
1266 ";; Not peeling loop, no evidence it will be profitable\n"); | |
1267 return; | |
1268 } | |
1269 | |
1270 /* Success. */ | |
1271 loop->lpt_decision.decision = LPT_PEEL_SIMPLE; | |
1272 loop->lpt_decision.times = npeel; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1273 |
0 | 1274 if (dump_file) |
1275 fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n", | |
1276 loop->lpt_decision.times); | |
1277 } | |
1278 | |
1279 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation: | |
1280 while (cond) | |
1281 body; | |
1282 | |
1283 ==> | |
1284 | |
1285 if (!cond) goto end; | |
1286 body; | |
1287 if (!cond) goto end; | |
1288 body; | |
1289 while (cond) | |
1290 body; | |
1291 end: ; | |
1292 */ | |
1293 static void | |
1294 peel_loop_simple (struct loop *loop) | |
1295 { | |
1296 sbitmap wont_exit; | |
1297 unsigned npeel = loop->lpt_decision.times; | |
1298 struct niter_desc *desc = get_simple_loop_desc (loop); | |
1299 struct opt_info *opt_info = NULL; | |
1300 bool ok; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1301 |
0 | 1302 if (flag_split_ivs_in_unroller && npeel > 1) |
1303 opt_info = analyze_insns_in_loop (loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1304 |
0 | 1305 wont_exit = sbitmap_alloc (npeel + 1); |
1306 sbitmap_zero (wont_exit); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1307 |
0 | 1308 opt_info_start_duplication (opt_info); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1309 |
0 | 1310 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), |
1311 npeel, wont_exit, NULL, | |
1312 NULL, DLTHE_FLAG_UPDATE_FREQ | |
1313 | (opt_info | |
1314 ? DLTHE_RECORD_COPY_NUMBER | |
1315 : 0)); | |
1316 gcc_assert (ok); | |
1317 | |
1318 free (wont_exit); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1319 |
0 | 1320 if (opt_info) |
1321 { | |
1322 apply_opt_in_copies (opt_info, npeel, false, false); | |
1323 free_opt_info (opt_info); | |
1324 } | |
1325 | |
1326 if (desc->simple_p) | |
1327 { | |
1328 if (desc->const_iter) | |
1329 { | |
1330 desc->niter -= npeel; | |
1331 desc->niter_expr = GEN_INT (desc->niter); | |
1332 desc->noloop_assumptions = NULL_RTX; | |
1333 } | |
1334 else | |
1335 { | |
1336 /* We cannot just update niter_expr, as its value might be clobbered | |
1337 inside loop. We could handle this by counting the number into | |
1338 temporary just like we do in runtime unrolling, but it does not | |
1339 seem worthwhile. */ | |
1340 free_simple_loop_desc (loop); | |
1341 } | |
1342 } | |
1343 if (dump_file) | |
1344 fprintf (dump_file, ";; Peeling loop %d times\n", npeel); | |
1345 } | |
1346 | |
1347 /* Decide whether to unroll LOOP stupidly and how much. */ | |
1348 static void | |
1349 decide_unroll_stupid (struct loop *loop, int flags) | |
1350 { | |
1351 unsigned nunroll, nunroll_by_av, i; | |
1352 struct niter_desc *desc; | |
1353 | |
1354 if (!(flags & UAP_UNROLL_ALL)) | |
1355 { | |
1356 /* We were not asked to, just return back silently. */ | |
1357 return; | |
1358 } | |
1359 | |
1360 if (dump_file) | |
1361 fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n"); | |
1362 | |
1363 /* nunroll = total number of copies of the original loop body in | |
1364 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ | |
1365 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns; | |
1366 nunroll_by_av | |
1367 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns; | |
1368 if (nunroll > nunroll_by_av) | |
1369 nunroll = nunroll_by_av; | |
1370 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) | |
1371 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); | |
1372 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1373 if (targetm.loop_unroll_adjust) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1374 nunroll = targetm.loop_unroll_adjust (nunroll, loop); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1375 |
0 | 1376 /* Skip big loops. */ |
1377 if (nunroll <= 1) | |
1378 { | |
1379 if (dump_file) | |
1380 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
1381 return; | |
1382 } | |
1383 | |
1384 /* Check for simple loops. */ | |
1385 desc = get_simple_loop_desc (loop); | |
1386 | |
1387 /* Check simpleness. */ | |
1388 if (desc->simple_p && !desc->assumptions) | |
1389 { | |
1390 if (dump_file) | |
1391 fprintf (dump_file, ";; The loop is simple\n"); | |
1392 return; | |
1393 } | |
1394 | |
1395 /* Do not unroll loops with branches inside -- it increases number | |
1396 of mispredicts. */ | |
1397 if (num_loop_branches (loop) > 1) | |
1398 { | |
1399 if (dump_file) | |
1400 fprintf (dump_file, ";; Not unrolling, contains branches\n"); | |
1401 return; | |
1402 } | |
1403 | |
1404 /* If we have profile feedback, check whether the loop rolls. */ | |
1405 if (loop->header->count | |
1406 && expected_loop_iterations (loop) < 2 * nunroll) | |
1407 { | |
1408 if (dump_file) | |
1409 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); | |
1410 return; | |
1411 } | |
1412 | |
1413 /* Success. Now force nunroll to be power of 2, as it seems that this | |
1414 improves results (partially because of better alignments, partially | |
1415 because of some dark magic). */ | |
1416 for (i = 1; 2 * i <= nunroll; i *= 2) | |
1417 continue; | |
1418 | |
1419 loop->lpt_decision.decision = LPT_UNROLL_STUPID; | |
1420 loop->lpt_decision.times = i - 1; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1421 |
0 | 1422 if (dump_file) |
1423 fprintf (dump_file, | |
1424 ";; Decided to unroll the loop stupidly, %d times.\n", | |
1425 loop->lpt_decision.times); | |
1426 } | |
1427 | |
1428 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation: | |
1429 while (cond) | |
1430 body; | |
1431 | |
1432 ==> | |
1433 | |
1434 while (cond) | |
1435 { | |
1436 body; | |
1437 if (!cond) break; | |
1438 body; | |
1439 if (!cond) break; | |
1440 body; | |
1441 if (!cond) break; | |
1442 body; | |
1443 } | |
1444 */ | |
1445 static void | |
1446 unroll_loop_stupid (struct loop *loop) | |
1447 { | |
1448 sbitmap wont_exit; | |
1449 unsigned nunroll = loop->lpt_decision.times; | |
1450 struct niter_desc *desc = get_simple_loop_desc (loop); | |
1451 struct opt_info *opt_info = NULL; | |
1452 bool ok; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1453 |
0 | 1454 if (flag_split_ivs_in_unroller |
1455 || flag_variable_expansion_in_unroller) | |
1456 opt_info = analyze_insns_in_loop (loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1457 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1458 |
0 | 1459 wont_exit = sbitmap_alloc (nunroll + 1); |
1460 sbitmap_zero (wont_exit); | |
1461 opt_info_start_duplication (opt_info); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1462 |
0 | 1463 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), |
1464 nunroll, wont_exit, | |
1465 NULL, NULL, | |
1466 DLTHE_FLAG_UPDATE_FREQ | |
1467 | (opt_info | |
1468 ? DLTHE_RECORD_COPY_NUMBER | |
1469 : 0)); | |
1470 gcc_assert (ok); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1471 |
0 | 1472 if (opt_info) |
1473 { | |
1474 apply_opt_in_copies (opt_info, nunroll, true, true); | |
1475 free_opt_info (opt_info); | |
1476 } | |
1477 | |
1478 free (wont_exit); | |
1479 | |
1480 if (desc->simple_p) | |
1481 { | |
1482 /* We indeed may get here provided that there are nontrivial assumptions | |
1483 for a loop to be really simple. We could update the counts, but the | |
1484 problem is that we are unable to decide which exit will be taken | |
1485 (not really true in case the number of iterations is constant, | |
1486 but noone will do anything with this information, so we do not | |
1487 worry about it). */ | |
1488 desc->simple_p = false; | |
1489 } | |
1490 | |
1491 if (dump_file) | |
1492 fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n", | |
1493 nunroll, num_loop_insns (loop)); | |
1494 } | |
1495 | |
1496 /* A hash function for information about insns to split. */ | |
1497 | |
1498 static hashval_t | |
1499 si_info_hash (const void *ivts) | |
1500 { | |
1501 return (hashval_t) INSN_UID (((const struct iv_to_split *) ivts)->insn); | |
1502 } | |
1503 | |
1504 /* An equality functions for information about insns to split. */ | |
1505 | |
1506 static int | |
1507 si_info_eq (const void *ivts1, const void *ivts2) | |
1508 { | |
1509 const struct iv_to_split *const i1 = (const struct iv_to_split *) ivts1; | |
1510 const struct iv_to_split *const i2 = (const struct iv_to_split *) ivts2; | |
1511 | |
1512 return i1->insn == i2->insn; | |
1513 } | |
1514 | |
1515 /* Return a hash for VES, which is really a "var_to_expand *". */ | |
1516 | |
1517 static hashval_t | |
1518 ve_info_hash (const void *ves) | |
1519 { | |
1520 return (hashval_t) INSN_UID (((const struct var_to_expand *) ves)->insn); | |
1521 } | |
1522 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1523 /* Return true if IVTS1 and IVTS2 (which are really both of type |
0 | 1524 "var_to_expand *") refer to the same instruction. */ |
1525 | |
1526 static int | |
1527 ve_info_eq (const void *ivts1, const void *ivts2) | |
1528 { | |
1529 const struct var_to_expand *const i1 = (const struct var_to_expand *) ivts1; | |
1530 const struct var_to_expand *const i2 = (const struct var_to_expand *) ivts2; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1531 |
0 | 1532 return i1->insn == i2->insn; |
1533 } | |
1534 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1535 /* Returns true if REG is referenced in one nondebug insn in LOOP. |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1536 Set *DEBUG_USES to the number of debug insns that reference the |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1537 variable. */ |
0 | 1538 |
1539 bool | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1540 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg, |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1541 int *debug_uses) |
0 | 1542 { |
1543 basic_block *body, bb; | |
1544 unsigned i; | |
1545 int count_ref = 0; | |
1546 rtx insn; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1547 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1548 body = get_loop_body (loop); |
0 | 1549 for (i = 0; i < loop->num_nodes; i++) |
1550 { | |
1551 bb = body[i]; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1552 |
0 | 1553 FOR_BB_INSNS (bb, insn) |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1554 if (!rtx_referenced_p (reg, insn)) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1555 continue; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1556 else if (DEBUG_INSN_P (insn)) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1557 ++*debug_uses; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1558 else if (++count_ref > 1) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1559 break; |
0 | 1560 } |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1561 free (body); |
0 | 1562 return (count_ref == 1); |
1563 } | |
1564 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1565 /* Reset the DEBUG_USES debug insns in LOOP that reference REG. */ |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1566 |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1567 static void |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1568 reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1569 { |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1570 basic_block *body, bb; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1571 unsigned i; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1572 rtx insn; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1573 |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1574 body = get_loop_body (loop); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1575 for (i = 0; debug_uses && i < loop->num_nodes; i++) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1576 { |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1577 bb = body[i]; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1578 |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1579 FOR_BB_INSNS (bb, insn) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1580 if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn)) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1581 continue; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1582 else |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1583 { |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1584 validate_change (insn, &INSN_VAR_LOCATION_LOC (insn), |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1585 gen_rtx_UNKNOWN_VAR_LOC (), 0); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1586 if (!--debug_uses) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1587 break; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1588 } |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1589 } |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1590 free (body); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1591 } |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1592 |
0 | 1593 /* Determine whether INSN contains an accumulator |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1594 which can be expanded into separate copies, |
0 | 1595 one for each copy of the LOOP body. |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1596 |
0 | 1597 for (i = 0 ; i < n; i++) |
1598 sum += a[i]; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1599 |
0 | 1600 ==> |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1601 |
0 | 1602 sum += a[i] |
1603 .... | |
1604 i = i+1; | |
1605 sum1 += a[i] | |
1606 .... | |
1607 i = i+1 | |
1608 sum2 += a[i]; | |
1609 .... | |
1610 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1611 Return NULL if INSN contains no opportunity for expansion of accumulator. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1612 Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant |
0 | 1613 information and return a pointer to it. |
1614 */ | |
1615 | |
1616 static struct var_to_expand * | |
1617 analyze_insn_to_expand_var (struct loop *loop, rtx insn) | |
1618 { | |
1619 rtx set, dest, src, op1, op2, something; | |
1620 struct var_to_expand *ves; | |
1621 enum machine_mode mode1, mode2; | |
1622 unsigned accum_pos; | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1623 int debug_uses = 0; |
0 | 1624 |
1625 set = single_set (insn); | |
1626 if (!set) | |
1627 return NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1628 |
0 | 1629 dest = SET_DEST (set); |
1630 src = SET_SRC (set); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1631 |
0 | 1632 if (GET_CODE (src) != PLUS |
1633 && GET_CODE (src) != MINUS | |
1634 && GET_CODE (src) != MULT) | |
1635 return NULL; | |
1636 | |
1637 /* Hmm, this is a bit paradoxical. We know that INSN is a valid insn | |
1638 in MD. But if there is no optab to generate the insn, we can not | |
1639 perform the variable expansion. This can happen if an MD provides | |
1640 an insn but not a named pattern to generate it, for example to avoid | |
1641 producing code that needs additional mode switches like for x87/mmx. | |
1642 | |
1643 So we check have_insn_for which looks for an optab for the operation | |
1644 in SRC. If it doesn't exist, we can't perform the expansion even | |
1645 though INSN is valid. */ | |
1646 if (!have_insn_for (GET_CODE (src), GET_MODE (src))) | |
1647 return NULL; | |
1648 | |
1649 op1 = XEXP (src, 0); | |
1650 op2 = XEXP (src, 1); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1651 |
0 | 1652 if (!REG_P (dest) |
1653 && !(GET_CODE (dest) == SUBREG | |
1654 && REG_P (SUBREG_REG (dest)))) | |
1655 return NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1656 |
0 | 1657 if (rtx_equal_p (dest, op1)) |
1658 accum_pos = 0; | |
1659 else if (rtx_equal_p (dest, op2)) | |
1660 accum_pos = 1; | |
1661 else | |
1662 return NULL; | |
1663 | |
1664 /* The method of expansion that we are using; which includes | |
1665 the initialization of the expansions with zero and the summation of | |
1666 the expansions at the end of the computation will yield wrong results | |
1667 for (x = something - x) thus avoid using it in that case. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1668 if (accum_pos == 1 |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1669 && GET_CODE (src) == MINUS) |
0 | 1670 return NULL; |
1671 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1672 something = (accum_pos == 0) ? op2 : op1; |
0 | 1673 |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1674 if (rtx_referenced_p (dest, something)) |
0 | 1675 return NULL; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1676 |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1677 if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses)) |
0 | 1678 return NULL; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1679 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1680 mode1 = GET_MODE (dest); |
0 | 1681 mode2 = GET_MODE (something); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1682 if ((FLOAT_MODE_P (mode1) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1683 || FLOAT_MODE_P (mode2)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1684 && !flag_associative_math) |
0 | 1685 return NULL; |
1686 | |
1687 if (dump_file) | |
1688 { | |
1689 fprintf (dump_file, | |
1690 "\n;; Expanding Accumulator "); | |
1691 print_rtl (dump_file, dest); | |
1692 fprintf (dump_file, "\n"); | |
1693 } | |
1694 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1695 if (debug_uses) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1696 /* Instead of resetting the debug insns, we could replace each |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1697 debug use in the loop with the sum or product of all expanded |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1698 accummulators. Since we'll only know of all expansions at the |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1699 end, we'd have to keep track of which vars_to_expand a debug |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1700 insn in the loop references, take note of each copy of the |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1701 debug insn during unrolling, and when it's all done, compute |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1702 the sum or product of each variable and adjust the original |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1703 debug insn and each copy thereof. What a pain! */ |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1704 reset_debug_uses_in_loop (loop, dest, debug_uses); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1705 |
0 | 1706 /* Record the accumulator to expand. */ |
1707 ves = XNEW (struct var_to_expand); | |
1708 ves->insn = insn; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1709 ves->reg = copy_rtx (dest); |
0 | 1710 ves->var_expansions = VEC_alloc (rtx, heap, 1); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1711 ves->next = NULL; |
0 | 1712 ves->op = GET_CODE (src); |
1713 ves->expansion_count = 0; | |
1714 ves->reuse_expansion = 0; | |
1715 ves->accum_pos = accum_pos; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1716 return ves; |
0 | 1717 } |
1718 | |
1719 /* Determine whether there is an induction variable in INSN that | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1720 we would like to split during unrolling. |
0 | 1721 |
1722 I.e. replace | |
1723 | |
1724 i = i + 1; | |
1725 ... | |
1726 i = i + 1; | |
1727 ... | |
1728 i = i + 1; | |
1729 ... | |
1730 | |
1731 type chains by | |
1732 | |
1733 i0 = i + 1 | |
1734 ... | |
1735 i = i0 + 1 | |
1736 ... | |
1737 i = i0 + 2 | |
1738 ... | |
1739 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1740 Return NULL if INSN contains no interesting IVs. Otherwise, allocate |
0 | 1741 an IV_TO_SPLIT structure, fill it with the relevant information and return a |
1742 pointer to it. */ | |
1743 | |
1744 static struct iv_to_split * | |
1745 analyze_iv_to_split_insn (rtx insn) | |
1746 { | |
1747 rtx set, dest; | |
1748 struct rtx_iv iv; | |
1749 struct iv_to_split *ivts; | |
1750 bool ok; | |
1751 | |
1752 /* For now we just split the basic induction variables. Later this may be | |
1753 extended for example by selecting also addresses of memory references. */ | |
1754 set = single_set (insn); | |
1755 if (!set) | |
1756 return NULL; | |
1757 | |
1758 dest = SET_DEST (set); | |
1759 if (!REG_P (dest)) | |
1760 return NULL; | |
1761 | |
1762 if (!biv_p (insn, dest)) | |
1763 return NULL; | |
1764 | |
1765 ok = iv_analyze_result (insn, dest, &iv); | |
1766 | |
1767 /* This used to be an assert under the assumption that if biv_p returns | |
1768 true that iv_analyze_result must also return true. However, that | |
1769 assumption is not strictly correct as evidenced by pr25569. | |
1770 | |
1771 Returning NULL when iv_analyze_result returns false is safe and | |
1772 avoids the problems in pr25569 until the iv_analyze_* routines | |
1773 can be fixed, which is apparently hard and time consuming | |
1774 according to their author. */ | |
1775 if (! ok) | |
1776 return NULL; | |
1777 | |
1778 if (iv.step == const0_rtx | |
1779 || iv.mode != iv.extend_mode) | |
1780 return NULL; | |
1781 | |
1782 /* Record the insn to split. */ | |
1783 ivts = XNEW (struct iv_to_split); | |
1784 ivts->insn = insn; | |
1785 ivts->base_var = NULL_RTX; | |
1786 ivts->step = iv.step; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1787 ivts->next = NULL; |
0 | 1788 ivts->n_loc = 1; |
1789 ivts->loc[0] = 1; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1790 |
0 | 1791 return ivts; |
1792 } | |
1793 | |
1794 /* Determines which of insns in LOOP can be optimized. | |
1795 Return a OPT_INFO struct with the relevant hash tables filled | |
1796 with all insns to be optimized. The FIRST_NEW_BLOCK field | |
1797 is undefined for the return value. */ | |
1798 | |
1799 static struct opt_info * | |
1800 analyze_insns_in_loop (struct loop *loop) | |
1801 { | |
1802 basic_block *body, bb; | |
1803 unsigned i; | |
1804 struct opt_info *opt_info = XCNEW (struct opt_info); | |
1805 rtx insn; | |
1806 struct iv_to_split *ivts = NULL; | |
1807 struct var_to_expand *ves = NULL; | |
1808 PTR *slot1; | |
1809 PTR *slot2; | |
1810 VEC (edge, heap) *edges = get_loop_exit_edges (loop); | |
1811 edge exit; | |
1812 bool can_apply = false; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1813 |
0 | 1814 iv_analysis_loop_init (loop); |
1815 | |
1816 body = get_loop_body (loop); | |
1817 | |
1818 if (flag_split_ivs_in_unroller) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1819 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1820 opt_info->insns_to_split = htab_create (5 * loop->num_nodes, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1821 si_info_hash, si_info_eq, free); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1822 opt_info->iv_to_split_head = NULL; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1823 opt_info->iv_to_split_tail = &opt_info->iv_to_split_head; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1824 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1825 |
0 | 1826 /* Record the loop exit bb and loop preheader before the unrolling. */ |
1827 opt_info->loop_preheader = loop_preheader_edge (loop)->src; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1828 |
0 | 1829 if (VEC_length (edge, edges) == 1) |
1830 { | |
1831 exit = VEC_index (edge, edges, 0); | |
1832 if (!(exit->flags & EDGE_COMPLEX)) | |
1833 { | |
1834 opt_info->loop_exit = split_edge (exit); | |
1835 can_apply = true; | |
1836 } | |
1837 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1838 |
0 | 1839 if (flag_variable_expansion_in_unroller |
1840 && can_apply) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1841 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1842 opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1843 ve_info_hash, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1844 ve_info_eq, free); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1845 opt_info->var_to_expand_head = NULL; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1846 opt_info->var_to_expand_tail = &opt_info->var_to_expand_head; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1847 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1848 |
0 | 1849 for (i = 0; i < loop->num_nodes; i++) |
1850 { | |
1851 bb = body[i]; | |
1852 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) | |
1853 continue; | |
1854 | |
1855 FOR_BB_INSNS (bb, insn) | |
1856 { | |
1857 if (!INSN_P (insn)) | |
1858 continue; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1859 |
0 | 1860 if (opt_info->insns_to_split) |
1861 ivts = analyze_iv_to_split_insn (insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1862 |
0 | 1863 if (ivts) |
1864 { | |
1865 slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1866 gcc_assert (*slot1 == NULL); |
0 | 1867 *slot1 = ivts; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1868 *opt_info->iv_to_split_tail = ivts; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1869 opt_info->iv_to_split_tail = &ivts->next; |
0 | 1870 continue; |
1871 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1872 |
0 | 1873 if (opt_info->insns_with_var_to_expand) |
1874 ves = analyze_insn_to_expand_var (loop, insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1875 |
0 | 1876 if (ves) |
1877 { | |
1878 slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1879 gcc_assert (*slot2 == NULL); |
0 | 1880 *slot2 = ves; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1881 *opt_info->var_to_expand_tail = ves; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1882 opt_info->var_to_expand_tail = &ves->next; |
0 | 1883 } |
1884 } | |
1885 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1886 |
0 | 1887 VEC_free (edge, heap, edges); |
1888 free (body); | |
1889 return opt_info; | |
1890 } | |
1891 | |
1892 /* Called just before loop duplication. Records start of duplicated area | |
1893 to OPT_INFO. */ | |
1894 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1895 static void |
0 | 1896 opt_info_start_duplication (struct opt_info *opt_info) |
1897 { | |
1898 if (opt_info) | |
1899 opt_info->first_new_block = last_basic_block; | |
1900 } | |
1901 | |
/* Return the number of iterations between the initialization of the base
   variable and the copy number N_COPY.  N_COPIES is the total number of
   newly created copies.  UNROLLING is true if we are unrolling (not
   peeling) the loop.  */

static unsigned
determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling)
{
  /* When unrolling, the initialization is done in the original loop body
     (number 0), so the distance is simply the copy number.  */
  if (unrolling)
    return n_copy;

  /* When peeling, the initialization occurs in copy number 1 and the
     original loop (number 0) comes last.  */
  return n_copy != 0 ? n_copy - 1 : n_copies;
}
1926 | |
1927 /* Locate in EXPR the expression corresponding to the location recorded | |
1928 in IVTS, and return a pointer to the RTX for this location. */ | |
1929 | |
1930 static rtx * | |
1931 get_ivts_expr (rtx expr, struct iv_to_split *ivts) | |
1932 { | |
1933 unsigned i; | |
1934 rtx *ret = &expr; | |
1935 | |
1936 for (i = 0; i < ivts->n_loc; i++) | |
1937 ret = &XEXP (*ret, ivts->loc[i]); | |
1938 | |
1939 return ret; | |
1940 } | |
1941 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1942 /* Allocate basic variable for the induction variable chain. */ |
0 | 1943 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1944 static void |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1945 allocate_basic_variable (struct iv_to_split *ivts) |
0 | 1946 { |
1947 rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts); | |
1948 | |
1949 ivts->base_var = gen_reg_rtx (GET_MODE (expr)); | |
1950 } | |
1951 | |
1952 /* Insert initialization of basic variable of IVTS before INSN, taking | |
1953 the initial value from INSN. */ | |
1954 | |
1955 static void | |
1956 insert_base_initialization (struct iv_to_split *ivts, rtx insn) | |
1957 { | |
1958 rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts)); | |
1959 rtx seq; | |
1960 | |
1961 start_sequence (); | |
1962 expr = force_operand (expr, ivts->base_var); | |
1963 if (expr != ivts->base_var) | |
1964 emit_move_insn (ivts->base_var, expr); | |
1965 seq = get_insns (); | |
1966 end_sequence (); | |
1967 | |
1968 emit_insn_before (seq, insn); | |
1969 } | |
1970 | |
1971 /* Replace the use of induction variable described in IVTS in INSN | |
1972 by base variable + DELTA * step. */ | |
1973 | |
1974 static void | |
1975 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta) | |
1976 { | |
1977 rtx expr, *loc, seq, incr, var; | |
1978 enum machine_mode mode = GET_MODE (ivts->base_var); | |
1979 rtx src, dest, set; | |
1980 | |
1981 /* Construct base + DELTA * step. */ | |
1982 if (!delta) | |
1983 expr = ivts->base_var; | |
1984 else | |
1985 { | |
1986 incr = simplify_gen_binary (MULT, mode, | |
1987 ivts->step, gen_int_mode (delta, mode)); | |
1988 expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var), | |
1989 ivts->base_var, incr); | |
1990 } | |
1991 | |
1992 /* Figure out where to do the replacement. */ | |
1993 loc = get_ivts_expr (single_set (insn), ivts); | |
1994 | |
1995 /* If we can make the replacement right away, we're done. */ | |
1996 if (validate_change (insn, loc, expr, 0)) | |
1997 return; | |
1998 | |
1999 /* Otherwise, force EXPR into a register and try again. */ | |
2000 start_sequence (); | |
2001 var = gen_reg_rtx (mode); | |
2002 expr = force_operand (expr, var); | |
2003 if (expr != var) | |
2004 emit_move_insn (var, expr); | |
2005 seq = get_insns (); | |
2006 end_sequence (); | |
2007 emit_insn_before (seq, insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2008 |
0 | 2009 if (validate_change (insn, loc, var, 0)) |
2010 return; | |
2011 | |
2012 /* The last chance. Try recreating the assignment in insn | |
2013 completely from scratch. */ | |
2014 set = single_set (insn); | |
2015 gcc_assert (set); | |
2016 | |
2017 start_sequence (); | |
2018 *loc = var; | |
2019 src = copy_rtx (SET_SRC (set)); | |
2020 dest = copy_rtx (SET_DEST (set)); | |
2021 src = force_operand (src, dest); | |
2022 if (src != dest) | |
2023 emit_move_insn (dest, src); | |
2024 seq = get_insns (); | |
2025 end_sequence (); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2026 |
0 | 2027 emit_insn_before (seq, insn); |
2028 delete_insn (insn); | |
2029 } | |
2030 | |
2031 | |
2032 /* Return one expansion of the accumulator recorded in struct VE. */ | |
2033 | |
2034 static rtx | |
2035 get_expansion (struct var_to_expand *ve) | |
2036 { | |
2037 rtx reg; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2038 |
0 | 2039 if (ve->reuse_expansion == 0) |
2040 reg = ve->reg; | |
2041 else | |
2042 reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2043 |
0 | 2044 if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion) |
2045 ve->reuse_expansion = 0; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2046 else |
0 | 2047 ve->reuse_expansion++; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2048 |
0 | 2049 return reg; |
2050 } | |
2051 | |
2052 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2053 /* Given INSN replace the uses of the accumulator recorded in VE |
0 | 2054 with a new register. */ |
2055 | |
2056 static void | |
2057 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn) | |
2058 { | |
2059 rtx new_reg, set; | |
2060 bool really_new_expansion = false; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2061 |
0 | 2062 set = single_set (insn); |
2063 gcc_assert (set); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2064 |
0 | 2065 /* Generate a new register only if the expansion limit has not been |
2066 reached. Else reuse an already existing expansion. */ | |
2067 if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count) | |
2068 { | |
2069 really_new_expansion = true; | |
2070 new_reg = gen_reg_rtx (GET_MODE (ve->reg)); | |
2071 } | |
2072 else | |
2073 new_reg = get_expansion (ve); | |
2074 | |
2075 validate_change (insn, &SET_DEST (set), new_reg, 1); | |
2076 validate_change (insn, &XEXP (SET_SRC (set), ve->accum_pos), new_reg, 1); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2077 |
0 | 2078 if (apply_change_group ()) |
2079 if (really_new_expansion) | |
2080 { | |
2081 VEC_safe_push (rtx, heap, ve->var_expansions, new_reg); | |
2082 ve->expansion_count++; | |
2083 } | |
2084 } | |
2085 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2086 /* Initialize the variable expansions in loop preheader. PLACE is the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2087 loop-preheader basic block where the initialization of the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2088 expansions should take place. The expansions are initialized with |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2089 (-0) when the operation is plus or minus to honor sign zero. This |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2090 way we can prevent cases where the sign of the final result is |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2091 effected by the sign of the expansion. Here is an example to |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2092 demonstrate this: |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2093 |
0 | 2094 for (i = 0 ; i < n; i++) |
2095 sum += something; | |
2096 | |
2097 ==> | |
2098 | |
2099 sum += something | |
2100 .... | |
2101 i = i+1; | |
2102 sum1 += something | |
2103 .... | |
2104 i = i+1 | |
2105 sum2 += something; | |
2106 .... | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2107 |
0 | 2108 When SUM is initialized with -zero and SOMETHING is also -zero; the |
2109 final result of sum should be -zero thus the expansions sum1 and sum2 | |
2110 should be initialized with -zero as well (otherwise we will get +zero | |
2111 as the final result). */ | |
2112 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2113 static void |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2114 insert_var_expansion_initialization (struct var_to_expand *ve, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2115 basic_block place) |
0 | 2116 { |
2117 rtx seq, var, zero_init, insn; | |
2118 unsigned i; | |
2119 enum machine_mode mode = GET_MODE (ve->reg); | |
2120 bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode); | |
2121 | |
2122 if (VEC_length (rtx, ve->var_expansions) == 0) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2123 return; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2124 |
0 | 2125 start_sequence (); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2126 if (ve->op == PLUS || ve->op == MINUS) |
0 | 2127 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) |
2128 { | |
2129 if (honor_signed_zero_p) | |
2130 zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode); | |
2131 else | |
2132 zero_init = CONST0_RTX (mode); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2133 |
0 | 2134 emit_move_insn (var, zero_init); |
2135 } | |
2136 else if (ve->op == MULT) | |
2137 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) | |
2138 { | |
2139 zero_init = CONST1_RTX (GET_MODE (var)); | |
2140 emit_move_insn (var, zero_init); | |
2141 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2142 |
0 | 2143 seq = get_insns (); |
2144 end_sequence (); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2145 |
0 | 2146 insn = BB_HEAD (place); |
2147 while (!NOTE_INSN_BASIC_BLOCK_P (insn)) | |
2148 insn = NEXT_INSN (insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2149 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2150 emit_insn_after (seq, insn); |
0 | 2151 } |
2152 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2153 /* Combine the variable expansions at the loop exit. PLACE is the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2154 loop exit basic block where the summation of the expansions should |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2155 take place. */ |
0 | 2156 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2157 static void |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2158 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place) |
0 | 2159 { |
2160 rtx sum = ve->reg; | |
2161 rtx expr, seq, var, insn; | |
2162 unsigned i; | |
2163 | |
2164 if (VEC_length (rtx, ve->var_expansions) == 0) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2165 return; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2166 |
0 | 2167 start_sequence (); |
2168 if (ve->op == PLUS || ve->op == MINUS) | |
2169 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) | |
2170 { | |
2171 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), | |
2172 var, sum); | |
2173 } | |
2174 else if (ve->op == MULT) | |
2175 for (i = 0; VEC_iterate (rtx, ve->var_expansions, i, var); i++) | |
2176 { | |
2177 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), | |
2178 var, sum); | |
2179 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2180 |
0 | 2181 expr = force_operand (sum, ve->reg); |
2182 if (expr != ve->reg) | |
2183 emit_move_insn (ve->reg, expr); | |
2184 seq = get_insns (); | |
2185 end_sequence (); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2186 |
0 | 2187 insn = BB_HEAD (place); |
2188 while (!NOTE_INSN_BASIC_BLOCK_P (insn)) | |
2189 insn = NEXT_INSN (insn); | |
2190 | |
2191 emit_insn_after (seq, insn); | |
2192 } | |
2193 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2194 /* Apply loop optimizations in loop copies using the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2195 data which gathered during the unrolling. Structure |
0 | 2196 OPT_INFO record that data. |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2197 |
0 | 2198 UNROLLING is true if we unrolled (not peeled) the loop. |
2199 REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of | |
2200 the loop (as it should happen in complete unrolling, but not in ordinary | |
2201 peeling of the loop). */ | |
2202 | |
2203 static void | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2204 apply_opt_in_copies (struct opt_info *opt_info, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2205 unsigned n_copies, bool unrolling, |
0 | 2206 bool rewrite_original_loop) |
2207 { | |
2208 unsigned i, delta; | |
2209 basic_block bb, orig_bb; | |
2210 rtx insn, orig_insn, next; | |
2211 struct iv_to_split ivts_templ, *ivts; | |
2212 struct var_to_expand ve_templ, *ves; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2213 |
0 | 2214 /* Sanity check -- we need to put initialization in the original loop |
2215 body. */ | |
2216 gcc_assert (!unrolling || rewrite_original_loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2217 |
0 | 2218 /* Allocate the basic variables (i0). */ |
2219 if (opt_info->insns_to_split) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2220 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2221 allocate_basic_variable (ivts); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2222 |
0 | 2223 for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++) |
2224 { | |
2225 bb = BASIC_BLOCK (i); | |
2226 orig_bb = get_bb_original (bb); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2227 |
0 | 2228 /* bb->aux holds position in copy sequence initialized by |
2229 duplicate_loop_to_header_edge. */ | |
2230 delta = determine_split_iv_delta ((size_t)bb->aux, n_copies, | |
2231 unrolling); | |
2232 bb->aux = 0; | |
2233 orig_insn = BB_HEAD (orig_bb); | |
2234 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next) | |
2235 { | |
2236 next = NEXT_INSN (insn); | |
2237 if (!INSN_P (insn)) | |
2238 continue; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2239 |
0 | 2240 while (!INSN_P (orig_insn)) |
2241 orig_insn = NEXT_INSN (orig_insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2242 |
0 | 2243 ivts_templ.insn = orig_insn; |
2244 ve_templ.insn = orig_insn; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2245 |
0 | 2246 /* Apply splitting iv optimization. */ |
2247 if (opt_info->insns_to_split) | |
2248 { | |
2249 ivts = (struct iv_to_split *) | |
2250 htab_find (opt_info->insns_to_split, &ivts_templ); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2251 |
0 | 2252 if (ivts) |
2253 { | |
2254 gcc_assert (GET_CODE (PATTERN (insn)) | |
2255 == GET_CODE (PATTERN (orig_insn))); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2256 |
0 | 2257 if (!delta) |
2258 insert_base_initialization (ivts, insn); | |
2259 split_iv (ivts, insn, delta); | |
2260 } | |
2261 } | |
2262 /* Apply variable expansion optimization. */ | |
2263 if (unrolling && opt_info->insns_with_var_to_expand) | |
2264 { | |
2265 ves = (struct var_to_expand *) | |
2266 htab_find (opt_info->insns_with_var_to_expand, &ve_templ); | |
2267 if (ves) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2268 { |
0 | 2269 gcc_assert (GET_CODE (PATTERN (insn)) |
2270 == GET_CODE (PATTERN (orig_insn))); | |
2271 expand_var_during_unrolling (ves, insn); | |
2272 } | |
2273 } | |
2274 orig_insn = NEXT_INSN (orig_insn); | |
2275 } | |
2276 } | |
2277 | |
2278 if (!rewrite_original_loop) | |
2279 return; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2280 |
0 | 2281 /* Initialize the variable expansions in the loop preheader |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2282 and take care of combining them at the loop exit. */ |
0 | 2283 if (opt_info->insns_with_var_to_expand) |
2284 { | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2285 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2286 insert_var_expansion_initialization (ves, opt_info->loop_preheader); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2287 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2288 combine_var_copies_in_loop_exit (ves, opt_info->loop_exit); |
0 | 2289 } |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2290 |
0 | 2291 /* Rewrite also the original loop body. Find them as originals of the blocks |
2292 in the last copied iteration, i.e. those that have | |
2293 get_bb_copy (get_bb_original (bb)) == bb. */ | |
2294 for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++) | |
2295 { | |
2296 bb = BASIC_BLOCK (i); | |
2297 orig_bb = get_bb_original (bb); | |
2298 if (get_bb_copy (orig_bb) != bb) | |
2299 continue; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2300 |
0 | 2301 delta = determine_split_iv_delta (0, n_copies, unrolling); |
2302 for (orig_insn = BB_HEAD (orig_bb); | |
2303 orig_insn != NEXT_INSN (BB_END (bb)); | |
2304 orig_insn = next) | |
2305 { | |
2306 next = NEXT_INSN (orig_insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2307 |
0 | 2308 if (!INSN_P (orig_insn)) |
2309 continue; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2310 |
0 | 2311 ivts_templ.insn = orig_insn; |
2312 if (opt_info->insns_to_split) | |
2313 { | |
2314 ivts = (struct iv_to_split *) | |
2315 htab_find (opt_info->insns_to_split, &ivts_templ); | |
2316 if (ivts) | |
2317 { | |
2318 if (!delta) | |
2319 insert_base_initialization (ivts, orig_insn); | |
2320 split_iv (ivts, orig_insn, delta); | |
2321 continue; | |
2322 } | |
2323 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2324 |
0 | 2325 } |
2326 } | |
2327 } | |
2328 | |
2329 /* Release OPT_INFO. */ | |
2330 | |
2331 static void | |
2332 free_opt_info (struct opt_info *opt_info) | |
2333 { | |
2334 if (opt_info->insns_to_split) | |
2335 htab_delete (opt_info->insns_to_split); | |
2336 if (opt_info->insns_with_var_to_expand) | |
2337 { | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2338 struct var_to_expand *ves; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2339 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2340 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2341 VEC_free (rtx, heap, ves->var_expansions); |
0 | 2342 htab_delete (opt_info->insns_with_var_to_expand); |
2343 } | |
2344 free (opt_info); | |
2345 } |