Mercurial > hg > CbC > CbC_gcc
annotate gcc/loop-unroll.c @ 67:f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
author | nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 22 Mar 2011 17:18:12 +0900 |
parents | b7f97abdc517 |
children | 04ced10e8804 |
rev | line source |
---|---|
0 | 1 /* Loop unrolling and peeling. |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
2 Copyright (C) 2002, 2003, 2004, 2005, 2007, 2008, 2010 |
0 | 3 Free Software Foundation, Inc. |
4 | |
5 This file is part of GCC. | |
6 | |
7 GCC is free software; you can redistribute it and/or modify it under | |
8 the terms of the GNU General Public License as published by the Free | |
9 Software Foundation; either version 3, or (at your option) any later | |
10 version. | |
11 | |
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with GCC; see the file COPYING3. If not see | |
19 <http://www.gnu.org/licenses/>. */ | |
20 | |
21 #include "config.h" | |
22 #include "system.h" | |
23 #include "coretypes.h" | |
24 #include "tm.h" | |
25 #include "rtl.h" | |
26 #include "hard-reg-set.h" | |
27 #include "obstack.h" | |
28 #include "basic-block.h" | |
29 #include "cfgloop.h" | |
30 #include "cfglayout.h" | |
31 #include "params.h" | |
32 #include "output.h" | |
33 #include "expr.h" | |
34 #include "hashtab.h" | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
35 #include "recog.h" |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
36 #include "target.h" |
0 | 37 |
38 /* This pass performs loop unrolling and peeling. We only perform these | |
39 optimizations on innermost loops (with single exception) because | |
40 the impact on performance is greatest here, and we want to avoid | |
41 unnecessary code size growth. The gain is caused by greater sequentiality | |
42 of code, better code to optimize for further passes and in some cases | |
43 by fewer testings of exit conditions. The main problem is code growth, | |
44 that impacts performance negatively due to effect of caches. | |
45 | |
46 What we do: | |
47 | |
48 -- complete peeling of once-rolling loops; this is the above mentioned | |
49 exception, as this causes loop to be cancelled completely and | |
50 does not cause code growth | |
51 -- complete peeling of loops that roll (small) constant times. | |
52 -- simple peeling of first iterations of loops that do not roll much | |
53 (according to profile feedback) | |
54 -- unrolling of loops that roll constant times; this is almost always | |
55 win, as we get rid of exit condition tests. | |
56 -- unrolling of loops that roll number of times that we can compute | |
57 in runtime; we also get rid of exit condition tests here, but there | |
58 is the extra expense for calculating the number of iterations | |
59 -- simple unrolling of remaining loops; this is performed only if we | |
60 are asked to, as the gain is questionable in this case and often | |
61 it may even slow down the code | |
62 For more detailed descriptions of each of those, see comments at | |
63 appropriate function below. | |
64 | |
65 There is a lot of parameters (defined and described in params.def) that | |
66 control how much we unroll/peel. | |
67 | |
68 ??? A great problem is that we don't have a good way how to determine | |
69 how many times we should unroll the loop; the experiments I have made | |
70 showed that this choice may affect performance in order of several %. | |
71 */ | |
72 | |
73 /* Information about induction variables to split. */ | |
74 | |
75 struct iv_to_split | |
76 { | |
77 rtx insn; /* The insn in that the induction variable occurs. */ | |
78 rtx base_var; /* The variable on that the values in the further | |
79 iterations are based. */ | |
80 rtx step; /* Step of the induction variable. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
81 struct iv_to_split *next; /* Next entry in walking order. */ |
0 | 82 unsigned n_loc; |
83 unsigned loc[3]; /* Location where the definition of the induction | |
84 variable occurs in the insn. For example if | |
85 N_LOC is 2, the expression is located at | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
86 XEXP (XEXP (single_set, loc[0]), loc[1]). */ |
0 | 87 }; |
88 | |
89 /* Information about accumulators to expand. */ | |
90 | |
91 struct var_to_expand | |
92 { | |
93 rtx insn; /* The insn in that the variable expansion occurs. */ | |
94 rtx reg; /* The accumulator which is expanded. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
95 VEC(rtx,heap) *var_expansions; /* The copies of the accumulator which is expanded. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
96 struct var_to_expand *next; /* Next entry in walking order. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
97 enum rtx_code op; /* The type of the accumulation - addition, subtraction |
0 | 98 or multiplication. */ |
99 int expansion_count; /* Count the number of expansions generated so far. */ | |
100 int reuse_expansion; /* The expansion we intend to reuse to expand | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
101 the accumulator. If REUSE_EXPANSION is 0 reuse |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
102 the original accumulator. Else use |
0 | 103 var_expansions[REUSE_EXPANSION - 1]. */ |
104 unsigned accum_pos; /* The position in which the accumulator is placed in | |
105 the insn src. For example in x = x + something | |
106 accum_pos is 0 while in x = something + x accum_pos | |
107 is 1. */ | |
108 }; | |
109 | |
110 /* Information about optimization applied in | |
111 the unrolled loop. */ | |
112 | |
113 struct opt_info | |
114 { | |
115 htab_t insns_to_split; /* A hashtable of insns to split. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
116 struct iv_to_split *iv_to_split_head; /* The first iv to split. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
117 struct iv_to_split **iv_to_split_tail; /* Pointer to the tail of the list. */ |
0 | 118 htab_t insns_with_var_to_expand; /* A hashtable of insns with accumulators |
119 to expand. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
120 struct var_to_expand *var_to_expand_head; /* The first var to expand. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
121 struct var_to_expand **var_to_expand_tail; /* Pointer to the tail of the list. */ |
0 | 122 unsigned first_new_block; /* The first basic block that was |
123 duplicated. */ | |
124 basic_block loop_exit; /* The loop exit basic block. */ | |
125 basic_block loop_preheader; /* The loop preheader basic block. */ | |
126 }; | |
127 | |
128 static void decide_unrolling_and_peeling (int); | |
129 static void peel_loops_completely (int); | |
130 static void decide_peel_simple (struct loop *, int); | |
131 static void decide_peel_once_rolling (struct loop *, int); | |
132 static void decide_peel_completely (struct loop *, int); | |
133 static void decide_unroll_stupid (struct loop *, int); | |
134 static void decide_unroll_constant_iterations (struct loop *, int); | |
135 static void decide_unroll_runtime_iterations (struct loop *, int); | |
136 static void peel_loop_simple (struct loop *); | |
137 static void peel_loop_completely (struct loop *); | |
138 static void unroll_loop_stupid (struct loop *); | |
139 static void unroll_loop_constant_iterations (struct loop *); | |
140 static void unroll_loop_runtime_iterations (struct loop *); | |
141 static struct opt_info *analyze_insns_in_loop (struct loop *); | |
142 static void opt_info_start_duplication (struct opt_info *); | |
143 static void apply_opt_in_copies (struct opt_info *, unsigned, bool, bool); | |
144 static void free_opt_info (struct opt_info *); | |
145 static struct var_to_expand *analyze_insn_to_expand_var (struct loop*, rtx); | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
146 static bool referenced_in_one_insn_in_loop_p (struct loop *, rtx, int *); |
0 | 147 static struct iv_to_split *analyze_iv_to_split_insn (rtx); |
148 static void expand_var_during_unrolling (struct var_to_expand *, rtx); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
149 static void insert_var_expansion_initialization (struct var_to_expand *, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
150 basic_block); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
151 static void combine_var_copies_in_loop_exit (struct var_to_expand *, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
152 basic_block); |
0 | 153 static rtx get_expansion (struct var_to_expand *); |
154 | |
155 /* Unroll and/or peel (depending on FLAGS) LOOPS. */ | |
156 void | |
157 unroll_and_peel_loops (int flags) | |
158 { | |
159 struct loop *loop; | |
160 bool check; | |
161 loop_iterator li; | |
162 | |
163 /* First perform complete loop peeling (it is almost surely a win, | |
164 and affects parameters for further decision a lot). */ | |
165 peel_loops_completely (flags); | |
166 | |
167 /* Now decide rest of unrolling and peeling. */ | |
168 decide_unrolling_and_peeling (flags); | |
169 | |
170 /* Scan the loops, inner ones first. */ | |
171 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) | |
172 { | |
173 check = true; | |
174 /* And perform the appropriate transformations. */ | |
175 switch (loop->lpt_decision.decision) | |
176 { | |
177 case LPT_PEEL_COMPLETELY: | |
178 /* Already done. */ | |
179 gcc_unreachable (); | |
180 case LPT_PEEL_SIMPLE: | |
181 peel_loop_simple (loop); | |
182 break; | |
183 case LPT_UNROLL_CONSTANT: | |
184 unroll_loop_constant_iterations (loop); | |
185 break; | |
186 case LPT_UNROLL_RUNTIME: | |
187 unroll_loop_runtime_iterations (loop); | |
188 break; | |
189 case LPT_UNROLL_STUPID: | |
190 unroll_loop_stupid (loop); | |
191 break; | |
192 case LPT_NONE: | |
193 check = false; | |
194 break; | |
195 default: | |
196 gcc_unreachable (); | |
197 } | |
198 if (check) | |
199 { | |
200 #ifdef ENABLE_CHECKING | |
201 verify_dominators (CDI_DOMINATORS); | |
202 verify_loop_structure (); | |
203 #endif | |
204 } | |
205 } | |
206 | |
207 iv_analysis_done (); | |
208 } | |
209 | |
210 /* Check whether exit of the LOOP is at the end of loop body. */ | |
211 | |
212 static bool | |
213 loop_exit_at_end_p (struct loop *loop) | |
214 { | |
215 struct niter_desc *desc = get_simple_loop_desc (loop); | |
216 rtx insn; | |
217 | |
218 if (desc->in_edge->dest != loop->latch) | |
219 return false; | |
220 | |
221 /* Check that the latch is empty. */ | |
222 FOR_BB_INSNS (loop->latch, insn) | |
223 { | |
224 if (INSN_P (insn)) | |
225 return false; | |
226 } | |
227 | |
228 return true; | |
229 } | |
230 | |
231 /* Depending on FLAGS, check whether to peel loops completely and do so. */ | |
232 static void | |
233 peel_loops_completely (int flags) | |
234 { | |
235 struct loop *loop; | |
236 loop_iterator li; | |
237 | |
238 /* Scan the loops, the inner ones first. */ | |
239 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) | |
240 { | |
241 loop->lpt_decision.decision = LPT_NONE; | |
242 | |
243 if (dump_file) | |
244 fprintf (dump_file, | |
245 "\n;; *** Considering loop %d for complete peeling ***\n", | |
246 loop->num); | |
247 | |
248 loop->ninsns = num_loop_insns (loop); | |
249 | |
250 decide_peel_once_rolling (loop, flags); | |
251 if (loop->lpt_decision.decision == LPT_NONE) | |
252 decide_peel_completely (loop, flags); | |
253 | |
254 if (loop->lpt_decision.decision == LPT_PEEL_COMPLETELY) | |
255 { | |
256 peel_loop_completely (loop); | |
257 #ifdef ENABLE_CHECKING | |
258 verify_dominators (CDI_DOMINATORS); | |
259 verify_loop_structure (); | |
260 #endif | |
261 } | |
262 } | |
263 } | |
264 | |
265 /* Decide whether unroll or peel loops (depending on FLAGS) and how much. */ | |
266 static void | |
267 decide_unrolling_and_peeling (int flags) | |
268 { | |
269 struct loop *loop; | |
270 loop_iterator li; | |
271 | |
272 /* Scan the loops, inner ones first. */ | |
273 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST) | |
274 { | |
275 loop->lpt_decision.decision = LPT_NONE; | |
276 | |
277 if (dump_file) | |
278 fprintf (dump_file, "\n;; *** Considering loop %d ***\n", loop->num); | |
279 | |
280 /* Do not peel cold areas. */ | |
281 if (optimize_loop_for_size_p (loop)) | |
282 { | |
283 if (dump_file) | |
284 fprintf (dump_file, ";; Not considering loop, cold area\n"); | |
285 continue; | |
286 } | |
287 | |
288 /* Can the loop be manipulated? */ | |
289 if (!can_duplicate_loop_p (loop)) | |
290 { | |
291 if (dump_file) | |
292 fprintf (dump_file, | |
293 ";; Not considering loop, cannot duplicate\n"); | |
294 continue; | |
295 } | |
296 | |
297 /* Skip non-innermost loops. */ | |
298 if (loop->inner) | |
299 { | |
300 if (dump_file) | |
301 fprintf (dump_file, ";; Not considering loop, is not innermost\n"); | |
302 continue; | |
303 } | |
304 | |
305 loop->ninsns = num_loop_insns (loop); | |
306 loop->av_ninsns = average_num_loop_insns (loop); | |
307 | |
308 /* Try transformations one by one in decreasing order of | |
309 priority. */ | |
310 | |
311 decide_unroll_constant_iterations (loop, flags); | |
312 if (loop->lpt_decision.decision == LPT_NONE) | |
313 decide_unroll_runtime_iterations (loop, flags); | |
314 if (loop->lpt_decision.decision == LPT_NONE) | |
315 decide_unroll_stupid (loop, flags); | |
316 if (loop->lpt_decision.decision == LPT_NONE) | |
317 decide_peel_simple (loop, flags); | |
318 } | |
319 } | |
320 | |
321 /* Decide whether the LOOP is once rolling and suitable for complete | |
322 peeling. */ | |
323 static void | |
324 decide_peel_once_rolling (struct loop *loop, int flags ATTRIBUTE_UNUSED) | |
325 { | |
326 struct niter_desc *desc; | |
327 | |
328 if (dump_file) | |
329 fprintf (dump_file, "\n;; Considering peeling once rolling loop\n"); | |
330 | |
331 /* Is the loop small enough? */ | |
332 if ((unsigned) PARAM_VALUE (PARAM_MAX_ONCE_PEELED_INSNS) < loop->ninsns) | |
333 { | |
334 if (dump_file) | |
335 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
336 return; | |
337 } | |
338 | |
339 /* Check for simple loops. */ | |
340 desc = get_simple_loop_desc (loop); | |
341 | |
342 /* Check number of iterations. */ | |
343 if (!desc->simple_p | |
344 || desc->assumptions | |
345 || desc->infinite | |
346 || !desc->const_iter | |
347 || desc->niter != 0) | |
348 { | |
349 if (dump_file) | |
350 fprintf (dump_file, | |
351 ";; Unable to prove that the loop rolls exactly once\n"); | |
352 return; | |
353 } | |
354 | |
355 /* Success. */ | |
356 if (dump_file) | |
357 fprintf (dump_file, ";; Decided to peel exactly once rolling loop\n"); | |
358 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY; | |
359 } | |
360 | |
361 /* Decide whether the LOOP is suitable for complete peeling. */ | |
362 static void | |
363 decide_peel_completely (struct loop *loop, int flags ATTRIBUTE_UNUSED) | |
364 { | |
365 unsigned npeel; | |
366 struct niter_desc *desc; | |
367 | |
368 if (dump_file) | |
369 fprintf (dump_file, "\n;; Considering peeling completely\n"); | |
370 | |
371 /* Skip non-innermost loops. */ | |
372 if (loop->inner) | |
373 { | |
374 if (dump_file) | |
375 fprintf (dump_file, ";; Not considering loop, is not innermost\n"); | |
376 return; | |
377 } | |
378 | |
379 /* Do not peel cold areas. */ | |
380 if (optimize_loop_for_size_p (loop)) | |
381 { | |
382 if (dump_file) | |
383 fprintf (dump_file, ";; Not considering loop, cold area\n"); | |
384 return; | |
385 } | |
386 | |
387 /* Can the loop be manipulated? */ | |
388 if (!can_duplicate_loop_p (loop)) | |
389 { | |
390 if (dump_file) | |
391 fprintf (dump_file, | |
392 ";; Not considering loop, cannot duplicate\n"); | |
393 return; | |
394 } | |
395 | |
396 /* npeel = number of iterations to peel. */ | |
397 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEELED_INSNS) / loop->ninsns; | |
398 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES)) | |
399 npeel = PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES); | |
400 | |
401 /* Is the loop small enough? */ | |
402 if (!npeel) | |
403 { | |
404 if (dump_file) | |
405 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
406 return; | |
407 } | |
408 | |
409 /* Check for simple loops. */ | |
410 desc = get_simple_loop_desc (loop); | |
411 | |
412 /* Check number of iterations. */ | |
413 if (!desc->simple_p | |
414 || desc->assumptions | |
415 || !desc->const_iter | |
416 || desc->infinite) | |
417 { | |
418 if (dump_file) | |
419 fprintf (dump_file, | |
420 ";; Unable to prove that the loop iterates constant times\n"); | |
421 return; | |
422 } | |
423 | |
424 if (desc->niter > npeel - 1) | |
425 { | |
426 if (dump_file) | |
427 { | |
428 fprintf (dump_file, | |
429 ";; Not peeling loop completely, rolls too much ("); | |
430 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, desc->niter); | |
431 fprintf (dump_file, " iterations > %d [maximum peelings])\n", npeel); | |
432 } | |
433 return; | |
434 } | |
435 | |
436 /* Success. */ | |
437 if (dump_file) | |
438 fprintf (dump_file, ";; Decided to peel loop completely\n"); | |
439 loop->lpt_decision.decision = LPT_PEEL_COMPLETELY; | |
440 } | |
441 | |
442 /* Peel all iterations of LOOP, remove exit edges and cancel the loop | |
443 completely. The transformation done: | |
444 | |
445 for (i = 0; i < 4; i++) | |
446 body; | |
447 | |
448 ==> | |
449 | |
450 i = 0; | |
451 body; i++; | |
452 body; i++; | |
453 body; i++; | |
454 body; i++; | |
455 */ | |
456 static void | |
457 peel_loop_completely (struct loop *loop) | |
458 { | |
459 sbitmap wont_exit; | |
460 unsigned HOST_WIDE_INT npeel; | |
461 unsigned i; | |
462 VEC (edge, heap) *remove_edges; | |
463 edge ein; | |
464 struct niter_desc *desc = get_simple_loop_desc (loop); | |
465 struct opt_info *opt_info = NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
466 |
0 | 467 npeel = desc->niter; |
468 | |
469 if (npeel) | |
470 { | |
471 bool ok; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
472 |
0 | 473 wont_exit = sbitmap_alloc (npeel + 1); |
474 sbitmap_ones (wont_exit); | |
475 RESET_BIT (wont_exit, 0); | |
476 if (desc->noloop_assumptions) | |
477 RESET_BIT (wont_exit, 1); | |
478 | |
479 remove_edges = NULL; | |
480 | |
481 if (flag_split_ivs_in_unroller) | |
482 opt_info = analyze_insns_in_loop (loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
483 |
0 | 484 opt_info_start_duplication (opt_info); |
485 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
486 npeel, | |
487 wont_exit, desc->out_edge, | |
488 &remove_edges, | |
489 DLTHE_FLAG_UPDATE_FREQ | |
490 | DLTHE_FLAG_COMPLETTE_PEEL | |
491 | (opt_info | |
492 ? DLTHE_RECORD_COPY_NUMBER : 0)); | |
493 gcc_assert (ok); | |
494 | |
495 free (wont_exit); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
496 |
0 | 497 if (opt_info) |
498 { | |
499 apply_opt_in_copies (opt_info, npeel, false, true); | |
500 free_opt_info (opt_info); | |
501 } | |
502 | |
503 /* Remove the exit edges. */ | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
504 FOR_EACH_VEC_ELT (edge, remove_edges, i, ein) |
0 | 505 remove_path (ein); |
506 VEC_free (edge, heap, remove_edges); | |
507 } | |
508 | |
509 ein = desc->in_edge; | |
510 free_simple_loop_desc (loop); | |
511 | |
512 /* Now remove the unreachable part of the last iteration and cancel | |
513 the loop. */ | |
514 remove_path (ein); | |
515 | |
516 if (dump_file) | |
517 fprintf (dump_file, ";; Peeled loop completely, %d times\n", (int) npeel); | |
518 } | |
519 | |
520 /* Decide whether to unroll LOOP iterating constant number of times | |
521 and how much. */ | |
522 | |
523 static void | |
524 decide_unroll_constant_iterations (struct loop *loop, int flags) | |
525 { | |
526 unsigned nunroll, nunroll_by_av, best_copies, best_unroll = 0, n_copies, i; | |
527 struct niter_desc *desc; | |
528 | |
529 if (!(flags & UAP_UNROLL)) | |
530 { | |
531 /* We were not asked to, just return back silently. */ | |
532 return; | |
533 } | |
534 | |
535 if (dump_file) | |
536 fprintf (dump_file, | |
537 "\n;; Considering unrolling loop with constant " | |
538 "number of iterations\n"); | |
539 | |
540 /* nunroll = total number of copies of the original loop body in | |
541 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ | |
542 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns; | |
543 nunroll_by_av | |
544 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns; | |
545 if (nunroll > nunroll_by_av) | |
546 nunroll = nunroll_by_av; | |
547 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) | |
548 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); | |
549 | |
550 /* Skip big loops. */ | |
551 if (nunroll <= 1) | |
552 { | |
553 if (dump_file) | |
554 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
555 return; | |
556 } | |
557 | |
558 /* Check for simple loops. */ | |
559 desc = get_simple_loop_desc (loop); | |
560 | |
561 /* Check number of iterations. */ | |
562 if (!desc->simple_p || !desc->const_iter || desc->assumptions) | |
563 { | |
564 if (dump_file) | |
565 fprintf (dump_file, | |
566 ";; Unable to prove that the loop iterates constant times\n"); | |
567 return; | |
568 } | |
569 | |
570 /* Check whether the loop rolls enough to consider. */ | |
571 if (desc->niter < 2 * nunroll) | |
572 { | |
573 if (dump_file) | |
574 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); | |
575 return; | |
576 } | |
577 | |
578 /* Success; now compute number of iterations to unroll. We alter | |
579 nunroll so that as few as possible copies of loop body are | |
580 necessary, while still not decreasing the number of unrollings | |
581 too much (at most by 1). */ | |
582 best_copies = 2 * nunroll + 10; | |
583 | |
584 i = 2 * nunroll + 2; | |
585 if (i - 1 >= desc->niter) | |
586 i = desc->niter - 2; | |
587 | |
588 for (; i >= nunroll - 1; i--) | |
589 { | |
590 unsigned exit_mod = desc->niter % (i + 1); | |
591 | |
592 if (!loop_exit_at_end_p (loop)) | |
593 n_copies = exit_mod + i + 1; | |
594 else if (exit_mod != (unsigned) i | |
595 || desc->noloop_assumptions != NULL_RTX) | |
596 n_copies = exit_mod + i + 2; | |
597 else | |
598 n_copies = i + 1; | |
599 | |
600 if (n_copies < best_copies) | |
601 { | |
602 best_copies = n_copies; | |
603 best_unroll = i; | |
604 } | |
605 } | |
606 | |
607 if (dump_file) | |
608 fprintf (dump_file, ";; max_unroll %d (%d copies, initial %d).\n", | |
609 best_unroll + 1, best_copies, nunroll); | |
610 | |
611 loop->lpt_decision.decision = LPT_UNROLL_CONSTANT; | |
612 loop->lpt_decision.times = best_unroll; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
613 |
0 | 614 if (dump_file) |
615 fprintf (dump_file, | |
616 ";; Decided to unroll the constant times rolling loop, %d times.\n", | |
617 loop->lpt_decision.times); | |
618 } | |
619 | |
620 /* Unroll LOOP with constant number of iterations LOOP->LPT_DECISION.TIMES + 1 | |
621 times. The transformation does this: | |
622 | |
623 for (i = 0; i < 102; i++) | |
624 body; | |
625 | |
626 ==> | |
627 | |
628 i = 0; | |
629 body; i++; | |
630 body; i++; | |
631 while (i < 102) | |
632 { | |
633 body; i++; | |
634 body; i++; | |
635 body; i++; | |
636 body; i++; | |
637 } | |
638 */ | |
639 static void | |
640 unroll_loop_constant_iterations (struct loop *loop) | |
641 { | |
642 unsigned HOST_WIDE_INT niter; | |
643 unsigned exit_mod; | |
644 sbitmap wont_exit; | |
645 unsigned i; | |
646 VEC (edge, heap) *remove_edges; | |
647 edge e; | |
648 unsigned max_unroll = loop->lpt_decision.times; | |
649 struct niter_desc *desc = get_simple_loop_desc (loop); | |
650 bool exit_at_end = loop_exit_at_end_p (loop); | |
651 struct opt_info *opt_info = NULL; | |
652 bool ok; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
653 |
0 | 654 niter = desc->niter; |
655 | |
656 /* Should not get here (such loop should be peeled instead). */ | |
657 gcc_assert (niter > max_unroll + 1); | |
658 | |
659 exit_mod = niter % (max_unroll + 1); | |
660 | |
661 wont_exit = sbitmap_alloc (max_unroll + 1); | |
662 sbitmap_ones (wont_exit); | |
663 | |
664 remove_edges = NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
665 if (flag_split_ivs_in_unroller |
0 | 666 || flag_variable_expansion_in_unroller) |
667 opt_info = analyze_insns_in_loop (loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
668 |
0 | 669 if (!exit_at_end) |
670 { | |
671 /* The exit is not at the end of the loop; leave exit test | |
672 in the first copy, so that the loops that start with test | |
673 of exit condition have continuous body after unrolling. */ | |
674 | |
675 if (dump_file) | |
676 fprintf (dump_file, ";; Condition on beginning of loop.\n"); | |
677 | |
678 /* Peel exit_mod iterations. */ | |
679 RESET_BIT (wont_exit, 0); | |
680 if (desc->noloop_assumptions) | |
681 RESET_BIT (wont_exit, 1); | |
682 | |
683 if (exit_mod) | |
684 { | |
685 opt_info_start_duplication (opt_info); | |
686 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
687 exit_mod, | |
688 wont_exit, desc->out_edge, | |
689 &remove_edges, | |
690 DLTHE_FLAG_UPDATE_FREQ | |
691 | (opt_info && exit_mod > 1 | |
692 ? DLTHE_RECORD_COPY_NUMBER | |
693 : 0)); | |
694 gcc_assert (ok); | |
695 | |
696 if (opt_info && exit_mod > 1) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
697 apply_opt_in_copies (opt_info, exit_mod, false, false); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
698 |
0 | 699 desc->noloop_assumptions = NULL_RTX; |
700 desc->niter -= exit_mod; | |
701 desc->niter_max -= exit_mod; | |
702 } | |
703 | |
704 SET_BIT (wont_exit, 1); | |
705 } | |
706 else | |
707 { | |
708 /* Leave exit test in last copy, for the same reason as above if | |
709 the loop tests the condition at the end of loop body. */ | |
710 | |
711 if (dump_file) | |
712 fprintf (dump_file, ";; Condition on end of loop.\n"); | |
713 | |
714 /* We know that niter >= max_unroll + 2; so we do not need to care of | |
715 case when we would exit before reaching the loop. So just peel | |
716 exit_mod + 1 iterations. */ | |
717 if (exit_mod != max_unroll | |
718 || desc->noloop_assumptions) | |
719 { | |
720 RESET_BIT (wont_exit, 0); | |
721 if (desc->noloop_assumptions) | |
722 RESET_BIT (wont_exit, 1); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
723 |
0 | 724 opt_info_start_duplication (opt_info); |
725 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
726 exit_mod + 1, | |
727 wont_exit, desc->out_edge, | |
728 &remove_edges, | |
729 DLTHE_FLAG_UPDATE_FREQ | |
730 | (opt_info && exit_mod > 0 | |
731 ? DLTHE_RECORD_COPY_NUMBER | |
732 : 0)); | |
733 gcc_assert (ok); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
734 |
0 | 735 if (opt_info && exit_mod > 0) |
736 apply_opt_in_copies (opt_info, exit_mod + 1, false, false); | |
737 | |
738 desc->niter -= exit_mod + 1; | |
739 desc->niter_max -= exit_mod + 1; | |
740 desc->noloop_assumptions = NULL_RTX; | |
741 | |
742 SET_BIT (wont_exit, 0); | |
743 SET_BIT (wont_exit, 1); | |
744 } | |
745 | |
746 RESET_BIT (wont_exit, max_unroll); | |
747 } | |
748 | |
749 /* Now unroll the loop. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
750 |
0 | 751 opt_info_start_duplication (opt_info); |
752 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), | |
753 max_unroll, | |
754 wont_exit, desc->out_edge, | |
755 &remove_edges, | |
756 DLTHE_FLAG_UPDATE_FREQ | |
757 | (opt_info | |
758 ? DLTHE_RECORD_COPY_NUMBER | |
759 : 0)); | |
760 gcc_assert (ok); | |
761 | |
762 if (opt_info) | |
763 { | |
764 apply_opt_in_copies (opt_info, max_unroll, true, true); | |
765 free_opt_info (opt_info); | |
766 } | |
767 | |
768 free (wont_exit); | |
769 | |
770 if (exit_at_end) | |
771 { | |
772 basic_block exit_block = get_bb_copy (desc->in_edge->src); | |
773 /* Find a new in and out edge; they are in the last copy we have made. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
774 |
0 | 775 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest) |
776 { | |
777 desc->out_edge = EDGE_SUCC (exit_block, 0); | |
778 desc->in_edge = EDGE_SUCC (exit_block, 1); | |
779 } | |
780 else | |
781 { | |
782 desc->out_edge = EDGE_SUCC (exit_block, 1); | |
783 desc->in_edge = EDGE_SUCC (exit_block, 0); | |
784 } | |
785 } | |
786 | |
787 desc->niter /= max_unroll + 1; | |
788 desc->niter_max /= max_unroll + 1; | |
789 desc->niter_expr = GEN_INT (desc->niter); | |
790 | |
791 /* Remove the edges. */ | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
792 FOR_EACH_VEC_ELT (edge, remove_edges, i, e) |
0 | 793 remove_path (e); |
794 VEC_free (edge, heap, remove_edges); | |
795 | |
796 if (dump_file) | |
797 fprintf (dump_file, | |
798 ";; Unrolled loop %d times, constant # of iterations %i insns\n", | |
799 max_unroll, num_loop_insns (loop)); | |
800 } | |
801 | |
802 /* Decide whether to unroll LOOP iterating runtime computable number of times | |
803 and how much. */ | |
804 static void | |
805 decide_unroll_runtime_iterations (struct loop *loop, int flags) | |
806 { | |
807 unsigned nunroll, nunroll_by_av, i; | |
808 struct niter_desc *desc; | |
809 | |
810 if (!(flags & UAP_UNROLL)) | |
811 { | |
812 /* We were not asked to, just return back silently. */ | |
813 return; | |
814 } | |
815 | |
816 if (dump_file) | |
817 fprintf (dump_file, | |
818 "\n;; Considering unrolling loop with runtime " | |
819 "computable number of iterations\n"); | |
820 | |
821 /* nunroll = total number of copies of the original loop body in | |
822 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ | |
823 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns; | |
824 nunroll_by_av = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns; | |
825 if (nunroll > nunroll_by_av) | |
826 nunroll = nunroll_by_av; | |
827 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) | |
828 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); | |
829 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
830 if (targetm.loop_unroll_adjust) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
831 nunroll = targetm.loop_unroll_adjust (nunroll, loop); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
832 |
0 | 833 /* Skip big loops. */ |
834 if (nunroll <= 1) | |
835 { | |
836 if (dump_file) | |
837 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
838 return; | |
839 } | |
840 | |
841 /* Check for simple loops. */ | |
842 desc = get_simple_loop_desc (loop); | |
843 | |
844 /* Check simpleness. */ | |
845 if (!desc->simple_p || desc->assumptions) | |
846 { | |
847 if (dump_file) | |
848 fprintf (dump_file, | |
849 ";; Unable to prove that the number of iterations " | |
850 "can be counted in runtime\n"); | |
851 return; | |
852 } | |
853 | |
854 if (desc->const_iter) | |
855 { | |
856 if (dump_file) | |
857 fprintf (dump_file, ";; Loop iterates constant times\n"); | |
858 return; | |
859 } | |
860 | |
861 /* If we have profile feedback, check whether the loop rolls. */ | |
862 if (loop->header->count && expected_loop_iterations (loop) < 2 * nunroll) | |
863 { | |
864 if (dump_file) | |
865 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); | |
866 return; | |
867 } | |
868 | |
869 /* Success; now force nunroll to be power of 2, as we are unable to | |
870 cope with overflows in computation of number of iterations. */ | |
871 for (i = 1; 2 * i <= nunroll; i *= 2) | |
872 continue; | |
873 | |
874 loop->lpt_decision.decision = LPT_UNROLL_RUNTIME; | |
875 loop->lpt_decision.times = i - 1; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
876 |
0 | 877 if (dump_file) |
878 fprintf (dump_file, | |
879 ";; Decided to unroll the runtime computable " | |
880 "times rolling loop, %d times.\n", | |
881 loop->lpt_decision.times); | |
882 } | |
883 | |
884 /* Splits edge E and inserts the sequence of instructions INSNS on it, and | |
885 returns the newly created block. If INSNS is NULL_RTX, nothing is changed | |
886 and NULL is returned instead. */ | |
887 | |
888 basic_block | |
889 split_edge_and_insert (edge e, rtx insns) | |
890 { | |
891 basic_block bb; | |
892 | |
893 if (!insns) | |
894 return NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
895 bb = split_edge (e); |
0 | 896 emit_insn_after (insns, BB_END (bb)); |
897 | |
898 /* ??? We used to assume that INSNS can contain control flow insns, and | |
899 that we had to try to find sub basic blocks in BB to maintain a valid | |
900 CFG. For this purpose we used to set the BB_SUPERBLOCK flag on BB | |
901 and call break_superblocks when going out of cfglayout mode. But it | |
902 turns out that this never happens; and that if it does ever happen, | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
903 the TODO_verify_flow at the end of the RTL loop passes would fail. |
0 | 904 |
905 There are two reasons why we expected we could have control flow insns | |
906 in INSNS. The first is when a comparison has to be done in parts, and | |
907 the second is when the number of iterations is computed for loops with | |
908 the number of iterations known at runtime. In both cases, test cases | |
909 to get control flow in INSNS appear to be impossible to construct: | |
910 | |
911 * If do_compare_rtx_and_jump needs several branches to do comparison | |
912 in a mode that needs comparison by parts, we cannot analyze the | |
913 number of iterations of the loop, and we never get to unrolling it. | |
914 | |
915 * The code in expand_divmod that was suspected to cause creation of | |
916 branching code seems to be only accessed for signed division. The | |
917 divisions used by # of iterations analysis are always unsigned. | |
918 Problems might arise on architectures that emits branching code | |
919 for some operations that may appear in the unroller (especially | |
920 for division), but we have no such architectures. | |
921 | |
922 Considering all this, it was decided that we should for now assume | |
923 that INSNS can in theory contain control flow insns, but in practice | |
924 it never does. So we don't handle the theoretical case, and should | |
925 a real failure ever show up, we have a pretty good clue for how to | |
926 fix it. */ | |
927 | |
928 return bb; | |
929 } | |
930 | |
931 /* Unroll LOOP for that we are able to count number of iterations in runtime | |
932 LOOP->LPT_DECISION.TIMES + 1 times. The transformation does this (with some | |
933 extra care for case n < 0): | |
934 | |
935 for (i = 0; i < n; i++) | |
936 body; | |
937 | |
938 ==> | |
939 | |
940 i = 0; | |
941 mod = n % 4; | |
942 | |
943 switch (mod) | |
944 { | |
945 case 3: | |
946 body; i++; | |
947 case 2: | |
948 body; i++; | |
949 case 1: | |
950 body; i++; | |
951 case 0: ; | |
952 } | |
953 | |
954 while (i < n) | |
955 { | |
956 body; i++; | |
957 body; i++; | |
958 body; i++; | |
959 body; i++; | |
960 } | |
961 */ | |
962 static void | |
963 unroll_loop_runtime_iterations (struct loop *loop) | |
964 { | |
965 rtx old_niter, niter, init_code, branch_code, tmp; | |
966 unsigned i, j, p; | |
967 basic_block preheader, *body, swtch, ezc_swtch; | |
968 VEC (basic_block, heap) *dom_bbs; | |
969 sbitmap wont_exit; | |
970 int may_exit_copy; | |
971 unsigned n_peel; | |
972 VEC (edge, heap) *remove_edges; | |
973 edge e; | |
974 bool extra_zero_check, last_may_exit; | |
975 unsigned max_unroll = loop->lpt_decision.times; | |
976 struct niter_desc *desc = get_simple_loop_desc (loop); | |
977 bool exit_at_end = loop_exit_at_end_p (loop); | |
978 struct opt_info *opt_info = NULL; | |
979 bool ok; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
980 |
0 | 981 if (flag_split_ivs_in_unroller |
982 || flag_variable_expansion_in_unroller) | |
983 opt_info = analyze_insns_in_loop (loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
984 |
0 | 985 /* Remember blocks whose dominators will have to be updated. */ |
986 dom_bbs = NULL; | |
987 | |
988 body = get_loop_body (loop); | |
989 for (i = 0; i < loop->num_nodes; i++) | |
990 { | |
991 VEC (basic_block, heap) *ldom; | |
992 basic_block bb; | |
993 | |
994 ldom = get_dominated_by (CDI_DOMINATORS, body[i]); | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
995 FOR_EACH_VEC_ELT (basic_block, ldom, j, bb) |
0 | 996 if (!flow_bb_inside_loop_p (loop, bb)) |
997 VEC_safe_push (basic_block, heap, dom_bbs, bb); | |
998 | |
999 VEC_free (basic_block, heap, ldom); | |
1000 } | |
1001 free (body); | |
1002 | |
1003 if (!exit_at_end) | |
1004 { | |
1005 /* Leave exit in first copy (for explanation why see comment in | |
1006 unroll_loop_constant_iterations). */ | |
1007 may_exit_copy = 0; | |
1008 n_peel = max_unroll - 1; | |
1009 extra_zero_check = true; | |
1010 last_may_exit = false; | |
1011 } | |
1012 else | |
1013 { | |
1014 /* Leave exit in last copy (for explanation why see comment in | |
1015 unroll_loop_constant_iterations). */ | |
1016 may_exit_copy = max_unroll; | |
1017 n_peel = max_unroll; | |
1018 extra_zero_check = false; | |
1019 last_may_exit = true; | |
1020 } | |
1021 | |
1022 /* Get expression for number of iterations. */ | |
1023 start_sequence (); | |
1024 old_niter = niter = gen_reg_rtx (desc->mode); | |
1025 tmp = force_operand (copy_rtx (desc->niter_expr), niter); | |
1026 if (tmp != niter) | |
1027 emit_move_insn (niter, tmp); | |
1028 | |
1029 /* Count modulo by ANDing it with max_unroll; we use the fact that | |
1030 the number of unrollings is a power of two, and thus this is correct | |
1031 even if there is overflow in the computation. */ | |
1032 niter = expand_simple_binop (desc->mode, AND, | |
1033 niter, | |
1034 GEN_INT (max_unroll), | |
1035 NULL_RTX, 0, OPTAB_LIB_WIDEN); | |
1036 | |
1037 init_code = get_insns (); | |
1038 end_sequence (); | |
1039 unshare_all_rtl_in_chain (init_code); | |
1040 | |
1041 /* Precondition the loop. */ | |
1042 split_edge_and_insert (loop_preheader_edge (loop), init_code); | |
1043 | |
1044 remove_edges = NULL; | |
1045 | |
1046 wont_exit = sbitmap_alloc (max_unroll + 2); | |
1047 | |
1048 /* Peel the first copy of loop body (almost always we must leave exit test | |
1049 here; the only exception is when we have extra zero check and the number | |
1050 of iterations is reliable. Also record the place of (possible) extra | |
1051 zero check. */ | |
1052 sbitmap_zero (wont_exit); | |
1053 if (extra_zero_check | |
1054 && !desc->noloop_assumptions) | |
1055 SET_BIT (wont_exit, 1); | |
1056 ezc_swtch = loop_preheader_edge (loop)->src; | |
1057 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
1058 1, wont_exit, desc->out_edge, | |
1059 &remove_edges, | |
1060 DLTHE_FLAG_UPDATE_FREQ); | |
1061 gcc_assert (ok); | |
1062 | |
1063 /* Record the place where switch will be built for preconditioning. */ | |
1064 swtch = split_edge (loop_preheader_edge (loop)); | |
1065 | |
1066 for (i = 0; i < n_peel; i++) | |
1067 { | |
1068 /* Peel the copy. */ | |
1069 sbitmap_zero (wont_exit); | |
1070 if (i != n_peel - 1 || !last_may_exit) | |
1071 SET_BIT (wont_exit, 1); | |
1072 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), | |
1073 1, wont_exit, desc->out_edge, | |
1074 &remove_edges, | |
1075 DLTHE_FLAG_UPDATE_FREQ); | |
1076 gcc_assert (ok); | |
1077 | |
1078 /* Create item for switch. */ | |
1079 j = n_peel - i - (extra_zero_check ? 0 : 1); | |
1080 p = REG_BR_PROB_BASE / (i + 2); | |
1081 | |
1082 preheader = split_edge (loop_preheader_edge (loop)); | |
1083 branch_code = compare_and_jump_seq (copy_rtx (niter), GEN_INT (j), EQ, | |
1084 block_label (preheader), p, | |
1085 NULL_RTX); | |
1086 | |
1087 /* We rely on the fact that the compare and jump cannot be optimized out, | |
1088 and hence the cfg we create is correct. */ | |
1089 gcc_assert (branch_code != NULL_RTX); | |
1090 | |
1091 swtch = split_edge_and_insert (single_pred_edge (swtch), branch_code); | |
1092 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); | |
1093 single_pred_edge (swtch)->probability = REG_BR_PROB_BASE - p; | |
1094 e = make_edge (swtch, preheader, | |
1095 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); | |
1096 e->probability = p; | |
1097 } | |
1098 | |
1099 if (extra_zero_check) | |
1100 { | |
1101 /* Add branch for zero iterations. */ | |
1102 p = REG_BR_PROB_BASE / (max_unroll + 1); | |
1103 swtch = ezc_swtch; | |
1104 preheader = split_edge (loop_preheader_edge (loop)); | |
1105 branch_code = compare_and_jump_seq (copy_rtx (niter), const0_rtx, EQ, | |
1106 block_label (preheader), p, | |
1107 NULL_RTX); | |
1108 gcc_assert (branch_code != NULL_RTX); | |
1109 | |
1110 swtch = split_edge_and_insert (single_succ_edge (swtch), branch_code); | |
1111 set_immediate_dominator (CDI_DOMINATORS, preheader, swtch); | |
1112 single_succ_edge (swtch)->probability = REG_BR_PROB_BASE - p; | |
1113 e = make_edge (swtch, preheader, | |
1114 single_succ_edge (swtch)->flags & EDGE_IRREDUCIBLE_LOOP); | |
1115 e->probability = p; | |
1116 } | |
1117 | |
1118 /* Recount dominators for outer blocks. */ | |
1119 iterate_fix_dominators (CDI_DOMINATORS, dom_bbs, false); | |
1120 | |
1121 /* And unroll loop. */ | |
1122 | |
1123 sbitmap_ones (wont_exit); | |
1124 RESET_BIT (wont_exit, may_exit_copy); | |
1125 opt_info_start_duplication (opt_info); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1126 |
0 | 1127 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), |
1128 max_unroll, | |
1129 wont_exit, desc->out_edge, | |
1130 &remove_edges, | |
1131 DLTHE_FLAG_UPDATE_FREQ | |
1132 | (opt_info | |
1133 ? DLTHE_RECORD_COPY_NUMBER | |
1134 : 0)); | |
1135 gcc_assert (ok); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1136 |
0 | 1137 if (opt_info) |
1138 { | |
1139 apply_opt_in_copies (opt_info, max_unroll, true, true); | |
1140 free_opt_info (opt_info); | |
1141 } | |
1142 | |
1143 free (wont_exit); | |
1144 | |
1145 if (exit_at_end) | |
1146 { | |
1147 basic_block exit_block = get_bb_copy (desc->in_edge->src); | |
1148 /* Find a new in and out edge; they are in the last copy we have | |
1149 made. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1150 |
0 | 1151 if (EDGE_SUCC (exit_block, 0)->dest == desc->out_edge->dest) |
1152 { | |
1153 desc->out_edge = EDGE_SUCC (exit_block, 0); | |
1154 desc->in_edge = EDGE_SUCC (exit_block, 1); | |
1155 } | |
1156 else | |
1157 { | |
1158 desc->out_edge = EDGE_SUCC (exit_block, 1); | |
1159 desc->in_edge = EDGE_SUCC (exit_block, 0); | |
1160 } | |
1161 } | |
1162 | |
1163 /* Remove the edges. */ | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1164 FOR_EACH_VEC_ELT (edge, remove_edges, i, e) |
0 | 1165 remove_path (e); |
1166 VEC_free (edge, heap, remove_edges); | |
1167 | |
1168 /* We must be careful when updating the number of iterations due to | |
1169 preconditioning and the fact that the value must be valid at entry | |
1170 of the loop. After passing through the above code, we see that | |
1171 the correct new number of iterations is this: */ | |
1172 gcc_assert (!desc->const_iter); | |
1173 desc->niter_expr = | |
1174 simplify_gen_binary (UDIV, desc->mode, old_niter, | |
1175 GEN_INT (max_unroll + 1)); | |
1176 desc->niter_max /= max_unroll + 1; | |
1177 if (exit_at_end) | |
1178 { | |
1179 desc->niter_expr = | |
1180 simplify_gen_binary (MINUS, desc->mode, desc->niter_expr, const1_rtx); | |
1181 desc->noloop_assumptions = NULL_RTX; | |
1182 desc->niter_max--; | |
1183 } | |
1184 | |
1185 if (dump_file) | |
1186 fprintf (dump_file, | |
1187 ";; Unrolled loop %d times, counting # of iterations " | |
1188 "in runtime, %i insns\n", | |
1189 max_unroll, num_loop_insns (loop)); | |
1190 | |
1191 VEC_free (basic_block, heap, dom_bbs); | |
1192 } | |
1193 | |
1194 /* Decide whether to simply peel LOOP and how much. */ | |
1195 static void | |
1196 decide_peel_simple (struct loop *loop, int flags) | |
1197 { | |
1198 unsigned npeel; | |
1199 struct niter_desc *desc; | |
1200 | |
1201 if (!(flags & UAP_PEEL)) | |
1202 { | |
1203 /* We were not asked to, just return back silently. */ | |
1204 return; | |
1205 } | |
1206 | |
1207 if (dump_file) | |
1208 fprintf (dump_file, "\n;; Considering simply peeling loop\n"); | |
1209 | |
1210 /* npeel = number of iterations to peel. */ | |
1211 npeel = PARAM_VALUE (PARAM_MAX_PEELED_INSNS) / loop->ninsns; | |
1212 if (npeel > (unsigned) PARAM_VALUE (PARAM_MAX_PEEL_TIMES)) | |
1213 npeel = PARAM_VALUE (PARAM_MAX_PEEL_TIMES); | |
1214 | |
1215 /* Skip big loops. */ | |
1216 if (!npeel) | |
1217 { | |
1218 if (dump_file) | |
1219 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
1220 return; | |
1221 } | |
1222 | |
1223 /* Check for simple loops. */ | |
1224 desc = get_simple_loop_desc (loop); | |
1225 | |
1226 /* Check number of iterations. */ | |
1227 if (desc->simple_p && !desc->assumptions && desc->const_iter) | |
1228 { | |
1229 if (dump_file) | |
1230 fprintf (dump_file, ";; Loop iterates constant times\n"); | |
1231 return; | |
1232 } | |
1233 | |
1234 /* Do not simply peel loops with branches inside -- it increases number | |
1235 of mispredicts. */ | |
1236 if (num_loop_branches (loop) > 1) | |
1237 { | |
1238 if (dump_file) | |
1239 fprintf (dump_file, ";; Not peeling, contains branches\n"); | |
1240 return; | |
1241 } | |
1242 | |
1243 if (loop->header->count) | |
1244 { | |
1245 unsigned niter = expected_loop_iterations (loop); | |
1246 if (niter + 1 > npeel) | |
1247 { | |
1248 if (dump_file) | |
1249 { | |
1250 fprintf (dump_file, ";; Not peeling loop, rolls too much ("); | |
1251 fprintf (dump_file, HOST_WIDEST_INT_PRINT_DEC, | |
1252 (HOST_WIDEST_INT) (niter + 1)); | |
1253 fprintf (dump_file, " iterations > %d [maximum peelings])\n", | |
1254 npeel); | |
1255 } | |
1256 return; | |
1257 } | |
1258 npeel = niter + 1; | |
1259 } | |
1260 else | |
1261 { | |
1262 /* For now we have no good heuristics to decide whether loop peeling | |
1263 will be effective, so disable it. */ | |
1264 if (dump_file) | |
1265 fprintf (dump_file, | |
1266 ";; Not peeling loop, no evidence it will be profitable\n"); | |
1267 return; | |
1268 } | |
1269 | |
1270 /* Success. */ | |
1271 loop->lpt_decision.decision = LPT_PEEL_SIMPLE; | |
1272 loop->lpt_decision.times = npeel; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1273 |
0 | 1274 if (dump_file) |
1275 fprintf (dump_file, ";; Decided to simply peel the loop, %d times.\n", | |
1276 loop->lpt_decision.times); | |
1277 } | |
1278 | |
1279 /* Peel a LOOP LOOP->LPT_DECISION.TIMES times. The transformation: | |
1280 while (cond) | |
1281 body; | |
1282 | |
1283 ==> | |
1284 | |
1285 if (!cond) goto end; | |
1286 body; | |
1287 if (!cond) goto end; | |
1288 body; | |
1289 while (cond) | |
1290 body; | |
1291 end: ; | |
1292 */ | |
1293 static void | |
1294 peel_loop_simple (struct loop *loop) | |
1295 { | |
1296 sbitmap wont_exit; | |
1297 unsigned npeel = loop->lpt_decision.times; | |
1298 struct niter_desc *desc = get_simple_loop_desc (loop); | |
1299 struct opt_info *opt_info = NULL; | |
1300 bool ok; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1301 |
0 | 1302 if (flag_split_ivs_in_unroller && npeel > 1) |
1303 opt_info = analyze_insns_in_loop (loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1304 |
0 | 1305 wont_exit = sbitmap_alloc (npeel + 1); |
1306 sbitmap_zero (wont_exit); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1307 |
0 | 1308 opt_info_start_duplication (opt_info); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1309 |
0 | 1310 ok = duplicate_loop_to_header_edge (loop, loop_preheader_edge (loop), |
1311 npeel, wont_exit, NULL, | |
1312 NULL, DLTHE_FLAG_UPDATE_FREQ | |
1313 | (opt_info | |
1314 ? DLTHE_RECORD_COPY_NUMBER | |
1315 : 0)); | |
1316 gcc_assert (ok); | |
1317 | |
1318 free (wont_exit); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1319 |
0 | 1320 if (opt_info) |
1321 { | |
1322 apply_opt_in_copies (opt_info, npeel, false, false); | |
1323 free_opt_info (opt_info); | |
1324 } | |
1325 | |
1326 if (desc->simple_p) | |
1327 { | |
1328 if (desc->const_iter) | |
1329 { | |
1330 desc->niter -= npeel; | |
1331 desc->niter_expr = GEN_INT (desc->niter); | |
1332 desc->noloop_assumptions = NULL_RTX; | |
1333 } | |
1334 else | |
1335 { | |
1336 /* We cannot just update niter_expr, as its value might be clobbered | |
1337 inside loop. We could handle this by counting the number into | |
1338 temporary just like we do in runtime unrolling, but it does not | |
1339 seem worthwhile. */ | |
1340 free_simple_loop_desc (loop); | |
1341 } | |
1342 } | |
1343 if (dump_file) | |
1344 fprintf (dump_file, ";; Peeling loop %d times\n", npeel); | |
1345 } | |
1346 | |
1347 /* Decide whether to unroll LOOP stupidly and how much. */ | |
1348 static void | |
1349 decide_unroll_stupid (struct loop *loop, int flags) | |
1350 { | |
1351 unsigned nunroll, nunroll_by_av, i; | |
1352 struct niter_desc *desc; | |
1353 | |
1354 if (!(flags & UAP_UNROLL_ALL)) | |
1355 { | |
1356 /* We were not asked to, just return back silently. */ | |
1357 return; | |
1358 } | |
1359 | |
1360 if (dump_file) | |
1361 fprintf (dump_file, "\n;; Considering unrolling loop stupidly\n"); | |
1362 | |
1363 /* nunroll = total number of copies of the original loop body in | |
1364 unrolled loop (i.e. if it is 2, we have to duplicate loop body once. */ | |
1365 nunroll = PARAM_VALUE (PARAM_MAX_UNROLLED_INSNS) / loop->ninsns; | |
1366 nunroll_by_av | |
1367 = PARAM_VALUE (PARAM_MAX_AVERAGE_UNROLLED_INSNS) / loop->av_ninsns; | |
1368 if (nunroll > nunroll_by_av) | |
1369 nunroll = nunroll_by_av; | |
1370 if (nunroll > (unsigned) PARAM_VALUE (PARAM_MAX_UNROLL_TIMES)) | |
1371 nunroll = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES); | |
1372 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1373 if (targetm.loop_unroll_adjust) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1374 nunroll = targetm.loop_unroll_adjust (nunroll, loop); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1375 |
0 | 1376 /* Skip big loops. */ |
1377 if (nunroll <= 1) | |
1378 { | |
1379 if (dump_file) | |
1380 fprintf (dump_file, ";; Not considering loop, is too big\n"); | |
1381 return; | |
1382 } | |
1383 | |
1384 /* Check for simple loops. */ | |
1385 desc = get_simple_loop_desc (loop); | |
1386 | |
1387 /* Check simpleness. */ | |
1388 if (desc->simple_p && !desc->assumptions) | |
1389 { | |
1390 if (dump_file) | |
1391 fprintf (dump_file, ";; The loop is simple\n"); | |
1392 return; | |
1393 } | |
1394 | |
1395 /* Do not unroll loops with branches inside -- it increases number | |
1396 of mispredicts. */ | |
1397 if (num_loop_branches (loop) > 1) | |
1398 { | |
1399 if (dump_file) | |
1400 fprintf (dump_file, ";; Not unrolling, contains branches\n"); | |
1401 return; | |
1402 } | |
1403 | |
1404 /* If we have profile feedback, check whether the loop rolls. */ | |
1405 if (loop->header->count | |
1406 && expected_loop_iterations (loop) < 2 * nunroll) | |
1407 { | |
1408 if (dump_file) | |
1409 fprintf (dump_file, ";; Not unrolling loop, doesn't roll\n"); | |
1410 return; | |
1411 } | |
1412 | |
1413 /* Success. Now force nunroll to be power of 2, as it seems that this | |
1414 improves results (partially because of better alignments, partially | |
1415 because of some dark magic). */ | |
1416 for (i = 1; 2 * i <= nunroll; i *= 2) | |
1417 continue; | |
1418 | |
1419 loop->lpt_decision.decision = LPT_UNROLL_STUPID; | |
1420 loop->lpt_decision.times = i - 1; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1421 |
0 | 1422 if (dump_file) |
1423 fprintf (dump_file, | |
1424 ";; Decided to unroll the loop stupidly, %d times.\n", | |
1425 loop->lpt_decision.times); | |
1426 } | |
1427 | |
1428 /* Unroll a LOOP LOOP->LPT_DECISION.TIMES times. The transformation: | |
1429 while (cond) | |
1430 body; | |
1431 | |
1432 ==> | |
1433 | |
1434 while (cond) | |
1435 { | |
1436 body; | |
1437 if (!cond) break; | |
1438 body; | |
1439 if (!cond) break; | |
1440 body; | |
1441 if (!cond) break; | |
1442 body; | |
1443 } | |
1444 */ | |
1445 static void | |
1446 unroll_loop_stupid (struct loop *loop) | |
1447 { | |
1448 sbitmap wont_exit; | |
1449 unsigned nunroll = loop->lpt_decision.times; | |
1450 struct niter_desc *desc = get_simple_loop_desc (loop); | |
1451 struct opt_info *opt_info = NULL; | |
1452 bool ok; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1453 |
0 | 1454 if (flag_split_ivs_in_unroller |
1455 || flag_variable_expansion_in_unroller) | |
1456 opt_info = analyze_insns_in_loop (loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1457 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1458 |
0 | 1459 wont_exit = sbitmap_alloc (nunroll + 1); |
1460 sbitmap_zero (wont_exit); | |
1461 opt_info_start_duplication (opt_info); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1462 |
0 | 1463 ok = duplicate_loop_to_header_edge (loop, loop_latch_edge (loop), |
1464 nunroll, wont_exit, | |
1465 NULL, NULL, | |
1466 DLTHE_FLAG_UPDATE_FREQ | |
1467 | (opt_info | |
1468 ? DLTHE_RECORD_COPY_NUMBER | |
1469 : 0)); | |
1470 gcc_assert (ok); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1471 |
0 | 1472 if (opt_info) |
1473 { | |
1474 apply_opt_in_copies (opt_info, nunroll, true, true); | |
1475 free_opt_info (opt_info); | |
1476 } | |
1477 | |
1478 free (wont_exit); | |
1479 | |
1480 if (desc->simple_p) | |
1481 { | |
1482 /* We indeed may get here provided that there are nontrivial assumptions | |
1483 for a loop to be really simple. We could update the counts, but the | |
1484 problem is that we are unable to decide which exit will be taken | |
1485 (not really true in case the number of iterations is constant, | |
1486 but noone will do anything with this information, so we do not | |
1487 worry about it). */ | |
1488 desc->simple_p = false; | |
1489 } | |
1490 | |
1491 if (dump_file) | |
1492 fprintf (dump_file, ";; Unrolled loop %d times, %i insns\n", | |
1493 nunroll, num_loop_insns (loop)); | |
1494 } | |
1495 | |
1496 /* A hash function for information about insns to split. */ | |
1497 | |
1498 static hashval_t | |
1499 si_info_hash (const void *ivts) | |
1500 { | |
1501 return (hashval_t) INSN_UID (((const struct iv_to_split *) ivts)->insn); | |
1502 } | |
1503 | |
1504 /* An equality functions for information about insns to split. */ | |
1505 | |
1506 static int | |
1507 si_info_eq (const void *ivts1, const void *ivts2) | |
1508 { | |
1509 const struct iv_to_split *const i1 = (const struct iv_to_split *) ivts1; | |
1510 const struct iv_to_split *const i2 = (const struct iv_to_split *) ivts2; | |
1511 | |
1512 return i1->insn == i2->insn; | |
1513 } | |
1514 | |
1515 /* Return a hash for VES, which is really a "var_to_expand *". */ | |
1516 | |
1517 static hashval_t | |
1518 ve_info_hash (const void *ves) | |
1519 { | |
1520 return (hashval_t) INSN_UID (((const struct var_to_expand *) ves)->insn); | |
1521 } | |
1522 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1523 /* Return true if IVTS1 and IVTS2 (which are really both of type |
0 | 1524 "var_to_expand *") refer to the same instruction. */ |
1525 | |
1526 static int | |
1527 ve_info_eq (const void *ivts1, const void *ivts2) | |
1528 { | |
1529 const struct var_to_expand *const i1 = (const struct var_to_expand *) ivts1; | |
1530 const struct var_to_expand *const i2 = (const struct var_to_expand *) ivts2; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1531 |
0 | 1532 return i1->insn == i2->insn; |
1533 } | |
1534 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1535 /* Returns true if REG is referenced in one nondebug insn in LOOP. |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1536 Set *DEBUG_USES to the number of debug insns that reference the |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1537 variable. */ |
0 | 1538 |
1539 bool | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1540 referenced_in_one_insn_in_loop_p (struct loop *loop, rtx reg, |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1541 int *debug_uses) |
0 | 1542 { |
1543 basic_block *body, bb; | |
1544 unsigned i; | |
1545 int count_ref = 0; | |
1546 rtx insn; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1547 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1548 body = get_loop_body (loop); |
0 | 1549 for (i = 0; i < loop->num_nodes; i++) |
1550 { | |
1551 bb = body[i]; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1552 |
0 | 1553 FOR_BB_INSNS (bb, insn) |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1554 if (!rtx_referenced_p (reg, insn)) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1555 continue; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1556 else if (DEBUG_INSN_P (insn)) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1557 ++*debug_uses; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1558 else if (++count_ref > 1) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1559 break; |
0 | 1560 } |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1561 free (body); |
0 | 1562 return (count_ref == 1); |
1563 } | |
1564 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1565 /* Reset the DEBUG_USES debug insns in LOOP that reference REG. */ |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1566 |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1567 static void |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1568 reset_debug_uses_in_loop (struct loop *loop, rtx reg, int debug_uses) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1569 { |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1570 basic_block *body, bb; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1571 unsigned i; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1572 rtx insn; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1573 |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1574 body = get_loop_body (loop); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1575 for (i = 0; debug_uses && i < loop->num_nodes; i++) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1576 { |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1577 bb = body[i]; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1578 |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1579 FOR_BB_INSNS (bb, insn) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1580 if (!DEBUG_INSN_P (insn) || !rtx_referenced_p (reg, insn)) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1581 continue; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1582 else |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1583 { |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1584 validate_change (insn, &INSN_VAR_LOCATION_LOC (insn), |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1585 gen_rtx_UNKNOWN_VAR_LOC (), 0); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1586 if (!--debug_uses) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1587 break; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1588 } |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1589 } |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1590 free (body); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1591 } |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1592 |
0 | 1593 /* Determine whether INSN contains an accumulator |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1594 which can be expanded into separate copies, |
0 | 1595 one for each copy of the LOOP body. |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1596 |
0 | 1597 for (i = 0 ; i < n; i++) |
1598 sum += a[i]; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1599 |
0 | 1600 ==> |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1601 |
0 | 1602 sum += a[i] |
1603 .... | |
1604 i = i+1; | |
1605 sum1 += a[i] | |
1606 .... | |
1607 i = i+1 | |
1608 sum2 += a[i]; | |
1609 .... | |
1610 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1611 Return NULL if INSN contains no opportunity for expansion of accumulator. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1612 Otherwise, allocate a VAR_TO_EXPAND structure, fill it with the relevant |
0 | 1613 information and return a pointer to it. |
1614 */ | |
1615 | |
1616 static struct var_to_expand * | |
1617 analyze_insn_to_expand_var (struct loop *loop, rtx insn) | |
1618 { | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1619 rtx set, dest, src; |
0 | 1620 struct var_to_expand *ves; |
1621 unsigned accum_pos; | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1622 enum rtx_code code; |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1623 int debug_uses = 0; |
0 | 1624 |
1625 set = single_set (insn); | |
1626 if (!set) | |
1627 return NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1628 |
0 | 1629 dest = SET_DEST (set); |
1630 src = SET_SRC (set); | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1631 code = GET_CODE (src); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1632 |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1633 if (code != PLUS && code != MINUS && code != MULT && code != FMA) |
0 | 1634 return NULL; |
1635 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1636 if (FLOAT_MODE_P (GET_MODE (dest))) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1637 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1638 if (!flag_associative_math) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1639 return NULL; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1640 /* In the case of FMA, we're also changing the rounding. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1641 if (code == FMA && !flag_unsafe_math_optimizations) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1642 return NULL; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1643 } |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1644 |
0 | 1645 /* Hmm, this is a bit paradoxical. We know that INSN is a valid insn |
1646 in MD. But if there is no optab to generate the insn, we can not | |
1647 perform the variable expansion. This can happen if an MD provides | |
1648 an insn but not a named pattern to generate it, for example to avoid | |
1649 producing code that needs additional mode switches like for x87/mmx. | |
1650 | |
1651 So we check have_insn_for which looks for an optab for the operation | |
1652 in SRC. If it doesn't exist, we can't perform the expansion even | |
1653 though INSN is valid. */ | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1654 if (!have_insn_for (code, GET_MODE (src))) |
0 | 1655 return NULL; |
1656 | |
1657 if (!REG_P (dest) | |
1658 && !(GET_CODE (dest) == SUBREG | |
1659 && REG_P (SUBREG_REG (dest)))) | |
1660 return NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1661 |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1662 /* Find the accumulator use within the operation. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1663 if (code == FMA) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1664 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1665 /* We only support accumulation via FMA in the ADD position. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1666 if (!rtx_equal_p (dest, XEXP (src, 2))) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1667 return NULL; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1668 accum_pos = 2; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1669 } |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1670 else if (rtx_equal_p (dest, XEXP (src, 0))) |
0 | 1671 accum_pos = 0; |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1672 else if (rtx_equal_p (dest, XEXP (src, 1))) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1673 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1674 /* The method of expansion that we are using; which includes the |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1675 initialization of the expansions with zero and the summation of |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1676 the expansions at the end of the computation will yield wrong |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1677 results for (x = something - x) thus avoid using it in that case. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1678 if (code == MINUS) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1679 return NULL; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1680 accum_pos = 1; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1681 } |
0 | 1682 else |
1683 return NULL; | |
1684 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1685 /* It must not otherwise be used. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1686 if (code == FMA) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1687 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1688 if (rtx_referenced_p (dest, XEXP (src, 0)) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1689 || rtx_referenced_p (dest, XEXP (src, 1))) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1690 return NULL; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1691 } |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1692 else if (rtx_referenced_p (dest, XEXP (src, 1 - accum_pos))) |
0 | 1693 return NULL; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1694 |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1695 /* It must be used in exactly one insn. */ |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1696 if (!referenced_in_one_insn_in_loop_p (loop, dest, &debug_uses)) |
0 | 1697 return NULL; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1698 |
0 | 1699 if (dump_file) |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1700 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1701 fprintf (dump_file, "\n;; Expanding Accumulator "); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1702 print_rtl (dump_file, dest); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1703 fprintf (dump_file, "\n"); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1704 } |
0 | 1705 |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1706 if (debug_uses) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1707 /* Instead of resetting the debug insns, we could replace each |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1708 debug use in the loop with the sum or product of all expanded |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1709 accummulators. Since we'll only know of all expansions at the |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1710 end, we'd have to keep track of which vars_to_expand a debug |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1711 insn in the loop references, take note of each copy of the |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1712 debug insn during unrolling, and when it's all done, compute |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1713 the sum or product of each variable and adjust the original |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1714 debug insn and each copy thereof. What a pain! */ |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1715 reset_debug_uses_in_loop (loop, dest, debug_uses); |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1716 |
0 | 1717 /* Record the accumulator to expand. */ |
1718 ves = XNEW (struct var_to_expand); | |
1719 ves->insn = insn; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1720 ves->reg = copy_rtx (dest); |
0 | 1721 ves->var_expansions = VEC_alloc (rtx, heap, 1); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1722 ves->next = NULL; |
0 | 1723 ves->op = GET_CODE (src); |
1724 ves->expansion_count = 0; | |
1725 ves->reuse_expansion = 0; | |
1726 ves->accum_pos = accum_pos; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1727 return ves; |
0 | 1728 } |
1729 | |
1730 /* Determine whether there is an induction variable in INSN that | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1731 we would like to split during unrolling. |
0 | 1732 |
1733 I.e. replace | |
1734 | |
1735 i = i + 1; | |
1736 ... | |
1737 i = i + 1; | |
1738 ... | |
1739 i = i + 1; | |
1740 ... | |
1741 | |
1742 type chains by | |
1743 | |
1744 i0 = i + 1 | |
1745 ... | |
1746 i = i0 + 1 | |
1747 ... | |
1748 i = i0 + 2 | |
1749 ... | |
1750 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1751 Return NULL if INSN contains no interesting IVs. Otherwise, allocate |
0 | 1752 an IV_TO_SPLIT structure, fill it with the relevant information and return a |
1753 pointer to it. */ | |
1754 | |
1755 static struct iv_to_split * | |
1756 analyze_iv_to_split_insn (rtx insn) | |
1757 { | |
1758 rtx set, dest; | |
1759 struct rtx_iv iv; | |
1760 struct iv_to_split *ivts; | |
1761 bool ok; | |
1762 | |
1763 /* For now we just split the basic induction variables. Later this may be | |
1764 extended for example by selecting also addresses of memory references. */ | |
1765 set = single_set (insn); | |
1766 if (!set) | |
1767 return NULL; | |
1768 | |
1769 dest = SET_DEST (set); | |
1770 if (!REG_P (dest)) | |
1771 return NULL; | |
1772 | |
1773 if (!biv_p (insn, dest)) | |
1774 return NULL; | |
1775 | |
1776 ok = iv_analyze_result (insn, dest, &iv); | |
1777 | |
1778 /* This used to be an assert under the assumption that if biv_p returns | |
1779 true that iv_analyze_result must also return true. However, that | |
1780 assumption is not strictly correct as evidenced by pr25569. | |
1781 | |
1782 Returning NULL when iv_analyze_result returns false is safe and | |
1783 avoids the problems in pr25569 until the iv_analyze_* routines | |
1784 can be fixed, which is apparently hard and time consuming | |
1785 according to their author. */ | |
1786 if (! ok) | |
1787 return NULL; | |
1788 | |
1789 if (iv.step == const0_rtx | |
1790 || iv.mode != iv.extend_mode) | |
1791 return NULL; | |
1792 | |
1793 /* Record the insn to split. */ | |
1794 ivts = XNEW (struct iv_to_split); | |
1795 ivts->insn = insn; | |
1796 ivts->base_var = NULL_RTX; | |
1797 ivts->step = iv.step; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1798 ivts->next = NULL; |
0 | 1799 ivts->n_loc = 1; |
1800 ivts->loc[0] = 1; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1801 |
0 | 1802 return ivts; |
1803 } | |
1804 | |
1805 /* Determines which of insns in LOOP can be optimized. | |
1806 Return a OPT_INFO struct with the relevant hash tables filled | |
1807 with all insns to be optimized. The FIRST_NEW_BLOCK field | |
1808 is undefined for the return value. */ | |
1809 | |
1810 static struct opt_info * | |
1811 analyze_insns_in_loop (struct loop *loop) | |
1812 { | |
1813 basic_block *body, bb; | |
1814 unsigned i; | |
1815 struct opt_info *opt_info = XCNEW (struct opt_info); | |
1816 rtx insn; | |
1817 struct iv_to_split *ivts = NULL; | |
1818 struct var_to_expand *ves = NULL; | |
1819 PTR *slot1; | |
1820 PTR *slot2; | |
1821 VEC (edge, heap) *edges = get_loop_exit_edges (loop); | |
1822 edge exit; | |
1823 bool can_apply = false; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1824 |
0 | 1825 iv_analysis_loop_init (loop); |
1826 | |
1827 body = get_loop_body (loop); | |
1828 | |
1829 if (flag_split_ivs_in_unroller) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1830 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1831 opt_info->insns_to_split = htab_create (5 * loop->num_nodes, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1832 si_info_hash, si_info_eq, free); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1833 opt_info->iv_to_split_head = NULL; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1834 opt_info->iv_to_split_tail = &opt_info->iv_to_split_head; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1835 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1836 |
0 | 1837 /* Record the loop exit bb and loop preheader before the unrolling. */ |
1838 opt_info->loop_preheader = loop_preheader_edge (loop)->src; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1839 |
0 | 1840 if (VEC_length (edge, edges) == 1) |
1841 { | |
1842 exit = VEC_index (edge, edges, 0); | |
1843 if (!(exit->flags & EDGE_COMPLEX)) | |
1844 { | |
1845 opt_info->loop_exit = split_edge (exit); | |
1846 can_apply = true; | |
1847 } | |
1848 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1849 |
0 | 1850 if (flag_variable_expansion_in_unroller |
1851 && can_apply) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1852 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1853 opt_info->insns_with_var_to_expand = htab_create (5 * loop->num_nodes, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1854 ve_info_hash, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1855 ve_info_eq, free); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1856 opt_info->var_to_expand_head = NULL; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1857 opt_info->var_to_expand_tail = &opt_info->var_to_expand_head; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1858 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1859 |
0 | 1860 for (i = 0; i < loop->num_nodes; i++) |
1861 { | |
1862 bb = body[i]; | |
1863 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, bb)) | |
1864 continue; | |
1865 | |
1866 FOR_BB_INSNS (bb, insn) | |
1867 { | |
1868 if (!INSN_P (insn)) | |
1869 continue; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1870 |
0 | 1871 if (opt_info->insns_to_split) |
1872 ivts = analyze_iv_to_split_insn (insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1873 |
0 | 1874 if (ivts) |
1875 { | |
1876 slot1 = htab_find_slot (opt_info->insns_to_split, ivts, INSERT); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1877 gcc_assert (*slot1 == NULL); |
0 | 1878 *slot1 = ivts; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1879 *opt_info->iv_to_split_tail = ivts; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1880 opt_info->iv_to_split_tail = &ivts->next; |
0 | 1881 continue; |
1882 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1883 |
0 | 1884 if (opt_info->insns_with_var_to_expand) |
1885 ves = analyze_insn_to_expand_var (loop, insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1886 |
0 | 1887 if (ves) |
1888 { | |
1889 slot2 = htab_find_slot (opt_info->insns_with_var_to_expand, ves, INSERT); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1890 gcc_assert (*slot2 == NULL); |
0 | 1891 *slot2 = ves; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1892 *opt_info->var_to_expand_tail = ves; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1893 opt_info->var_to_expand_tail = &ves->next; |
0 | 1894 } |
1895 } | |
1896 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1897 |
0 | 1898 VEC_free (edge, heap, edges); |
1899 free (body); | |
1900 return opt_info; | |
1901 } | |
1902 | |
1903 /* Called just before loop duplication. Records start of duplicated area | |
1904 to OPT_INFO. */ | |
1905 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1906 static void |
0 | 1907 opt_info_start_duplication (struct opt_info *opt_info) |
1908 { | |
1909 if (opt_info) | |
1910 opt_info->first_new_block = last_basic_block; | |
1911 } | |
1912 | |
1913 /* Determine the number of iterations between initialization of the base | |
1914 variable and the current copy (N_COPY). N_COPIES is the total number | |
1915 of newly created copies. UNROLLING is true if we are unrolling | |
1916 (not peeling) the loop. */ | |
1917 | |
1918 static unsigned | |
1919 determine_split_iv_delta (unsigned n_copy, unsigned n_copies, bool unrolling) | |
1920 { | |
1921 if (unrolling) | |
1922 { | |
1923 /* If we are unrolling, initialization is done in the original loop | |
1924 body (number 0). */ | |
1925 return n_copy; | |
1926 } | |
1927 else | |
1928 { | |
1929 /* If we are peeling, the copy in that the initialization occurs has | |
1930 number 1. The original loop (number 0) is the last. */ | |
1931 if (n_copy) | |
1932 return n_copy - 1; | |
1933 else | |
1934 return n_copies; | |
1935 } | |
1936 } | |
1937 | |
1938 /* Locate in EXPR the expression corresponding to the location recorded | |
1939 in IVTS, and return a pointer to the RTX for this location. */ | |
1940 | |
1941 static rtx * | |
1942 get_ivts_expr (rtx expr, struct iv_to_split *ivts) | |
1943 { | |
1944 unsigned i; | |
1945 rtx *ret = &expr; | |
1946 | |
1947 for (i = 0; i < ivts->n_loc; i++) | |
1948 ret = &XEXP (*ret, ivts->loc[i]); | |
1949 | |
1950 return ret; | |
1951 } | |
1952 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1953 /* Allocate basic variable for the induction variable chain. */ |
0 | 1954 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1955 static void |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1956 allocate_basic_variable (struct iv_to_split *ivts) |
0 | 1957 { |
1958 rtx expr = *get_ivts_expr (single_set (ivts->insn), ivts); | |
1959 | |
1960 ivts->base_var = gen_reg_rtx (GET_MODE (expr)); | |
1961 } | |
1962 | |
1963 /* Insert initialization of basic variable of IVTS before INSN, taking | |
1964 the initial value from INSN. */ | |
1965 | |
1966 static void | |
1967 insert_base_initialization (struct iv_to_split *ivts, rtx insn) | |
1968 { | |
1969 rtx expr = copy_rtx (*get_ivts_expr (single_set (insn), ivts)); | |
1970 rtx seq; | |
1971 | |
1972 start_sequence (); | |
1973 expr = force_operand (expr, ivts->base_var); | |
1974 if (expr != ivts->base_var) | |
1975 emit_move_insn (ivts->base_var, expr); | |
1976 seq = get_insns (); | |
1977 end_sequence (); | |
1978 | |
1979 emit_insn_before (seq, insn); | |
1980 } | |
1981 | |
1982 /* Replace the use of induction variable described in IVTS in INSN | |
1983 by base variable + DELTA * step. */ | |
1984 | |
1985 static void | |
1986 split_iv (struct iv_to_split *ivts, rtx insn, unsigned delta) | |
1987 { | |
1988 rtx expr, *loc, seq, incr, var; | |
1989 enum machine_mode mode = GET_MODE (ivts->base_var); | |
1990 rtx src, dest, set; | |
1991 | |
1992 /* Construct base + DELTA * step. */ | |
1993 if (!delta) | |
1994 expr = ivts->base_var; | |
1995 else | |
1996 { | |
1997 incr = simplify_gen_binary (MULT, mode, | |
1998 ivts->step, gen_int_mode (delta, mode)); | |
1999 expr = simplify_gen_binary (PLUS, GET_MODE (ivts->base_var), | |
2000 ivts->base_var, incr); | |
2001 } | |
2002 | |
2003 /* Figure out where to do the replacement. */ | |
2004 loc = get_ivts_expr (single_set (insn), ivts); | |
2005 | |
2006 /* If we can make the replacement right away, we're done. */ | |
2007 if (validate_change (insn, loc, expr, 0)) | |
2008 return; | |
2009 | |
2010 /* Otherwise, force EXPR into a register and try again. */ | |
2011 start_sequence (); | |
2012 var = gen_reg_rtx (mode); | |
2013 expr = force_operand (expr, var); | |
2014 if (expr != var) | |
2015 emit_move_insn (var, expr); | |
2016 seq = get_insns (); | |
2017 end_sequence (); | |
2018 emit_insn_before (seq, insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2019 |
0 | 2020 if (validate_change (insn, loc, var, 0)) |
2021 return; | |
2022 | |
2023 /* The last chance. Try recreating the assignment in insn | |
2024 completely from scratch. */ | |
2025 set = single_set (insn); | |
2026 gcc_assert (set); | |
2027 | |
2028 start_sequence (); | |
2029 *loc = var; | |
2030 src = copy_rtx (SET_SRC (set)); | |
2031 dest = copy_rtx (SET_DEST (set)); | |
2032 src = force_operand (src, dest); | |
2033 if (src != dest) | |
2034 emit_move_insn (dest, src); | |
2035 seq = get_insns (); | |
2036 end_sequence (); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2037 |
0 | 2038 emit_insn_before (seq, insn); |
2039 delete_insn (insn); | |
2040 } | |
2041 | |
2042 | |
2043 /* Return one expansion of the accumulator recorded in struct VE. */ | |
2044 | |
2045 static rtx | |
2046 get_expansion (struct var_to_expand *ve) | |
2047 { | |
2048 rtx reg; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2049 |
0 | 2050 if (ve->reuse_expansion == 0) |
2051 reg = ve->reg; | |
2052 else | |
2053 reg = VEC_index (rtx, ve->var_expansions, ve->reuse_expansion - 1); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2054 |
0 | 2055 if (VEC_length (rtx, ve->var_expansions) == (unsigned) ve->reuse_expansion) |
2056 ve->reuse_expansion = 0; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2057 else |
0 | 2058 ve->reuse_expansion++; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2059 |
0 | 2060 return reg; |
2061 } | |
2062 | |
2063 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2064 /* Given INSN replace the uses of the accumulator recorded in VE |
0 | 2065 with a new register. */ |
2066 | |
2067 static void | |
2068 expand_var_during_unrolling (struct var_to_expand *ve, rtx insn) | |
2069 { | |
2070 rtx new_reg, set; | |
2071 bool really_new_expansion = false; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2072 |
0 | 2073 set = single_set (insn); |
2074 gcc_assert (set); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2075 |
0 | 2076 /* Generate a new register only if the expansion limit has not been |
2077 reached. Else reuse an already existing expansion. */ | |
2078 if (PARAM_VALUE (PARAM_MAX_VARIABLE_EXPANSIONS) > ve->expansion_count) | |
2079 { | |
2080 really_new_expansion = true; | |
2081 new_reg = gen_reg_rtx (GET_MODE (ve->reg)); | |
2082 } | |
2083 else | |
2084 new_reg = get_expansion (ve); | |
2085 | |
2086 validate_change (insn, &SET_DEST (set), new_reg, 1); | |
2087 validate_change (insn, &XEXP (SET_SRC (set), ve->accum_pos), new_reg, 1); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2088 |
0 | 2089 if (apply_change_group ()) |
2090 if (really_new_expansion) | |
2091 { | |
2092 VEC_safe_push (rtx, heap, ve->var_expansions, new_reg); | |
2093 ve->expansion_count++; | |
2094 } | |
2095 } | |
2096 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2097 /* Initialize the variable expansions in loop preheader. PLACE is the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2098 loop-preheader basic block where the initialization of the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2099 expansions should take place. The expansions are initialized with |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2100 (-0) when the operation is plus or minus to honor sign zero. This |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2101 way we can prevent cases where the sign of the final result is |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2102 effected by the sign of the expansion. Here is an example to |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2103 demonstrate this: |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2104 |
0 | 2105 for (i = 0 ; i < n; i++) |
2106 sum += something; | |
2107 | |
2108 ==> | |
2109 | |
2110 sum += something | |
2111 .... | |
2112 i = i+1; | |
2113 sum1 += something | |
2114 .... | |
2115 i = i+1 | |
2116 sum2 += something; | |
2117 .... | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2118 |
0 | 2119 When SUM is initialized with -zero and SOMETHING is also -zero; the |
2120 final result of sum should be -zero thus the expansions sum1 and sum2 | |
2121 should be initialized with -zero as well (otherwise we will get +zero | |
2122 as the final result). */ | |
2123 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2124 static void |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2125 insert_var_expansion_initialization (struct var_to_expand *ve, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2126 basic_block place) |
0 | 2127 { |
2128 rtx seq, var, zero_init, insn; | |
2129 unsigned i; | |
2130 enum machine_mode mode = GET_MODE (ve->reg); | |
2131 bool honor_signed_zero_p = HONOR_SIGNED_ZEROS (mode); | |
2132 | |
2133 if (VEC_length (rtx, ve->var_expansions) == 0) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2134 return; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2135 |
0 | 2136 start_sequence (); |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2137 switch (ve->op) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2138 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2139 case FMA: |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2140 /* Note that we only accumulate FMA via the ADD operand. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2141 case PLUS: |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2142 case MINUS: |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2143 FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2144 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2145 if (honor_signed_zero_p) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2146 zero_init = simplify_gen_unary (NEG, mode, CONST0_RTX (mode), mode); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2147 else |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2148 zero_init = CONST0_RTX (mode); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2149 emit_move_insn (var, zero_init); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2150 } |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2151 break; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2152 |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2153 case MULT: |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2154 FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2155 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2156 zero_init = CONST1_RTX (GET_MODE (var)); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2157 emit_move_insn (var, zero_init); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2158 } |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2159 break; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2160 |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2161 default: |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2162 gcc_unreachable (); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2163 } |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2164 |
0 | 2165 seq = get_insns (); |
2166 end_sequence (); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2167 |
0 | 2168 insn = BB_HEAD (place); |
2169 while (!NOTE_INSN_BASIC_BLOCK_P (insn)) | |
2170 insn = NEXT_INSN (insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2171 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2172 emit_insn_after (seq, insn); |
0 | 2173 } |
2174 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2175 /* Combine the variable expansions at the loop exit. PLACE is the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2176 loop exit basic block where the summation of the expansions should |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2177 take place. */ |
0 | 2178 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2179 static void |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2180 combine_var_copies_in_loop_exit (struct var_to_expand *ve, basic_block place) |
0 | 2181 { |
2182 rtx sum = ve->reg; | |
2183 rtx expr, seq, var, insn; | |
2184 unsigned i; | |
2185 | |
2186 if (VEC_length (rtx, ve->var_expansions) == 0) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2187 return; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2188 |
0 | 2189 start_sequence (); |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2190 switch (ve->op) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2191 { |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2192 case FMA: |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2193 /* Note that we only accumulate FMA via the ADD operand. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2194 case PLUS: |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2195 case MINUS: |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2196 FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2197 sum = simplify_gen_binary (PLUS, GET_MODE (ve->reg), var, sum); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2198 break; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2199 |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2200 case MULT: |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2201 FOR_EACH_VEC_ELT (rtx, ve->var_expansions, i, var) |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2202 sum = simplify_gen_binary (MULT, GET_MODE (ve->reg), var, sum); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2203 break; |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2204 |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2205 default: |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2206 gcc_unreachable (); |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
2207 } |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2208 |
0 | 2209 expr = force_operand (sum, ve->reg); |
2210 if (expr != ve->reg) | |
2211 emit_move_insn (ve->reg, expr); | |
2212 seq = get_insns (); | |
2213 end_sequence (); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2214 |
0 | 2215 insn = BB_HEAD (place); |
2216 while (!NOTE_INSN_BASIC_BLOCK_P (insn)) | |
2217 insn = NEXT_INSN (insn); | |
2218 | |
2219 emit_insn_after (seq, insn); | |
2220 } | |
2221 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2222 /* Apply loop optimizations in loop copies using the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2223 data which gathered during the unrolling. Structure |
0 | 2224 OPT_INFO record that data. |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2225 |
0 | 2226 UNROLLING is true if we unrolled (not peeled) the loop. |
2227 REWRITE_ORIGINAL_BODY is true if we should also rewrite the original body of | |
2228 the loop (as it should happen in complete unrolling, but not in ordinary | |
2229 peeling of the loop). */ | |
2230 | |
2231 static void | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2232 apply_opt_in_copies (struct opt_info *opt_info, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2233 unsigned n_copies, bool unrolling, |
0 | 2234 bool rewrite_original_loop) |
2235 { | |
2236 unsigned i, delta; | |
2237 basic_block bb, orig_bb; | |
2238 rtx insn, orig_insn, next; | |
2239 struct iv_to_split ivts_templ, *ivts; | |
2240 struct var_to_expand ve_templ, *ves; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2241 |
0 | 2242 /* Sanity check -- we need to put initialization in the original loop |
2243 body. */ | |
2244 gcc_assert (!unrolling || rewrite_original_loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2245 |
0 | 2246 /* Allocate the basic variables (i0). */ |
2247 if (opt_info->insns_to_split) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2248 for (ivts = opt_info->iv_to_split_head; ivts; ivts = ivts->next) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2249 allocate_basic_variable (ivts); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2250 |
0 | 2251 for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++) |
2252 { | |
2253 bb = BASIC_BLOCK (i); | |
2254 orig_bb = get_bb_original (bb); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2255 |
0 | 2256 /* bb->aux holds position in copy sequence initialized by |
2257 duplicate_loop_to_header_edge. */ | |
2258 delta = determine_split_iv_delta ((size_t)bb->aux, n_copies, | |
2259 unrolling); | |
2260 bb->aux = 0; | |
2261 orig_insn = BB_HEAD (orig_bb); | |
2262 for (insn = BB_HEAD (bb); insn != NEXT_INSN (BB_END (bb)); insn = next) | |
2263 { | |
2264 next = NEXT_INSN (insn); | |
2265 if (!INSN_P (insn)) | |
2266 continue; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2267 |
0 | 2268 while (!INSN_P (orig_insn)) |
2269 orig_insn = NEXT_INSN (orig_insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2270 |
0 | 2271 ivts_templ.insn = orig_insn; |
2272 ve_templ.insn = orig_insn; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2273 |
0 | 2274 /* Apply splitting iv optimization. */ |
2275 if (opt_info->insns_to_split) | |
2276 { | |
2277 ivts = (struct iv_to_split *) | |
2278 htab_find (opt_info->insns_to_split, &ivts_templ); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2279 |
0 | 2280 if (ivts) |
2281 { | |
2282 gcc_assert (GET_CODE (PATTERN (insn)) | |
2283 == GET_CODE (PATTERN (orig_insn))); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2284 |
0 | 2285 if (!delta) |
2286 insert_base_initialization (ivts, insn); | |
2287 split_iv (ivts, insn, delta); | |
2288 } | |
2289 } | |
2290 /* Apply variable expansion optimization. */ | |
2291 if (unrolling && opt_info->insns_with_var_to_expand) | |
2292 { | |
2293 ves = (struct var_to_expand *) | |
2294 htab_find (opt_info->insns_with_var_to_expand, &ve_templ); | |
2295 if (ves) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2296 { |
0 | 2297 gcc_assert (GET_CODE (PATTERN (insn)) |
2298 == GET_CODE (PATTERN (orig_insn))); | |
2299 expand_var_during_unrolling (ves, insn); | |
2300 } | |
2301 } | |
2302 orig_insn = NEXT_INSN (orig_insn); | |
2303 } | |
2304 } | |
2305 | |
2306 if (!rewrite_original_loop) | |
2307 return; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2308 |
0 | 2309 /* Initialize the variable expansions in the loop preheader |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2310 and take care of combining them at the loop exit. */ |
0 | 2311 if (opt_info->insns_with_var_to_expand) |
2312 { | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2313 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2314 insert_var_expansion_initialization (ves, opt_info->loop_preheader); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2315 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2316 combine_var_copies_in_loop_exit (ves, opt_info->loop_exit); |
0 | 2317 } |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2318 |
0 | 2319 /* Rewrite also the original loop body. Find them as originals of the blocks |
2320 in the last copied iteration, i.e. those that have | |
2321 get_bb_copy (get_bb_original (bb)) == bb. */ | |
2322 for (i = opt_info->first_new_block; i < (unsigned) last_basic_block; i++) | |
2323 { | |
2324 bb = BASIC_BLOCK (i); | |
2325 orig_bb = get_bb_original (bb); | |
2326 if (get_bb_copy (orig_bb) != bb) | |
2327 continue; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2328 |
0 | 2329 delta = determine_split_iv_delta (0, n_copies, unrolling); |
2330 for (orig_insn = BB_HEAD (orig_bb); | |
2331 orig_insn != NEXT_INSN (BB_END (bb)); | |
2332 orig_insn = next) | |
2333 { | |
2334 next = NEXT_INSN (orig_insn); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2335 |
0 | 2336 if (!INSN_P (orig_insn)) |
2337 continue; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2338 |
0 | 2339 ivts_templ.insn = orig_insn; |
2340 if (opt_info->insns_to_split) | |
2341 { | |
2342 ivts = (struct iv_to_split *) | |
2343 htab_find (opt_info->insns_to_split, &ivts_templ); | |
2344 if (ivts) | |
2345 { | |
2346 if (!delta) | |
2347 insert_base_initialization (ivts, orig_insn); | |
2348 split_iv (ivts, orig_insn, delta); | |
2349 continue; | |
2350 } | |
2351 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2352 |
0 | 2353 } |
2354 } | |
2355 } | |
2356 | |
2357 /* Release OPT_INFO. */ | |
2358 | |
2359 static void | |
2360 free_opt_info (struct opt_info *opt_info) | |
2361 { | |
2362 if (opt_info->insns_to_split) | |
2363 htab_delete (opt_info->insns_to_split); | |
2364 if (opt_info->insns_with_var_to_expand) | |
2365 { | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2366 struct var_to_expand *ves; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2367 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2368 for (ves = opt_info->var_to_expand_head; ves; ves = ves->next) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2369 VEC_free (rtx, heap, ves->var_expansions); |
0 | 2370 htab_delete (opt_info->insns_with_var_to_expand); |
2371 } | |
2372 free (opt_info); | |
2373 } |