Mercurial > hg > CbC > CbC_gcc
comparison gcc/tree-vect-loop-manip.c @ 55:77e2b8dfacca gcc-4.4.5
update it from 4.4.3 to 4.5.0
author | ryoma <e075725@ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 12 Feb 2010 23:39:51 +0900 |
parents | |
children | b7f97abdc517 |
comparison
equal
deleted
inserted
replaced
52:c156f1bd5cd9 | 55:77e2b8dfacca |
---|---|
1 /* Vectorizer Specific Loop Manipulations | |
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 Free Software | |
3 Foundation, Inc. | |
4 Contributed by Dorit Naishlos <dorit@il.ibm.com> | |
5 and Ira Rosen <irar@il.ibm.com> | |
6 | |
7 This file is part of GCC. | |
8 | |
9 GCC is free software; you can redistribute it and/or modify it under | |
10 the terms of the GNU General Public License as published by the Free | |
11 Software Foundation; either version 3, or (at your option) any later | |
12 version. | |
13 | |
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
17 for more details. | |
18 | |
19 You should have received a copy of the GNU General Public License | |
20 along with GCC; see the file COPYING3. If not see | |
21 <http://www.gnu.org/licenses/>. */ | |
22 | |
23 #include "config.h" | |
24 #include "system.h" | |
25 #include "coretypes.h" | |
26 #include "tm.h" | |
27 #include "ggc.h" | |
28 #include "tree.h" | |
29 #include "basic-block.h" | |
30 #include "diagnostic.h" | |
31 #include "tree-flow.h" | |
32 #include "tree-dump.h" | |
33 #include "cfgloop.h" | |
34 #include "cfglayout.h" | |
35 #include "expr.h" | |
36 #include "toplev.h" | |
37 #include "tree-scalar-evolution.h" | |
38 #include "tree-vectorizer.h" | |
39 #include "langhooks.h" | |
40 | |
41 /************************************************************************* | |
42 Simple Loop Peeling Utilities | |
43 | |
44 Utilities to support loop peeling for vectorization purposes. | |
45 *************************************************************************/ | |
46 | |
47 | |
48 /* Renames the use *OP_P. */ | |
49 | |
50 static void | |
51 rename_use_op (use_operand_p op_p) | |
52 { | |
53 tree new_name; | |
54 | |
55 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME) | |
56 return; | |
57 | |
58 new_name = get_current_def (USE_FROM_PTR (op_p)); | |
59 | |
60 /* Something defined outside of the loop. */ | |
61 if (!new_name) | |
62 return; | |
63 | |
64 /* An ordinary ssa name defined in the loop. */ | |
65 | |
66 SET_USE (op_p, new_name); | |
67 } | |
68 | |
69 | |
70 /* Renames the variables in basic block BB. */ | |
71 | |
72 void | |
73 rename_variables_in_bb (basic_block bb) | |
74 { | |
75 gimple_stmt_iterator gsi; | |
76 gimple stmt; | |
77 use_operand_p use_p; | |
78 ssa_op_iter iter; | |
79 edge e; | |
80 edge_iterator ei; | |
81 struct loop *loop = bb->loop_father; | |
82 | |
83 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
84 { | |
85 stmt = gsi_stmt (gsi); | |
86 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES) | |
87 rename_use_op (use_p); | |
88 } | |
89 | |
90 FOR_EACH_EDGE (e, ei, bb->succs) | |
91 { | |
92 if (!flow_bb_inside_loop_p (loop, e->dest)) | |
93 continue; | |
94 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi)) | |
95 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e)); | |
96 } | |
97 } | |
98 | |
99 | |
100 /* Renames variables in new generated LOOP. */ | |
101 | |
102 void | |
103 rename_variables_in_loop (struct loop *loop) | |
104 { | |
105 unsigned i; | |
106 basic_block *bbs; | |
107 | |
108 bbs = get_loop_body (loop); | |
109 | |
110 for (i = 0; i < loop->num_nodes; i++) | |
111 rename_variables_in_bb (bbs[i]); | |
112 | |
113 free (bbs); | |
114 } | |
115 | |
116 | |
117 /* Update the PHI nodes of NEW_LOOP. | |
118 | |
119 NEW_LOOP is a duplicate of ORIG_LOOP. | |
120 AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP: | |
121 AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it | |
122 executes before it. */ | |
123 | |
static void
slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
				       struct loop *new_loop, bool after)
{
  tree new_ssa_name;
  gimple phi_new, phi_orig;
  tree def;
  edge orig_loop_latch = loop_latch_edge (orig_loop);
  edge orig_entry_e = loop_preheader_edge (orig_loop);
  edge new_loop_exit_e = single_exit (new_loop);
  edge new_loop_entry_e = loop_preheader_edge (new_loop);
  /* The value entering NEW_LOOP comes from ORIG_LOOP's latch when
     NEW_LOOP runs after it, and from ORIG_LOOP's preheader otherwise.  */
  edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
  gimple_stmt_iterator gsi_new, gsi_orig;

  /*
     step 1. For each loop-header-phi:
             Add the first phi argument for the phi in NEW_LOOP
            (the one associated with the entry of NEW_LOOP)

     step 2. For each loop-header-phi:
             Add the second phi argument for the phi in NEW_LOOP
            (the one associated with the latch of NEW_LOOP)

     step 3. Update the phis in the successor block of NEW_LOOP.

        case 1: NEW_LOOP was placed before ORIG_LOOP:
                The successor block of NEW_LOOP is the header of ORIG_LOOP.
                Updating the phis in the successor block can therefore be done
                along with the scanning of the loop header phis, because the
                header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
                phi nodes, organized in the same order.

        case 2: NEW_LOOP was placed after ORIG_LOOP:
                The successor block of NEW_LOOP is the original exit block of
                ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
                We postpone updating these phis to a later stage (when
                loop guards are added).
   */


  /* Scan the phis in the headers of the old and new loops
     (they are organized in exactly the same order).  */

  for (gsi_new = gsi_start_phis (new_loop->header),
       gsi_orig = gsi_start_phis (orig_loop->header);
       !gsi_end_p (gsi_new) && !gsi_end_p (gsi_orig);
       gsi_next (&gsi_new), gsi_next (&gsi_orig))
    {
      source_location locus;
      phi_new = gsi_stmt (gsi_new);
      phi_orig = gsi_stmt (gsi_orig);

      /* step 1.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
      locus = gimple_phi_arg_location_from_edge (phi_orig, entry_arg_e);
      add_phi_arg (phi_new, def, new_loop_entry_e, locus);

      /* step 2.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
      locus = gimple_phi_arg_location_from_edge (phi_orig, orig_loop_latch);
      /* Constants and invariants need no latch argument renaming.  */
      if (TREE_CODE (def) != SSA_NAME)
	continue;

      new_ssa_name = get_current_def (def);
      if (!new_ssa_name)
	{
	  /* This only happens if there are no definitions
	     inside the loop. use the phi_result in this case.  */
	  new_ssa_name = PHI_RESULT (phi_new);
	}

      /* An ordinary ssa name defined in the loop.  */
      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop), locus);

      /* step 3 (case 1).  */
      if (!after)
	{
	  /* When NEW_LOOP precedes ORIG_LOOP, NEW_LOOP's exit feeds
	     ORIG_LOOP's preheader, so fix up ORIG_LOOP's header phi
	     argument along that edge in the same pass.  */
	  gcc_assert (new_loop_exit_e == orig_entry_e);
	  SET_PHI_ARG_DEF (phi_orig,
			   new_loop_exit_e->dest_idx,
			   new_ssa_name);
	}
    }
}
208 | |
209 | |
210 /* Update PHI nodes for a guard of the LOOP. | |
211 | |
212 Input: | |
213 - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that | |
214 controls whether LOOP is to be executed. GUARD_EDGE is the edge that | |
215 originates from the guard-bb, skips LOOP and reaches the (unique) exit | |
216 bb of LOOP. This loop-exit-bb is an empty bb with one successor. | |
217 We denote this bb NEW_MERGE_BB because before the guard code was added | |
218 it had a single predecessor (the LOOP header), and now it became a merge | |
219 point of two paths - the path that ends with the LOOP exit-edge, and | |
220 the path that ends with GUARD_EDGE. | |
221 - NEW_EXIT_BB: New basic block that is added by this function between LOOP | |
222 and NEW_MERGE_BB. It is used to place loop-closed-ssa-form exit-phis. | |
223 | |
224 ===> The CFG before the guard-code was added: | |
225 LOOP_header_bb: | |
226 loop_body | |
227 if (exit_loop) goto update_bb | |
228 else goto LOOP_header_bb | |
229 update_bb: | |
230 | |
231 ==> The CFG after the guard-code was added: | |
232 guard_bb: | |
233 if (LOOP_guard_condition) goto new_merge_bb | |
234 else goto LOOP_header_bb | |
235 LOOP_header_bb: | |
236 loop_body | |
237 if (exit_loop_condition) goto new_merge_bb | |
238 else goto LOOP_header_bb | |
239 new_merge_bb: | |
240 goto update_bb | |
241 update_bb: | |
242 | |
243 ==> The CFG after this function: | |
244 guard_bb: | |
245 if (LOOP_guard_condition) goto new_merge_bb | |
246 else goto LOOP_header_bb | |
247 LOOP_header_bb: | |
248 loop_body | |
249 if (exit_loop_condition) goto new_exit_bb | |
250 else goto LOOP_header_bb | |
251 new_exit_bb: | |
252 new_merge_bb: | |
253 goto update_bb | |
254 update_bb: | |
255 | |
256 This function: | |
257 1. creates and updates the relevant phi nodes to account for the new | |
258 incoming edge (GUARD_EDGE) into NEW_MERGE_BB. This involves: | |
259 1.1. Create phi nodes at NEW_MERGE_BB. | |
260 1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted | |
261 UPDATE_BB). UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB | |
262 2. preserves loop-closed-ssa-form by creating the required phi nodes | |
263 at the exit of LOOP (i.e, in NEW_EXIT_BB). | |
264 | |
265 There are two flavors to this function: | |
266 | |
267 slpeel_update_phi_nodes_for_guard1: | |
268 Here the guard controls whether we enter or skip LOOP, where LOOP is a | |
269 prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are | |
270 for variables that have phis in the loop header. | |
271 | |
272 slpeel_update_phi_nodes_for_guard2: | |
273 Here the guard controls whether we enter or skip LOOP, where LOOP is an | |
274 epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are | |
275 for variables that have phis in the loop exit. | |
276 | |
277 I.E., the overall structure is: | |
278 | |
279 loop1_preheader_bb: | |
280 guard1 (goto loop1/merge1_bb) | |
281 loop1 | |
282 loop1_exit_bb: | |
283 guard2 (goto merge1_bb/merge2_bb) | |
284 merge1_bb | |
285 loop2 | |
286 loop2_exit_bb | |
287 merge2_bb | |
288 next_bb | |
289 | |
290 slpeel_update_phi_nodes_for_guard1 takes care of creating phis in | |
291 loop1_exit_bb and merge1_bb. These are entry phis (phis for the vars | |
292 that have phis in loop1->header). | |
293 | |
294 slpeel_update_phi_nodes_for_guard2 takes care of creating phis in | |
295 loop2_exit_bb and merge2_bb. These are exit phis (phis for the vars | |
296 that have phis in next_bb). It also adds some of these phis to | |
297 loop1_exit_bb. | |
298 | |
299 slpeel_update_phi_nodes_for_guard1 is always called before | |
300 slpeel_update_phi_nodes_for_guard2. They are both needed in order | |
301 to create correct data-flow and loop-closed-ssa-form. | |
302 | |
303 Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables | |
304 that change between iterations of a loop (and therefore have a phi-node | |
305 at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates | |
306 phis for variables that are used out of the loop (and therefore have | |
307 loop-closed exit phis). Some variables may be both updated between | |
308 iterations and used after the loop. This is why in loop1_exit_bb we | |
309 may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1) | |
310 and exit phis (created by slpeel_update_phi_nodes_for_guard2). | |
311 | |
312 - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of | |
313 an original loop. i.e., we have: | |
314 | |
315 orig_loop | |
316 guard_bb (goto LOOP/new_merge) | |
317 new_loop <-- LOOP | |
318 new_exit | |
319 new_merge | |
320 next_bb | |
321 | |
322 If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we | |
323 have: | |
324 | |
325 new_loop | |
326 guard_bb (goto LOOP/new_merge) | |
327 orig_loop <-- LOOP | |
328 new_exit | |
329 new_merge | |
330 next_bb | |
331 | |
332 The SSA names defined in the original loop have a current | |
332 reaching definition that records the corresponding new | |
334 ssa-name used in the new duplicated loop copy. | |
335 */ | |
336 | |
337 /* Function slpeel_update_phi_nodes_for_guard1 | |
338 | |
339 Input: | |
340 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above. | |
341 - DEFS - a bitmap of ssa names to mark new names for which we recorded | |
342 information. | |
343 | |
344 In the context of the overall structure, we have: | |
345 | |
346 loop1_preheader_bb: | |
347 guard1 (goto loop1/merge1_bb) | |
348 LOOP-> loop1 | |
349 loop1_exit_bb: | |
350 guard2 (goto merge1_bb/merge2_bb) | |
351 merge1_bb | |
352 loop2 | |
353 loop2_exit_bb | |
354 merge2_bb | |
355 next_bb | |
356 | |
357 For each name updated between loop iterations (i.e - for each name that has | |
358 an entry (loop-header) phi in LOOP) we create a new phi in: | |
359 1. merge1_bb (to account for the edge from guard1) | |
360 2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form) | |
361 */ | |
362 | |
363 static void | |
364 slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop, | |
365 bool is_new_loop, basic_block *new_exit_bb, | |
366 bitmap *defs) | |
367 { | |
368 gimple orig_phi, new_phi; | |
369 gimple update_phi, update_phi2; | |
370 tree guard_arg, loop_arg; | |
371 basic_block new_merge_bb = guard_edge->dest; | |
372 edge e = EDGE_SUCC (new_merge_bb, 0); | |
373 basic_block update_bb = e->dest; | |
374 basic_block orig_bb = loop->header; | |
375 edge new_exit_e; | |
376 tree current_new_name; | |
377 gimple_stmt_iterator gsi_orig, gsi_update; | |
378 | |
379 /* Create new bb between loop and new_merge_bb. */ | |
380 *new_exit_bb = split_edge (single_exit (loop)); | |
381 | |
382 new_exit_e = EDGE_SUCC (*new_exit_bb, 0); | |
383 | |
384 for (gsi_orig = gsi_start_phis (orig_bb), | |
385 gsi_update = gsi_start_phis (update_bb); | |
386 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update); | |
387 gsi_next (&gsi_orig), gsi_next (&gsi_update)) | |
388 { | |
389 source_location loop_locus, guard_locus;; | |
390 orig_phi = gsi_stmt (gsi_orig); | |
391 update_phi = gsi_stmt (gsi_update); | |
392 | |
393 /** 1. Handle new-merge-point phis **/ | |
394 | |
395 /* 1.1. Generate new phi node in NEW_MERGE_BB: */ | |
396 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), | |
397 new_merge_bb); | |
398 | |
399 /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge | |
400 of LOOP. Set the two phi args in NEW_PHI for these edges: */ | |
401 loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0)); | |
402 loop_locus = gimple_phi_arg_location_from_edge (orig_phi, | |
403 EDGE_SUCC (loop->latch, | |
404 0)); | |
405 guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop)); | |
406 guard_locus | |
407 = gimple_phi_arg_location_from_edge (orig_phi, | |
408 loop_preheader_edge (loop)); | |
409 | |
410 add_phi_arg (new_phi, loop_arg, new_exit_e, loop_locus); | |
411 add_phi_arg (new_phi, guard_arg, guard_edge, guard_locus); | |
412 | |
413 /* 1.3. Update phi in successor block. */ | |
414 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg | |
415 || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg); | |
416 SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi)); | |
417 update_phi2 = new_phi; | |
418 | |
419 | |
420 /** 2. Handle loop-closed-ssa-form phis **/ | |
421 | |
422 if (!is_gimple_reg (PHI_RESULT (orig_phi))) | |
423 continue; | |
424 | |
425 /* 2.1. Generate new phi node in NEW_EXIT_BB: */ | |
426 new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)), | |
427 *new_exit_bb); | |
428 | |
429 /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop. */ | |
430 add_phi_arg (new_phi, loop_arg, single_exit (loop), loop_locus); | |
431 | |
432 /* 2.3. Update phi in successor of NEW_EXIT_BB: */ | |
433 gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg); | |
434 SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi)); | |
435 | |
436 /* 2.4. Record the newly created name with set_current_def. | |
437 We want to find a name such that | |
438 name = get_current_def (orig_loop_name) | |
439 and to set its current definition as follows: | |
440 set_current_def (name, new_phi_name) | |
441 | |
442 If LOOP is a new loop then loop_arg is already the name we're | |
443 looking for. If LOOP is the original loop, then loop_arg is | |
444 the orig_loop_name and the relevant name is recorded in its | |
445 current reaching definition. */ | |
446 if (is_new_loop) | |
447 current_new_name = loop_arg; | |
448 else | |
449 { | |
450 current_new_name = get_current_def (loop_arg); | |
451 /* current_def is not available only if the variable does not | |
452 change inside the loop, in which case we also don't care | |
453 about recording a current_def for it because we won't be | |
454 trying to create loop-exit-phis for it. */ | |
455 if (!current_new_name) | |
456 continue; | |
457 } | |
458 gcc_assert (get_current_def (current_new_name) == NULL_TREE); | |
459 | |
460 set_current_def (current_new_name, PHI_RESULT (new_phi)); | |
461 bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name)); | |
462 } | |
463 } | |
464 | |
465 | |
466 /* Function slpeel_update_phi_nodes_for_guard2 | |
467 | |
468 Input: | |
469 - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above. | |
470 | |
471 In the context of the overall structure, we have: | |
472 | |
473 loop1_preheader_bb: | |
474 guard1 (goto loop1/merge1_bb) | |
475 loop1 | |
476 loop1_exit_bb: | |
477 guard2 (goto merge1_bb/merge2_bb) | |
478 merge1_bb | |
479 LOOP-> loop2 | |
480 loop2_exit_bb | |
481 merge2_bb | |
482 next_bb | |
483 | |
484 For each name used outside the loop (i.e - for each name that has an exit | |
485 phi in next_bb) we create a new phi in: | |
486 1. merge2_bb (to account for the edge from guard_bb) | |
487 2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form) | |
488 3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form), | |
489 if needed (if it wasn't handled by slpeel_update_phi_nodes_for_guard1). | |
490 */ | |
491 | |
static void
slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
                                    bool is_new_loop, basic_block *new_exit_bb)
{
  gimple orig_phi, new_phi;
  gimple update_phi, update_phi2;
  tree guard_arg, loop_arg;
  basic_block new_merge_bb = guard_edge->dest;
  edge e = EDGE_SUCC (new_merge_bb, 0);
  basic_block update_bb = e->dest;
  edge new_exit_e;
  tree orig_def, orig_def_new_name;
  tree new_name, new_name2;
  tree arg;
  gimple_stmt_iterator gsi;

  /* Create new bb between loop and new_merge_bb.  */
  *new_exit_bb = split_edge (single_exit (loop));

  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);

  /* Walk the loop-closed exit phis in UPDATE_BB; each one gets a
     companion phi in NEW_MERGE_BB and in NEW_EXIT_BB (and possibly
     in GUARD_BB, see step 3 below).  */
  for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      update_phi = gsi_stmt (gsi);
      orig_phi = update_phi;
      orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
      /* This loop-closed-phi actually doesn't represent a use
         out of the loop - the phi arg is a constant.  */
      if (TREE_CODE (orig_def) != SSA_NAME)
        continue;
      orig_def_new_name = get_current_def (orig_def);
      arg = NULL_TREE;

      /** 1. Handle new-merge-point phis  **/

      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                 new_merge_bb);

      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
            of LOOP. Set the two PHI args in NEW_PHI for these edges:  */
      new_name = orig_def;
      new_name2 = NULL_TREE;
      if (orig_def_new_name)
        {
          new_name = orig_def_new_name;
          /* Some variables have both loop-entry-phis and loop-exit-phis.
             Such variables were given yet newer names by phis placed in
             guard_bb by slpeel_update_phi_nodes_for_guard1. I.e:
             new_name2 = get_current_def (get_current_def (orig_name)).  */
          new_name2 = get_current_def (new_name);
        }

      /* Which name flows around the loop and which skips it depends on
         whether LOOP is the duplicated copy or the original.  */
      if (is_new_loop)
        {
          guard_arg = orig_def;
          loop_arg = new_name;
        }
      else
        {
          guard_arg = new_name;
          loop_arg = orig_def;
        }
      if (new_name2)
        guard_arg = new_name2;

      add_phi_arg (new_phi, loop_arg, new_exit_e, UNKNOWN_LOCATION);
      add_phi_arg (new_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);

      /* 1.3. Update phi in successor block.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
      SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
      update_phi2 = new_phi;


      /** 2. Handle loop-closed-ssa-form phis  **/

      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                 *new_exit_bb);

      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
      add_phi_arg (new_phi, loop_arg, single_exit (loop), UNKNOWN_LOCATION);

      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
      SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));


      /** 3. Handle loop-closed-ssa-form phis for first loop  **/

      /* 3.1. Find the relevant names that need an exit-phi in
         GUARD_BB, i.e. names for which
         slpeel_update_phi_nodes_for_guard1 had not already created a
         phi node. This is the case for names that are used outside
         the loop (and therefore need an exit phi) but are not updated
         across loop iterations (and therefore don't have a
         loop-header-phi).

         slpeel_update_phi_nodes_for_guard1 is responsible for
         creating loop-exit phis in GUARD_BB for names that have a
         loop-header-phi.  When such a phi is created we also record
         the new name in its current definition.  If this new name
         exists, then guard_arg was set to this new name (see 1.2
         above).  Therefore, if guard_arg is not this new name, this
         is an indication that an exit-phi in GUARD_BB was not yet
         created, so we take care of it here.  */
      if (guard_arg == new_name2)
        continue;
      arg = guard_arg;

      /* 3.2. Generate new phi node in GUARD_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                 guard_edge->src);

      /* 3.3. GUARD_BB has one incoming edge:  */
      gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
      add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0),
                   UNKNOWN_LOCATION);

      /* 3.4. Update phi in successor of GUARD_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
                  == guard_arg);
      SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
    }
}
618 | |
619 | |
620 /* Make the LOOP iterate NITERS times. This is done by adding a new IV | |
621 that starts at zero, increases by one and its limit is NITERS. | |
622 | |
623 Assumption: the exit-condition of LOOP is the last stmt in the loop. */ | |
624 | |
void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr;
  gimple cond_stmt;
  gimple orig_cond;
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  /* The new IV counts from 0 by steps of 1 up to NITERS.  */
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  LOC loop_loc;
  enum tree_code code;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);

  /* Make sure both the incremented IV and NITERS are gimple operands
     available right before the exit condition.  */
  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
                                              true, NULL_TREE, true,
                                              GSI_SAME_STMT);
  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
                                     true, GSI_SAME_STMT);

  /* Pick the comparison so that the branch sense of the new condition
     matches the existing exit edge (taken when exiting vs. looping).  */
  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
                                 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);

  loop_loc = find_loop_location (loop);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (loop_loc != UNKNOWN_LOC)
        fprintf (dump_file, "\nloop at %s:%d: ",
                 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
      print_gimple_stmt (dump_file, cond_stmt, 0, TDF_SLIM);
    }

  /* Record the now-known iteration count on the loop.  */
  loop->nb_iterations = niters;
}
674 | |
675 | |
676 /* Given LOOP this function generates a new copy of it and puts it | |
677 on E which is either the entry or exit of LOOP. */ | |
678 | |
679 struct loop * | |
680 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e) | |
681 { | |
682 struct loop *new_loop; | |
683 basic_block *new_bbs, *bbs; | |
684 bool at_exit; | |
685 bool was_imm_dom; | |
686 basic_block exit_dest; | |
687 gimple phi; | |
688 tree phi_arg; | |
689 edge exit, new_exit; | |
690 gimple_stmt_iterator gsi; | |
691 | |
692 at_exit = (e == single_exit (loop)); | |
693 if (!at_exit && e != loop_preheader_edge (loop)) | |
694 return NULL; | |
695 | |
696 bbs = get_loop_body (loop); | |
697 | |
698 /* Check whether duplication is possible. */ | |
699 if (!can_copy_bbs_p (bbs, loop->num_nodes)) | |
700 { | |
701 free (bbs); | |
702 return NULL; | |
703 } | |
704 | |
705 /* Generate new loop structure. */ | |
706 new_loop = duplicate_loop (loop, loop_outer (loop)); | |
707 if (!new_loop) | |
708 { | |
709 free (bbs); | |
710 return NULL; | |
711 } | |
712 | |
713 exit_dest = single_exit (loop)->dest; | |
714 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS, | |
715 exit_dest) == loop->header ? | |
716 true : false); | |
717 | |
718 new_bbs = XNEWVEC (basic_block, loop->num_nodes); | |
719 | |
720 exit = single_exit (loop); | |
721 copy_bbs (bbs, loop->num_nodes, new_bbs, | |
722 &exit, 1, &new_exit, NULL, | |
723 e->src); | |
724 | |
725 /* Duplicating phi args at exit bbs as coming | |
726 also from exit of duplicated loop. */ | |
727 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); gsi_next (&gsi)) | |
728 { | |
729 phi = gsi_stmt (gsi); | |
730 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop)); | |
731 if (phi_arg) | |
732 { | |
733 edge new_loop_exit_edge; | |
734 source_location locus; | |
735 | |
736 locus = gimple_phi_arg_location_from_edge (phi, single_exit (loop)); | |
737 if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch) | |
738 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1); | |
739 else | |
740 new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0); | |
741 | |
742 add_phi_arg (phi, phi_arg, new_loop_exit_edge, locus); | |
743 } | |
744 } | |
745 | |
746 if (at_exit) /* Add the loop copy at exit. */ | |
747 { | |
748 redirect_edge_and_branch_force (e, new_loop->header); | |
749 PENDING_STMT (e) = NULL; | |
750 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src); | |
751 if (was_imm_dom) | |
752 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header); | |
753 } | |
754 else /* Add the copy at entry. */ | |
755 { | |
756 edge new_exit_e; | |
757 edge entry_e = loop_preheader_edge (loop); | |
758 basic_block preheader = entry_e->src; | |
759 | |
760 if (!flow_bb_inside_loop_p (new_loop, | |
761 EDGE_SUCC (new_loop->header, 0)->dest)) | |
762 new_exit_e = EDGE_SUCC (new_loop->header, 0); | |
763 else | |
764 new_exit_e = EDGE_SUCC (new_loop->header, 1); | |
765 | |
766 redirect_edge_and_branch_force (new_exit_e, loop->header); | |
767 PENDING_STMT (new_exit_e) = NULL; | |
768 set_immediate_dominator (CDI_DOMINATORS, loop->header, | |
769 new_exit_e->src); | |
770 | |
771 /* We have to add phi args to the loop->header here as coming | |
772 from new_exit_e edge. */ | |
773 for (gsi = gsi_start_phis (loop->header); | |
774 !gsi_end_p (gsi); | |
775 gsi_next (&gsi)) | |
776 { | |
777 phi = gsi_stmt (gsi); | |
778 phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e); | |
779 if (phi_arg) | |
780 add_phi_arg (phi, phi_arg, new_exit_e, | |
781 gimple_phi_arg_location_from_edge (phi, entry_e)); | |
782 } | |
783 | |
784 redirect_edge_and_branch_force (entry_e, new_loop->header); | |
785 PENDING_STMT (entry_e) = NULL; | |
786 set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader); | |
787 } | |
788 | |
789 free (new_bbs); | |
790 free (bbs); | |
791 | |
792 return new_loop; | |
793 } | |
794 | |
795 | |
796 /* Given the condition statement COND, put it as the last statement | |
797 of GUARD_BB; EXIT_BB is the basic block to skip the loop; | |
798 Assumes that this is the single exit of the guarded loop. | |
799 Returns the skip edge, inserts new stmts on the COND_EXPR_STMT_LIST. */ | |
800 | |
static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond,
                       gimple_seq cond_expr_stmt_list,
                       basic_block exit_bb, basic_block dom_bb)
{
  gimple_stmt_iterator gsi;
  edge new_e, enter_e;
  gimple cond_stmt;
  gimple_seq gimplify_stmt_list = NULL;

  /* The existing fall-through successor becomes the "condition false"
     (enter the loop) edge.  */
  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  /* Gimplify COND; any statements it needs are appended to the
     caller-supplied COND_EXPR_STMT_LIST.  */
  cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
  if (gimplify_stmt_list)
    gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
  cond_stmt = gimple_build_cond (NE_EXPR,
                                 cond, build_int_cst (TREE_TYPE (cond), 0),
                                 NULL_TREE, NULL_TREE);
  if (cond_expr_stmt_list)
    gsi_insert_seq_after (&gsi, cond_expr_stmt_list, GSI_NEW_STMT);

  /* Re-fetch the iterator: inserting the sequence above may have moved
     the last statement of GUARD_BB.  */
  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
  return new_e;
}
833 | |
834 | |
835 /* This function verifies that the following restrictions apply to LOOP: | |
836 (1) it is innermost | |
837 (2) it consists of exactly 2 basic blocks - header, and an empty latch. | |
838 (3) it is single entry, single exit | |
839 (4) its exit condition is the last stmt in the header | |
840 (5) E is the entry/exit edge of LOOP. | |
841 */ | |
842 | |
843 bool | |
844 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e) | |
845 { | |
846 edge exit_e = single_exit (loop); | |
847 edge entry_e = loop_preheader_edge (loop); | |
848 gimple orig_cond = get_loop_exit_condition (loop); | |
849 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src); | |
850 | |
851 if (need_ssa_update_p (cfun)) | |
852 return false; | |
853 | |
854 if (loop->inner | |
855 /* All loops have an outer scope; the only case loop->outer is NULL is for | |
856 the function itself. */ | |
857 || !loop_outer (loop) | |
858 || loop->num_nodes != 2 | |
859 || !empty_block_p (loop->latch) | |
860 || !single_exit (loop) | |
861 /* Verify that new loop exit condition can be trivially modified. */ | |
862 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi)) | |
863 || (e != exit_e && e != entry_e)) | |
864 return false; | |
865 | |
866 return true; | |
867 } | |
868 | |
#ifdef ENABLE_CHECKING
/* Sanity-check the CFG that loop peeling produced: the exit block of
   FIRST_LOOP must act as a guard that either enters SECOND_LOOP or
   skips past it.  */
static void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
                                 struct loop *second_loop)
{
  basic_block loop1_exit_bb = single_exit (first_loop)->dest;
  basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;
  basic_block pred0, pred1;

  /* A guard that controls whether the second_loop is to be executed or
     skipped is placed in first_loop->exit.  first_loop->exit therefore
     has two successors - one is the preheader of second_loop, and the
     other is a bb after second_loop.  */
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);

  /* 1. Verify that one of the successors of first_loop->exit is the
     preheader of second_loop.  The preheader of new_loop is expected to
     have two predecessors: first_loop->exit and the block that precedes
     first_loop -- in either order.  */
  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2);
  pred0 = EDGE_PRED (loop2_entry_bb, 0)->src;
  pred1 = EDGE_PRED (loop2_entry_bb, 1)->src;
  gcc_assert ((pred0 == loop1_exit_bb && pred1 == loop1_entry_bb)
              || (pred1 == loop1_exit_bb && pred0 == loop1_entry_bb));

  /* 2. Verify that the other successor of first_loop->exit is after the
     second_loop.  */
  /* TODO */
}
#endif
902 | |
/* If the run time cost model check determines that vectorization is
   not profitable and hence scalar loop should be generated then set
   FIRST_NITERS to prologue peeled iterations. This will allow all the
   iterations to be executed in the prologue peeled scalar loop.  */

static void
set_prologue_iterations (basic_block bb_before_first_loop,
			 tree first_niters,
			 struct loop *loop,
			 unsigned int th)
{
  edge e;
  basic_block cond_bb, then_bb;
  tree var, prologue_after_cost_adjust_name;
  gimple_stmt_iterator gsi;
  gimple newphi;
  edge e_true, e_false, e_fallthru;
  gimple cond_stmt;
  gimple_seq gimplify_stmt_list = NULL, stmts = NULL;
  tree cost_pre_condition = NULL_TREE;
  /* The scalar iteration count of the loop before any peeling
     adjustments were made.  */
  tree scalar_loop_iters =
    unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));

  /* Split the single incoming edge twice: the first new block (cond_bb)
     will hold the run-time cost check; the second (then_bb) holds the
     statements executed only when that check fires.  */
  e = single_pred_edge (bb_before_first_loop);
  cond_bb = split_edge(e);

  e = single_pred_edge (bb_before_first_loop);
  then_bb = split_edge(e);
  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);

  /* When the cost check fails, control bypasses then_bb and goes straight
     to bb_before_first_loop.  */
  e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
				   EDGE_FALSE_VALUE);
  set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);

  /* Re-label the cond_bb -> then_bb edge as the "true" path.  */
  e_true = EDGE_PRED (then_bb, 0);
  e_true->flags &= ~EDGE_FALLTHRU;
  e_true->flags |= EDGE_TRUE_VALUE;

  e_fallthru = EDGE_SUCC (then_bb, 0);

  /* Build the check: scalar_loop_iters <= TH (the profitability
     threshold), gimplified into a form usable in a COND_EXPR.  */
  cost_pre_condition =
    fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
		 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
  cost_pre_condition =
    force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
			  true, NULL_TREE);
  cond_stmt = gimple_build_cond (NE_EXPR, cost_pre_condition,
				 build_int_cst (TREE_TYPE (cost_pre_condition),
						0), NULL_TREE, NULL_TREE);

  gsi = gsi_last_bb (cond_bb);
  if (gimplify_stmt_list)
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);

  /* Re-fetch the iterator: the insertion above moved the block's end.  */
  gsi = gsi_last_bb (cond_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* In then_bb, materialize the unchanged scalar iteration count in a
     named temporary so it can feed the phi below.  */
  var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
			"prologue_after_cost_adjust");
  add_referenced_var (var);
  prologue_after_cost_adjust_name =
    force_gimple_operand (scalar_loop_iters, &stmts, false, var);

  gsi = gsi_last_bb (then_bb);
  if (stmts)
    gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);

  /* Merge the two values with a phi: the full scalar count when the cost
     check fired (vectorization unprofitable), FIRST_NITERS otherwise.  */
  newphi = create_phi_node (var, bb_before_first_loop);
  add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru,
	       UNKNOWN_LOCATION);
  add_phi_arg (newphi, first_niters, e_false, UNKNOWN_LOCATION);

  /* NOTE(review): FIRST_NITERS is a by-value parameter, so this final
     assignment is a dead store -- the phi result never reaches the
     caller.  Confirm whether the caller is expected to receive it
     (e.g. by passing tree * instead).  */
  first_niters = PHI_RESULT (newphi);
}
977 | |
978 | |
979 /* Function slpeel_tree_peel_loop_to_edge. | |
980 | |
981 Peel the first (last) iterations of LOOP into a new prolog (epilog) loop | |
982 that is placed on the entry (exit) edge E of LOOP. After this transformation | |
983 we have two loops one after the other - first-loop iterates FIRST_NITERS | |
984 times, and second-loop iterates the remainder NITERS - FIRST_NITERS times. | |
985 If the cost model indicates that it is profitable to emit a scalar | |
986 loop instead of the vector one, then the prolog (epilog) loop will iterate | |
987 for the entire unchanged scalar iterations of the loop. | |
988 | |
989 Input: | |
990 - LOOP: the loop to be peeled. | |
991 - E: the exit or entry edge of LOOP. | |
992 If it is the entry edge, we peel the first iterations of LOOP. In this | |
993 case first-loop is LOOP, and second-loop is the newly created loop. | |
994 If it is the exit edge, we peel the last iterations of LOOP. In this | |
995 case, first-loop is the newly created loop, and second-loop is LOOP. | |
996 - NITERS: the number of iterations that LOOP iterates. | |
997 - FIRST_NITERS: the number of iterations that the first-loop should iterate. | |
998 - UPDATE_FIRST_LOOP_COUNT: specified whether this function is responsible | |
999 for updating the loop bound of the first-loop to FIRST_NITERS. If it | |
1000 is false, the caller of this function may want to take care of this | |
1001 (this can be useful if we don't want new stmts added to first-loop). | |
1002 - TH: cost model profitability threshold of iterations for vectorization. | |
1003 - CHECK_PROFITABILITY: specify whether cost model check has not occurred | |
1004 during versioning and hence needs to occur during | |
1005 prologue generation or whether cost model check | |
1006 has not occurred during prologue generation and hence | |
1007 needs to occur during epilogue generation. | |
1008 | |
1009 | |
1010 Output: | |
1011 The function returns a pointer to the new loop-copy, or NULL if it failed | |
1012 to perform the transformation. | |
1013 | |
1014 The function generates two if-then-else guards: one before the first loop, | |
1015 and the other before the second loop: | |
1016 The first guard is: | |
1017 if (FIRST_NITERS == 0) then skip the first loop, | |
1018 and go directly to the second loop. | |
1019 The second guard is: | |
1020 if (FIRST_NITERS == NITERS) then skip the second loop. | |
1021 | |
1022 If the optional COND_EXPR and COND_EXPR_STMT_LIST arguments are given | |
1023 then the generated condition is combined with COND_EXPR and the | |
1024 statements in COND_EXPR_STMT_LIST are emitted together with it. | |
1025 | |
1026 FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p). | |
1027 FORNOW the resulting code will not be in loop-closed-ssa form. | |
1028 */ | |
1029 | |
static struct loop*
slpeel_tree_peel_loop_to_edge (struct loop *loop,
			       edge e, tree first_niters,
			       tree niters, bool update_first_loop_count,
			       unsigned int th, bool check_profitability,
			       tree cond_expr, gimple_seq cond_expr_stmt_list)
{
  struct loop *new_loop = NULL, *first_loop, *second_loop;
  edge skip_e;
  tree pre_condition = NULL_TREE;
  /* SSA names that need replacement after the duplication; released at
     the end of the transformation.  */
  bitmap definitions;
  basic_block bb_before_second_loop, bb_after_second_loop;
  basic_block bb_before_first_loop;
  basic_block bb_between_loops;
  basic_block new_exit_bb;
  edge exit_e = single_exit (loop);
  LOC loop_loc;
  tree cost_pre_condition = NULL_TREE;

  /* Only simple single-entry/single-exit two-block loops are supported.  */
  if (!slpeel_can_duplicate_loop_p (loop, e))
    return NULL;

  /* We have to initialize cfg_hooks. Then, when calling
   cfg_hooks->split_edge, the function tree_split_edge
   is actually called and, when calling cfg_hooks->duplicate_block,
   the function tree_duplicate_bb is called.  */
  gimple_register_cfg_hooks ();


  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
        Resulting CFG would be:

        first_loop:
        do {
        } while ...

        second_loop:
        do {
        } while ...

        orig_exit_bb:
   */

  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
    {
      loop_loc = find_loop_location (loop);
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  if (loop_loc != UNKNOWN_LOC)
	    fprintf (dump_file, "\n%s:%d: note: ",
		     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
	  fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
	}
      return NULL;
    }

  /* Which of the two loops is "first" depends on whether we peel at the
     entry (prologue) or at the exit (epilogue).  */
  if (e == exit_e)
    {
      /* NEW_LOOP was placed after LOOP.  */
      first_loop = loop;
      second_loop = new_loop;
    }
  else
    {
      /* NEW_LOOP was placed before LOOP.  */
      first_loop = new_loop;
      second_loop = loop;
    }

  definitions = ssa_names_to_replace ();
  slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
  rename_variables_in_loop (new_loop);


  /* 2. Add the guard code in one of the following ways:

     2.a Add the guard that controls whether the first loop is executed.
         This occurs when this function is invoked for prologue or epilogue
	 generation and when the cost model check can be done at compile time.

         Resulting CFG would be:

         bb_before_first_loop:
         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                                GOTO first-loop

         first_loop:
         do {
         } while ...

         bb_before_second_loop:

         second_loop:
         do {
         } while ...

         orig_exit_bb:

     2.b Add the cost model check that allows the prologue
         to iterate for the entire unchanged scalar
         iterations of the loop in the event that the cost
         model indicates that the scalar loop is more
         profitable than the vector one. This occurs when
	 this function is invoked for prologue generation
	 and the cost model check needs to be done at run
	 time.

         Resulting CFG after prologue peeling would be:

         if (scalar_loop_iterations <= th)
           FIRST_NITERS = scalar_loop_iterations

         bb_before_first_loop:
         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                                GOTO first-loop

         first_loop:
         do {
         } while ...

         bb_before_second_loop:

         second_loop:
         do {
         } while ...

         orig_exit_bb:

     2.c Add the cost model check that allows the epilogue
         to iterate for the entire unchanged scalar
         iterations of the loop in the event that the cost
         model indicates that the scalar loop is more
         profitable than the vector one. This occurs when
	 this function is invoked for epilogue generation
	 and the cost model check needs to be done at run
	 time. This check is combined with any pre-existing
	 check in COND_EXPR to avoid versioning.

         Resulting CFG after prologue peeling would be:

         bb_before_first_loop:
         if ((scalar_loop_iterations <= th)
             ||
             FIRST_NITERS == 0) GOTO bb_before_second_loop
                                GOTO first-loop

         first_loop:
         do {
         } while ...

         bb_before_second_loop:

         second_loop:
         do {
         } while ...

         orig_exit_bb:
   */

  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
  bb_before_second_loop = split_edge (single_exit (first_loop));

  /* Epilogue peeling.  */
  if (!update_first_loop_count)
    {
      /* Skip the first loop entirely when it would run zero times.  */
      pre_condition =
	fold_build2 (LE_EXPR, boolean_type_node, first_niters,
		     build_int_cst (TREE_TYPE (first_niters), 0));
      if (check_profitability)
	{
	  tree scalar_loop_iters
	    = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
			     (loop_vec_info_for_loop (loop)));
	  /* Also skip the (vector) first loop when the scalar iteration
	     count is below the profitability threshold TH.  */
	  cost_pre_condition =
	    fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
			 build_int_cst (TREE_TYPE (scalar_loop_iters), th));

	  pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
				       cost_pre_condition, pre_condition);
	}
      if (cond_expr)
	{
	  /* Fold the caller-provided versioning condition into the guard
	     so no separate loop version is needed.  */
	  pre_condition =
	    fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
			 pre_condition,
			 fold_build1 (TRUTH_NOT_EXPR, boolean_type_node,
				      cond_expr));
	}
    }

  /* Prologue peeling.  */
  else
    {
      if (check_profitability)
	set_prologue_iterations (bb_before_first_loop, first_niters,
				 loop, th);

      pre_condition =
	fold_build2 (LE_EXPR, boolean_type_node, first_niters,
		     build_int_cst (TREE_TYPE (first_niters), 0));
    }

  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                  cond_expr_stmt_list,
                                  bb_before_second_loop, bb_before_first_loop);
  slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
				      first_loop == new_loop,
				      &new_exit_bb, &definitions);


  /* 3. Add the guard that controls whether the second loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_between_loops:
        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
                                    GOTO bb_before_second_loop

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        bb_after_second_loop:

        orig_exit_bb:
   */

  bb_between_loops = new_exit_bb;
  bb_after_second_loop = split_edge (single_exit (second_loop));

  /* The second loop is skipped when the first one already covered all
     NITERS iterations.  */
  pre_condition =
	fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition, NULL,
                                  bb_after_second_loop, bb_before_first_loop);
  slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
                                     second_loop == new_loop, &new_exit_bb);

  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.
   */
  if (update_first_loop_count)
    slpeel_make_loop_iterate_ntimes (first_loop, first_niters);

  BITMAP_FREE (definitions);
  delete_update_ssa ();

  return new_loop;
}
1286 | |
1287 /* Function vect_get_loop_location. | |
1288 | |
1289 Extract the location of the loop in the source code. | |
1290 If the loop is not well formed for vectorization, an estimated | |
1291 location is calculated. | |
1292 Return the loop location if succeed and NULL if not. */ | |
1293 | |
1294 LOC | |
1295 find_loop_location (struct loop *loop) | |
1296 { | |
1297 gimple stmt = NULL; | |
1298 basic_block bb; | |
1299 gimple_stmt_iterator si; | |
1300 | |
1301 if (!loop) | |
1302 return UNKNOWN_LOC; | |
1303 | |
1304 stmt = get_loop_exit_condition (loop); | |
1305 | |
1306 if (stmt && gimple_location (stmt) != UNKNOWN_LOC) | |
1307 return gimple_location (stmt); | |
1308 | |
1309 /* If we got here the loop is probably not "well formed", | |
1310 try to estimate the loop location */ | |
1311 | |
1312 if (!loop->header) | |
1313 return UNKNOWN_LOC; | |
1314 | |
1315 bb = loop->header; | |
1316 | |
1317 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) | |
1318 { | |
1319 stmt = gsi_stmt (si); | |
1320 if (gimple_location (stmt) != UNKNOWN_LOC) | |
1321 return gimple_location (stmt); | |
1322 } | |
1323 | |
1324 return UNKNOWN_LOC; | |
1325 } | |
1326 | |
1327 | |
1328 /* This function builds ni_name = number of iterations loop executes | |
1329 on the loop preheader. If SEQ is given the stmt is instead emitted | |
1330 there. */ | |
1331 | |
1332 static tree | |
1333 vect_build_loop_niters (loop_vec_info loop_vinfo, gimple_seq seq) | |
1334 { | |
1335 tree ni_name, var; | |
1336 gimple_seq stmts = NULL; | |
1337 edge pe; | |
1338 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
1339 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo)); | |
1340 | |
1341 var = create_tmp_var (TREE_TYPE (ni), "niters"); | |
1342 add_referenced_var (var); | |
1343 ni_name = force_gimple_operand (ni, &stmts, false, var); | |
1344 | |
1345 pe = loop_preheader_edge (loop); | |
1346 if (stmts) | |
1347 { | |
1348 if (seq) | |
1349 gimple_seq_add_seq (&seq, stmts); | |
1350 else | |
1351 { | |
1352 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); | |
1353 gcc_assert (!new_bb); | |
1354 } | |
1355 } | |
1356 | |
1357 return ni_name; | |
1358 } | |
1359 | |
1360 | |
1361 /* This function generates the following statements: | |
1362 | |
1363 ni_name = number of iterations loop executes | |
1364 ratio = ni_name / vf | |
1365 ratio_mult_vf_name = ratio * vf | |
1366 | |
1367 and places them at the loop preheader edge or in COND_EXPR_STMT_LIST | |
1368 if that is non-NULL. */ | |
1369 | |
1370 static void | |
1371 vect_generate_tmps_on_preheader (loop_vec_info loop_vinfo, | |
1372 tree *ni_name_ptr, | |
1373 tree *ratio_mult_vf_name_ptr, | |
1374 tree *ratio_name_ptr, | |
1375 gimple_seq cond_expr_stmt_list) | |
1376 { | |
1377 | |
1378 edge pe; | |
1379 basic_block new_bb; | |
1380 gimple_seq stmts; | |
1381 tree ni_name; | |
1382 tree var; | |
1383 tree ratio_name; | |
1384 tree ratio_mult_vf_name; | |
1385 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
1386 tree ni = LOOP_VINFO_NITERS (loop_vinfo); | |
1387 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); | |
1388 tree log_vf; | |
1389 | |
1390 pe = loop_preheader_edge (loop); | |
1391 | |
1392 /* Generate temporary variable that contains | |
1393 number of iterations loop executes. */ | |
1394 | |
1395 ni_name = vect_build_loop_niters (loop_vinfo, cond_expr_stmt_list); | |
1396 log_vf = build_int_cst (TREE_TYPE (ni), exact_log2 (vf)); | |
1397 | |
1398 /* Create: ratio = ni >> log2(vf) */ | |
1399 | |
1400 ratio_name = fold_build2 (RSHIFT_EXPR, TREE_TYPE (ni_name), ni_name, log_vf); | |
1401 if (!is_gimple_val (ratio_name)) | |
1402 { | |
1403 var = create_tmp_var (TREE_TYPE (ni), "bnd"); | |
1404 add_referenced_var (var); | |
1405 | |
1406 stmts = NULL; | |
1407 ratio_name = force_gimple_operand (ratio_name, &stmts, true, var); | |
1408 if (cond_expr_stmt_list) | |
1409 gimple_seq_add_seq (&cond_expr_stmt_list, stmts); | |
1410 else | |
1411 { | |
1412 pe = loop_preheader_edge (loop); | |
1413 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); | |
1414 gcc_assert (!new_bb); | |
1415 } | |
1416 } | |
1417 | |
1418 /* Create: ratio_mult_vf = ratio << log2 (vf). */ | |
1419 | |
1420 ratio_mult_vf_name = fold_build2 (LSHIFT_EXPR, TREE_TYPE (ratio_name), | |
1421 ratio_name, log_vf); | |
1422 if (!is_gimple_val (ratio_mult_vf_name)) | |
1423 { | |
1424 var = create_tmp_var (TREE_TYPE (ni), "ratio_mult_vf"); | |
1425 add_referenced_var (var); | |
1426 | |
1427 stmts = NULL; | |
1428 ratio_mult_vf_name = force_gimple_operand (ratio_mult_vf_name, &stmts, | |
1429 true, var); | |
1430 if (cond_expr_stmt_list) | |
1431 gimple_seq_add_seq (&cond_expr_stmt_list, stmts); | |
1432 else | |
1433 { | |
1434 pe = loop_preheader_edge (loop); | |
1435 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); | |
1436 gcc_assert (!new_bb); | |
1437 } | |
1438 } | |
1439 | |
1440 *ni_name_ptr = ni_name; | |
1441 *ratio_mult_vf_name_ptr = ratio_mult_vf_name; | |
1442 *ratio_name_ptr = ratio_name; | |
1443 | |
1444 return; | |
1445 } | |
1446 | |
1447 /* Function vect_can_advance_ivs_p | |
1448 | |
1449 In case the number of iterations that LOOP iterates is unknown at compile | |
1450 time, an epilog loop will be generated, and the loop induction variables | |
1451 (IVs) will be "advanced" to the value they are supposed to take just before | |
1452 the epilog loop. Here we check that the access function of the loop IVs | |
1453 and the expression that represents the loop bound are simple enough. | |
1454 These restrictions will be relaxed in the future. */ | |
1455 | |
1456 bool | |
1457 vect_can_advance_ivs_p (loop_vec_info loop_vinfo) | |
1458 { | |
1459 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
1460 basic_block bb = loop->header; | |
1461 gimple phi; | |
1462 gimple_stmt_iterator gsi; | |
1463 | |
1464 /* Analyze phi functions of the loop header. */ | |
1465 | |
1466 if (vect_print_dump_info (REPORT_DETAILS)) | |
1467 fprintf (vect_dump, "vect_can_advance_ivs_p:"); | |
1468 | |
1469 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1470 { | |
1471 tree access_fn = NULL; | |
1472 tree evolution_part; | |
1473 | |
1474 phi = gsi_stmt (gsi); | |
1475 if (vect_print_dump_info (REPORT_DETAILS)) | |
1476 { | |
1477 fprintf (vect_dump, "Analyze phi: "); | |
1478 print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM); | |
1479 } | |
1480 | |
1481 /* Skip virtual phi's. The data dependences that are associated with | |
1482 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ | |
1483 | |
1484 if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi)))) | |
1485 { | |
1486 if (vect_print_dump_info (REPORT_DETAILS)) | |
1487 fprintf (vect_dump, "virtual phi. skip."); | |
1488 continue; | |
1489 } | |
1490 | |
1491 /* Skip reduction phis. */ | |
1492 | |
1493 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def) | |
1494 { | |
1495 if (vect_print_dump_info (REPORT_DETAILS)) | |
1496 fprintf (vect_dump, "reduc phi. skip."); | |
1497 continue; | |
1498 } | |
1499 | |
1500 /* Analyze the evolution function. */ | |
1501 | |
1502 access_fn = instantiate_parameters | |
1503 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi))); | |
1504 | |
1505 if (!access_fn) | |
1506 { | |
1507 if (vect_print_dump_info (REPORT_DETAILS)) | |
1508 fprintf (vect_dump, "No Access function."); | |
1509 return false; | |
1510 } | |
1511 | |
1512 if (vect_print_dump_info (REPORT_DETAILS)) | |
1513 { | |
1514 fprintf (vect_dump, "Access function of PHI: "); | |
1515 print_generic_expr (vect_dump, access_fn, TDF_SLIM); | |
1516 } | |
1517 | |
1518 evolution_part = evolution_part_in_loop_num (access_fn, loop->num); | |
1519 | |
1520 if (evolution_part == NULL_TREE) | |
1521 { | |
1522 if (vect_print_dump_info (REPORT_DETAILS)) | |
1523 fprintf (vect_dump, "No evolution."); | |
1524 return false; | |
1525 } | |
1526 | |
1527 /* FORNOW: We do not transform initial conditions of IVs | |
1528 which evolution functions are a polynomial of degree >= 2. */ | |
1529 | |
1530 if (tree_is_chrec (evolution_part)) | |
1531 return false; | |
1532 } | |
1533 | |
1534 return true; | |
1535 } | |
1536 | |
1537 | |
1538 /* Function vect_update_ivs_after_vectorizer. | |
1539 | |
1540 "Advance" the induction variables of LOOP to the value they should take | |
1541 after the execution of LOOP. This is currently necessary because the | |
1542 vectorizer does not handle induction variables that are used after the | |
1543 loop. Such a situation occurs when the last iterations of LOOP are | |
1544 peeled, because: | |
1545 1. We introduced new uses after LOOP for IVs that were not originally used | |
1546 after LOOP: the IVs of LOOP are now used by an epilog loop. | |
1547 2. LOOP is going to be vectorized; this means that it will iterate N/VF | |
1548 times, whereas the loop IVs should be bumped N times. | |
1549 | |
1550 Input: | |
1551 - LOOP - a loop that is going to be vectorized. The last few iterations | |
1552 of LOOP were peeled. | |
1553 - NITERS - the number of iterations that LOOP executes (before it is | |
1554 vectorized). i.e, the number of times the ivs should be bumped. | |
1555 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path | |
1556 coming out from LOOP on which there are uses of the LOOP ivs | |
1557 (this is the path from LOOP->exit to epilog_loop->preheader). | |
1558 | |
1559 The new definitions of the ivs are placed in LOOP->exit. | |
1560 The phi args associated with the edge UPDATE_E in the bb | |
1561 UPDATE_E->dest are updated accordingly. | |
1562 | |
1563 Assumption 1: Like the rest of the vectorizer, this function assumes | |
1564 a single loop exit that has a single predecessor. | |
1565 | |
1566 Assumption 2: The phi nodes in the LOOP header and in update_bb are | |
1567 organized in the same order. | |
1568 | |
1569 Assumption 3: The access function of the ivs is simple enough (see | |
1570 vect_can_advance_ivs_p). This assumption will be relaxed in the future. | |
1571 | |
1572 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path | |
1573 coming out of LOOP on which the ivs of LOOP are used (this is the path | |
1574 that leads to the epilog loop; other paths skip the epilog loop). This | |
1575 path starts with the edge UPDATE_E, and its destination (denoted update_bb) | |
1576 needs to have its phis updated. | |
1577 */ | |
1578 | |
static void
vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo, tree niters,
				  edge update_e)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block exit_bb = single_exit (loop)->dest;
  gimple phi, phi1;
  gimple_stmt_iterator gsi, gsi1;
  basic_block update_bb = update_e->dest;

  /* gcc_assert (vect_can_advance_ivs_p (loop_vinfo)); */

  /* Make sure there exists a single-predecessor exit bb:  */
  gcc_assert (single_pred_p (exit_bb));

  /* Walk the header phis and the phis of UPDATE_BB in lockstep; the
     function's contract assumes both lists are organized in the same
     order (see Assumption 2 in the function comment above).  */
  for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
       !gsi_end_p (gsi) && !gsi_end_p (gsi1);
       gsi_next (&gsi), gsi_next (&gsi1))
    {
      tree access_fn = NULL;
      tree evolution_part;
      tree init_expr;
      tree step_expr, off;
      tree type;
      tree var, ni, ni_name;
      gimple_stmt_iterator last_gsi;

      phi = gsi_stmt (gsi);
      phi1 = gsi_stmt (gsi1);
      if (vect_print_dump_info (REPORT_DETAILS))
	{
	  fprintf (vect_dump, "vect_update_ivs_after_vectorizer: phi: ");
	  print_gimple_stmt (vect_dump, phi, 0, TDF_SLIM);
	}

      /* Skip virtual phi's.  */
      if (!is_gimple_reg (SSA_NAME_VAR (PHI_RESULT (phi))))
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "virtual phi. skip.");
	  continue;
	}

      /* Skip reduction phis.  */
      if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (phi)) == vect_reduction_def)
	{
	  if (vect_print_dump_info (REPORT_DETAILS))
	    fprintf (vect_dump, "reduc phi. skip.");
	  continue;
	}

      access_fn = analyze_scalar_evolution (loop, PHI_RESULT (phi));
      gcc_assert (access_fn);
      /* We can end up with an access_fn like
           (short int) {(short unsigned int) i_49, +, 1}_1
	 for further analysis we need to strip the outer cast but we
	 need to preserve the original type.  */
      type = TREE_TYPE (access_fn);
      STRIP_NOPS (access_fn);
      evolution_part =
	 unshare_expr (evolution_part_in_loop_num (access_fn, loop->num));
      gcc_assert (evolution_part != NULL_TREE);

      /* FORNOW: We do not support IVs whose evolution function is a polynomial
         of degree >= 2 or exponential.  */
      gcc_assert (!tree_is_chrec (evolution_part));

      /* Compute the IV value after NITERS iterations:
	 ni = init + niters * step.  */
      step_expr = evolution_part;
      init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
							       loop->num));
      init_expr = fold_convert (type, init_expr);

      off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
			 fold_convert (TREE_TYPE (step_expr), niters),
			 step_expr);
      /* Pointer IVs need POINTER_PLUS with a sizetype offset; integer
	 IVs use plain PLUS in the IV's own type.  */
      if (POINTER_TYPE_P (TREE_TYPE (init_expr)))
	ni = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (init_expr),
			  init_expr,
			  fold_convert (sizetype, off));
      else
	ni = fold_build2 (PLUS_EXPR, TREE_TYPE (init_expr),
			  init_expr,
			  fold_convert (TREE_TYPE (init_expr), off));

      var = create_tmp_var (TREE_TYPE (init_expr), "tmp");
      add_referenced_var (var);

      /* Emit the computation of the advanced IV value in LOOP's exit
	 block, just before its last statement.  */
      last_gsi = gsi_last_bb (exit_bb);
      ni_name = force_gimple_operand_gsi (&last_gsi, ni, false, var,
					  true, GSI_SAME_STMT);

      /* Fix phi expressions in the successor bb.  */
      SET_PHI_ARG_DEF (phi1, update_e->dest_idx, ni_name);
    }
}
1674 | |
1675 /* Return the more conservative threshold between the | |
1676 min_profitable_iters returned by the cost model and the user | |
1677 specified threshold, if provided. */ | |
1678 | |
1679 static unsigned int | |
1680 conservative_cost_threshold (loop_vec_info loop_vinfo, | |
1681 int min_profitable_iters) | |
1682 { | |
1683 unsigned int th; | |
1684 int min_scalar_loop_bound; | |
1685 | |
1686 min_scalar_loop_bound = ((PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND) | |
1687 * LOOP_VINFO_VECT_FACTOR (loop_vinfo)) - 1); | |
1688 | |
1689 /* Use the cost model only if it is more conservative than user specified | |
1690 threshold. */ | |
1691 th = (unsigned) min_scalar_loop_bound; | |
1692 if (min_profitable_iters | |
1693 && (!min_scalar_loop_bound | |
1694 || min_profitable_iters > min_scalar_loop_bound)) | |
1695 th = (unsigned) min_profitable_iters; | |
1696 | |
1697 if (th && vect_print_dump_info (REPORT_COST)) | |
1698 fprintf (vect_dump, "Profitability threshold is %u loop iterations.", th); | |
1699 | |
1700 return th; | |
1701 } | |
1702 | |
/* Function vect_do_peeling_for_loop_bound

   Peel the last iterations of the loop represented by LOOP_VINFO.
   The peeled iterations form a new epilog loop.  Given that the loop now
   iterates NITERS times, the new epilog loop iterates
   NITERS % VECTORIZATION_FACTOR times.

   The original loop will later be made to iterate
   NITERS / VECTORIZATION_FACTOR times (this value is placed into RATIO).

   COND_EXPR and COND_EXPR_STMT_LIST are combined with a new generated
   test.  */

void
vect_do_peeling_for_loop_bound (loop_vec_info loop_vinfo, tree *ratio,
				tree cond_expr, gimple_seq cond_expr_stmt_list)
{
  tree ni_name, ratio_mult_vf_name;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  struct loop *new_loop;
  edge update_e;
  basic_block preheader;
  int loop_num;
  bool check_profitability = false;
  unsigned int th = 0;
  int min_profitable_iters;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_loop_bound ===");

  /* Needed by slpeel_tree_peel_loop_to_edge; released at the end.  */
  initialize_original_copy_tables ();

  /* Generate the following variables on the preheader of original loop:

     ni_name = number of iteration the original loop executes
     ratio = ni_name / vf
     ratio_mult_vf_name = ratio * vf  */
  vect_generate_tmps_on_preheader (loop_vinfo, &ni_name,
				   &ratio_mult_vf_name, ratio,
				   cond_expr_stmt_list);

  /* Remember the loop number to assert below that peeling did not
     replace the loop structure we are working on.  */
  loop_num = loop->num;

  /* If the cost model check was not already folded into the guard
     emitted by loop versioning or by peeling for alignment, fold it
     into the guard generated by the epilog peeling below.  */
  if (!LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo)
      && !LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo)
      && !LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo)
      && !cond_expr)
    {
      check_profitability = true;

      /* Get profitability threshold for vectorized loop.  */
      min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);

      th = conservative_cost_threshold (loop_vinfo,
					min_profitable_iters);
    }

  new_loop = slpeel_tree_peel_loop_to_edge (loop, single_exit (loop),
					    ratio_mult_vf_name, ni_name, false,
					    th, check_profitability,
					    cond_expr, cond_expr_stmt_list);
  gcc_assert (new_loop);
  gcc_assert (loop_num == loop->num);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (loop, new_loop);
#endif

  /* A guard that controls whether the new_loop is to be executed or skipped
     is placed in LOOP->exit.  LOOP->exit therefore has two successors - one
     is the preheader of NEW_LOOP, where the IVs from LOOP are used.  The other
     is a bb after NEW_LOOP, where these IVs are not used.  Find the edge that
     is on the path where the LOOP IVs are used and need to be updated.  */

  preheader = loop_preheader_edge (new_loop)->src;
  if (EDGE_PRED (preheader, 0)->src == single_exit (loop)->dest)
    update_e = EDGE_PRED (preheader, 0);
  else
    update_e = EDGE_PRED (preheader, 1);

  /* Update IVs of original loop as if they were advanced
     by ratio_mult_vf_name steps.  */
  vect_update_ivs_after_vectorizer (loop_vinfo, ratio_mult_vf_name, update_e);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
1793 | |
1794 | |
/* Function vect_gen_niters_for_prolog_loop

   Set the number of iterations for the loop represented by LOOP_VINFO
   to the minimum between LOOP_NITERS (the original iteration count of the loop)
   and the misalignment of DR - the data reference recorded in
   LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).  As a result, after the execution of
   this loop, the data reference DR will refer to an aligned location.

   The following computation is generated:

   If the misalignment of DR is known at compile time:
     addr_mis = int mis = DR_MISALIGNMENT (dr);
   Else, compute address misalignment in bytes:
     addr_mis = addr & (vectype_size - 1)

   prolog_niters = min (LOOP_NITERS, ((VF - addr_mis/elem_size)&(VF-1))/step)

   (elem_size = element type size; an element is the scalar element whose type
   is the inner type of the vectype)

   When the step of the data-ref in the loop is not 1 (as in interleaved data
   and SLP), the number of iterations of the prolog must be divided by the step
   (which is equal to the size of interleaved group).

   The above formulas assume that VF == number of elements in the vector.  This
   may not hold when there are multiple-types in the loop.
   In this case, for some data-references in the loop the VF does not represent
   the number of elements that fit in the vector.  Therefore, instead of VF we
   use TYPE_VECTOR_SUBPARTS.  */

static tree
vect_gen_niters_for_prolog_loop (loop_vec_info loop_vinfo, tree loop_niters)
{
  struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree var;
  gimple_seq stmts;
  tree iters, iters_name;
  edge pe;
  basic_block new_bb;
  gimple dr_stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (dr_stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  int vectype_align = TYPE_ALIGN (vectype) / BITS_PER_UNIT;
  tree niters_type = TREE_TYPE (loop_niters);
  int step = 1;
  int element_size = GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (DR_REF (dr))));
  int nelements = TYPE_VECTOR_SUBPARTS (vectype);

  /* For interleaved accesses the prolog counts groups, not scalar
     iterations, so divide by the group size (see head comment).  */
  if (STMT_VINFO_STRIDED_ACCESS (stmt_info))
    step = DR_GROUP_SIZE (vinfo_for_stmt (DR_GROUP_FIRST_DR (stmt_info)));

  pe = loop_preheader_edge (loop);

  if (LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
    {
      /* Misalignment known at compile time: the iteration count folds
	 to a constant.  */
      int byte_misalign = LOOP_PEELING_FOR_ALIGNMENT (loop_vinfo);
      int elem_misalign = byte_misalign / element_size;

      if (vect_print_dump_info (REPORT_DETAILS))
	fprintf (vect_dump, "known alignment = %d.", byte_misalign);

      /* ((nelements - elem_misalign) & (nelements - 1)) / step;
	 the mask works because nelements is a power of two.  */
      iters = build_int_cst (niters_type,
			     (((nelements - elem_misalign) & (nelements - 1)) / step));
    }
  else
    {
      /* Misalignment unknown: emit code computing it from the actual
	 address at runtime.  */
      gimple_seq new_stmts = NULL;
      tree start_addr = vect_create_addr_base_for_vector_ref (dr_stmt,
							      &new_stmts, NULL_TREE, loop);
      tree ptr_type = TREE_TYPE (start_addr);
      tree size = TYPE_SIZE (ptr_type);
      tree type = lang_hooks.types.type_for_size (tree_low_cst (size, 1), 1);
      tree vectype_size_minus_1 = build_int_cst (type, vectype_align - 1);
      /* NOTE(review): vectype_align/nelements is used as the element size
	 in bytes — this assumes the vector's alignment equals its size;
	 confirm against how TYPE_ALIGN is set for vectypes.  */
      tree elem_size_log =
	build_int_cst (type, exact_log2 (vectype_align/nelements));
      tree nelements_minus_1 = build_int_cst (type, nelements - 1);
      tree nelements_tree = build_int_cst (type, nelements);
      tree byte_misalign;
      tree elem_misalign;

      /* The address computation must live on the preheader edge; it must
	 not have required splitting a new block.  */
      new_bb = gsi_insert_seq_on_edge_immediate (pe, new_stmts);
      gcc_assert (!new_bb);

      /* Create:  byte_misalign = addr & (vectype_size - 1)  */
      byte_misalign =
	fold_build2 (BIT_AND_EXPR, type, fold_convert (type, start_addr), vectype_size_minus_1);

      /* Create:  elem_misalign = byte_misalign / element_size  */
      elem_misalign =
	fold_build2 (RSHIFT_EXPR, type, byte_misalign, elem_size_log);

      /* Create:  (niters_type) (nelements - elem_misalign)&(nelements - 1)  */
      iters = fold_build2 (MINUS_EXPR, type, nelements_tree, elem_misalign);
      iters = fold_build2 (BIT_AND_EXPR, type, iters, nelements_minus_1);
      iters = fold_convert (niters_type, iters);
    }

  /* Create:  prolog_loop_niters = min (iters, loop_niters) */
  /* If the loop bound is known at compile time we already verified that it is
     greater than vf; since the misalignment ('iters') is at most vf, there's
     no need to generate the MIN_EXPR in this case.  */
  if (TREE_CODE (loop_niters) != INTEGER_CST)
    iters = fold_build2 (MIN_EXPR, niters_type, iters, loop_niters);

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "niters for prolog loop: ");
      print_generic_expr (vect_dump, iters, TDF_SLIM);
    }

  var = create_tmp_var (niters_type, "prolog_loop_niters");
  add_referenced_var (var);
  stmts = NULL;
  iters_name = force_gimple_operand (iters, &stmts, false, var);

  /* Insert stmt on loop preheader edge.  */
  if (stmts)
    {
      basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
      gcc_assert (!new_bb);
    }

  return iters_name;
}
1920 | |
1921 | |
1922 /* Function vect_update_init_of_dr | |
1923 | |
1924 NITERS iterations were peeled from LOOP. DR represents a data reference | |
1925 in LOOP. This function updates the information recorded in DR to | |
1926 account for the fact that the first NITERS iterations had already been | |
1927 executed. Specifically, it updates the OFFSET field of DR. */ | |
1928 | |
1929 static void | |
1930 vect_update_init_of_dr (struct data_reference *dr, tree niters) | |
1931 { | |
1932 tree offset = DR_OFFSET (dr); | |
1933 | |
1934 niters = fold_build2 (MULT_EXPR, sizetype, | |
1935 fold_convert (sizetype, niters), | |
1936 fold_convert (sizetype, DR_STEP (dr))); | |
1937 offset = fold_build2 (PLUS_EXPR, sizetype, | |
1938 fold_convert (sizetype, offset), niters); | |
1939 DR_OFFSET (dr) = offset; | |
1940 } | |
1941 | |
1942 | |
1943 /* Function vect_update_inits_of_drs | |
1944 | |
1945 NITERS iterations were peeled from the loop represented by LOOP_VINFO. | |
1946 This function updates the information recorded for the data references in | |
1947 the loop to account for the fact that the first NITERS iterations had | |
1948 already been executed. Specifically, it updates the initial_condition of | |
1949 the access_function of all the data_references in the loop. */ | |
1950 | |
1951 static void | |
1952 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters) | |
1953 { | |
1954 unsigned int i; | |
1955 VEC (data_reference_p, heap) *datarefs = LOOP_VINFO_DATAREFS (loop_vinfo); | |
1956 struct data_reference *dr; | |
1957 | |
1958 if (vect_print_dump_info (REPORT_DETAILS)) | |
1959 fprintf (vect_dump, "=== vect_update_inits_of_dr ==="); | |
1960 | |
1961 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) | |
1962 vect_update_init_of_dr (dr, niters); | |
1963 } | |
1964 | |
1965 | |
/* Function vect_do_peeling_for_alignment

   Peel the first 'niters' iterations of the loop represented by LOOP_VINFO.
   'niters' is set to the misalignment of one of the data references in the
   loop, thereby forcing it to refer to an aligned location at the beginning
   of the execution of this loop.  The data reference for which we are
   peeling is recorded in LOOP_VINFO_UNALIGNED_DR.  */

void
vect_do_peeling_for_alignment (loop_vec_info loop_vinfo)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  tree niters_of_prolog_loop, ni_name;
  tree n_iters;
  struct loop *new_loop;
  unsigned int th = 0;
  int min_profitable_iters;

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "=== vect_do_peeling_for_alignment ===");

  /* Needed by slpeel_tree_peel_loop_to_edge; released at the end.  */
  initialize_original_copy_tables ();

  /* Compute the prolog iteration count: min (niters, misalignment of
     the unaligned DR) — see vect_gen_niters_for_prolog_loop.  */
  ni_name = vect_build_loop_niters (loop_vinfo, NULL);
  niters_of_prolog_loop = vect_gen_niters_for_prolog_loop (loop_vinfo, ni_name);


  /* Get profitability threshold for vectorized loop.  */
  min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);
  th = conservative_cost_threshold (loop_vinfo,
				    min_profitable_iters);

  /* Peel the prolog loop and iterate it niters_of_prolog_loop.  */
  new_loop =
    slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
				   niters_of_prolog_loop, ni_name, true,
				   th, true, NULL_TREE, NULL);

  gcc_assert (new_loop);
#ifdef ENABLE_CHECKING
  slpeel_verify_cfg_after_peeling (new_loop, loop);
#endif

  /* Update number of times loop executes.  The main loop now runs
     niters - prolog_niters times.  */
  n_iters = LOOP_VINFO_NITERS (loop_vinfo);
  LOOP_VINFO_NITERS (loop_vinfo) = fold_build2 (MINUS_EXPR,
						TREE_TYPE (n_iters), n_iters, niters_of_prolog_loop);

  /* Update the init conditions of the access functions of all data refs.  */
  vect_update_inits_of_drs (loop_vinfo, niters_of_prolog_loop);

  /* After peeling we have to reset scalar evolution analyzer.  */
  scev_reset ();

  free_original_copy_tables ();
}
2022 | |
2023 | |
2024 /* Function vect_create_cond_for_align_checks. | |
2025 | |
2026 Create a conditional expression that represents the alignment checks for | |
2027 all of data references (array element references) whose alignment must be | |
2028 checked at runtime. | |
2029 | |
2030 Input: | |
2031 COND_EXPR - input conditional expression. New conditions will be chained | |
2032 with logical AND operation. | |
2033 LOOP_VINFO - two fields of the loop information are used. | |
2034 LOOP_VINFO_PTR_MASK is the mask used to check the alignment. | |
2035 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked. | |
2036 | |
2037 Output: | |
2038 COND_EXPR_STMT_LIST - statements needed to construct the conditional | |
2039 expression. | |
2040 The returned value is the conditional expression to be used in the if | |
2041 statement that controls which version of the loop gets executed at runtime. | |
2042 | |
2043 The algorithm makes two assumptions: | |
2044 1) The number of bytes "n" in a vector is a power of 2. | |
2045 2) An address "a" is aligned if a%n is zero and that this | |
2046 test can be done as a&(n-1) == 0. For example, for 16 | |
2047 byte vectors the test is a&0xf == 0. */ | |
2048 | |
2049 static void | |
2050 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo, | |
2051 tree *cond_expr, | |
2052 gimple_seq *cond_expr_stmt_list) | |
2053 { | |
2054 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
2055 VEC(gimple,heap) *may_misalign_stmts | |
2056 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo); | |
2057 gimple ref_stmt; | |
2058 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo); | |
2059 tree mask_cst; | |
2060 unsigned int i; | |
2061 tree psize; | |
2062 tree int_ptrsize_type; | |
2063 char tmp_name[20]; | |
2064 tree or_tmp_name = NULL_TREE; | |
2065 tree and_tmp, and_tmp_name; | |
2066 gimple and_stmt; | |
2067 tree ptrsize_zero; | |
2068 tree part_cond_expr; | |
2069 | |
2070 /* Check that mask is one less than a power of 2, i.e., mask is | |
2071 all zeros followed by all ones. */ | |
2072 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0)); | |
2073 | |
2074 /* CHECKME: what is the best integer or unsigned type to use to hold a | |
2075 cast from a pointer value? */ | |
2076 psize = TYPE_SIZE (ptr_type_node); | |
2077 int_ptrsize_type | |
2078 = lang_hooks.types.type_for_size (tree_low_cst (psize, 1), 0); | |
2079 | |
2080 /* Create expression (mask & (dr_1 || ... || dr_n)) where dr_i is the address | |
2081 of the first vector of the i'th data reference. */ | |
2082 | |
2083 for (i = 0; VEC_iterate (gimple, may_misalign_stmts, i, ref_stmt); i++) | |
2084 { | |
2085 gimple_seq new_stmt_list = NULL; | |
2086 tree addr_base; | |
2087 tree addr_tmp, addr_tmp_name; | |
2088 tree or_tmp, new_or_tmp_name; | |
2089 gimple addr_stmt, or_stmt; | |
2090 | |
2091 /* create: addr_tmp = (int)(address_of_first_vector) */ | |
2092 addr_base = | |
2093 vect_create_addr_base_for_vector_ref (ref_stmt, &new_stmt_list, | |
2094 NULL_TREE, loop); | |
2095 if (new_stmt_list != NULL) | |
2096 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list); | |
2097 | |
2098 sprintf (tmp_name, "%s%d", "addr2int", i); | |
2099 addr_tmp = create_tmp_var (int_ptrsize_type, tmp_name); | |
2100 add_referenced_var (addr_tmp); | |
2101 addr_tmp_name = make_ssa_name (addr_tmp, NULL); | |
2102 addr_stmt = gimple_build_assign_with_ops (NOP_EXPR, addr_tmp_name, | |
2103 addr_base, NULL_TREE); | |
2104 SSA_NAME_DEF_STMT (addr_tmp_name) = addr_stmt; | |
2105 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt); | |
2106 | |
2107 /* The addresses are OR together. */ | |
2108 | |
2109 if (or_tmp_name != NULL_TREE) | |
2110 { | |
2111 /* create: or_tmp = or_tmp | addr_tmp */ | |
2112 sprintf (tmp_name, "%s%d", "orptrs", i); | |
2113 or_tmp = create_tmp_var (int_ptrsize_type, tmp_name); | |
2114 add_referenced_var (or_tmp); | |
2115 new_or_tmp_name = make_ssa_name (or_tmp, NULL); | |
2116 or_stmt = gimple_build_assign_with_ops (BIT_IOR_EXPR, | |
2117 new_or_tmp_name, | |
2118 or_tmp_name, addr_tmp_name); | |
2119 SSA_NAME_DEF_STMT (new_or_tmp_name) = or_stmt; | |
2120 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt); | |
2121 or_tmp_name = new_or_tmp_name; | |
2122 } | |
2123 else | |
2124 or_tmp_name = addr_tmp_name; | |
2125 | |
2126 } /* end for i */ | |
2127 | |
2128 mask_cst = build_int_cst (int_ptrsize_type, mask); | |
2129 | |
2130 /* create: and_tmp = or_tmp & mask */ | |
2131 and_tmp = create_tmp_var (int_ptrsize_type, "andmask" ); | |
2132 add_referenced_var (and_tmp); | |
2133 and_tmp_name = make_ssa_name (and_tmp, NULL); | |
2134 | |
2135 and_stmt = gimple_build_assign_with_ops (BIT_AND_EXPR, and_tmp_name, | |
2136 or_tmp_name, mask_cst); | |
2137 SSA_NAME_DEF_STMT (and_tmp_name) = and_stmt; | |
2138 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt); | |
2139 | |
2140 /* Make and_tmp the left operand of the conditional test against zero. | |
2141 if and_tmp has a nonzero bit then some address is unaligned. */ | |
2142 ptrsize_zero = build_int_cst (int_ptrsize_type, 0); | |
2143 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node, | |
2144 and_tmp_name, ptrsize_zero); | |
2145 if (*cond_expr) | |
2146 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node, | |
2147 *cond_expr, part_cond_expr); | |
2148 else | |
2149 *cond_expr = part_cond_expr; | |
2150 } | |
2151 | |
2152 | |
2153 /* Function vect_vfa_segment_size. | |
2154 | |
2155 Create an expression that computes the size of segment | |
2156 that will be accessed for a data reference. The functions takes into | |
2157 account that realignment loads may access one more vector. | |
2158 | |
2159 Input: | |
2160 DR: The data reference. | |
2161 VECT_FACTOR: vectorization factor. | |
2162 | |
2163 Return an expression whose value is the size of segment which will be | |
2164 accessed by DR. */ | |
2165 | |
2166 static tree | |
2167 vect_vfa_segment_size (struct data_reference *dr, tree vect_factor) | |
2168 { | |
2169 tree segment_length = fold_build2 (MULT_EXPR, integer_type_node, | |
2170 DR_STEP (dr), vect_factor); | |
2171 | |
2172 if (vect_supportable_dr_alignment (dr) == dr_explicit_realign_optimized) | |
2173 { | |
2174 tree vector_size = TYPE_SIZE_UNIT | |
2175 (STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr)))); | |
2176 | |
2177 segment_length = fold_build2 (PLUS_EXPR, integer_type_node, | |
2178 segment_length, vector_size); | |
2179 } | |
2180 return fold_convert (sizetype, segment_length); | |
2181 } | |
2182 | |
2183 | |
/* Function vect_create_cond_for_alias_checks.

   Create a conditional expression that represents the run-time checks for
   overlapping of address ranges represented by a list of data references
   relations passed as input.

   Input:
   COND_EXPR  - input conditional expression.  New conditions will be chained
                with logical AND operation.
   LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
	        to be checked.

   Output:
   COND_EXPR - conditional expression.
   COND_EXPR_STMT_LIST - statements needed to construct the conditional
                         expression.


   The returned value is the conditional expression to be used in the if
   statement that controls which version of the loop gets executed at runtime.
*/

static void
vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo,
				   tree * cond_expr,
				   gimple_seq * cond_expr_stmt_list)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  VEC (ddr_p, heap) * may_alias_ddrs =
    LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo);
  tree vect_factor =
    build_int_cst (integer_type_node, LOOP_VINFO_VECT_FACTOR (loop_vinfo));

  ddr_p ddr;
  unsigned int i;
  tree part_cond_expr;

  /* Create expression
     ((store_ptr_0 + store_segment_length_0) < load_ptr_0)
     || (load_ptr_0 + load_segment_length_0) < store_ptr_0))
     &&
     ...
     &&
     ((store_ptr_n + store_segment_length_n) < load_ptr_n)
     || (load_ptr_n + load_segment_length_n) < store_ptr_n))  */

  if (VEC_empty (ddr_p, may_alias_ddrs))
    return;

  for (i = 0; VEC_iterate (ddr_p, may_alias_ddrs, i, ddr); i++)
    {
      struct data_reference *dr_a, *dr_b;
      gimple dr_group_first_a, dr_group_first_b;
      tree addr_base_a, addr_base_b;
      tree segment_length_a, segment_length_b;
      gimple stmt_a, stmt_b;

      /* For an interleaved access, check against the first data-ref of
	 the group, since that is where the group's access starts.  */
      dr_a = DDR_A (ddr);
      stmt_a = DR_STMT (DDR_A (ddr));
      dr_group_first_a = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_a));
      if (dr_group_first_a)
	{
	  stmt_a = dr_group_first_a;
	  dr_a = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_a));
	}

      dr_b = DDR_B (ddr);
      stmt_b = DR_STMT (DDR_B (ddr));
      dr_group_first_b = DR_GROUP_FIRST_DR (vinfo_for_stmt (stmt_b));
      if (dr_group_first_b)
	{
	  stmt_b = dr_group_first_b;
	  dr_b = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt_b));
	}

      /* Base addresses of the two ranges; any address-computation
	 statements are appended to COND_EXPR_STMT_LIST.  */
      addr_base_a =
	vect_create_addr_base_for_vector_ref (stmt_a, cond_expr_stmt_list,
					      NULL_TREE, loop);
      addr_base_b =
	vect_create_addr_base_for_vector_ref (stmt_b, cond_expr_stmt_list,
					      NULL_TREE, loop);

      segment_length_a = vect_vfa_segment_size (dr_a, vect_factor);
      segment_length_b = vect_vfa_segment_size (dr_b, vect_factor);

      if (vect_print_dump_info (REPORT_DR_DETAILS))
	{
	  fprintf (vect_dump,
		   "create runtime check for data references ");
	  print_generic_expr (vect_dump, DR_REF (dr_a), TDF_SLIM);
	  fprintf (vect_dump, " and ");
	  print_generic_expr (vect_dump, DR_REF (dr_b), TDF_SLIM);
	}


      /* The ranges [a, a+len_a) and [b, b+len_b) do not overlap iff
	 one ends before the other starts.  */
      part_cond_expr =
	fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
	  fold_build2 (LT_EXPR, boolean_type_node,
	    fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_a),
	      addr_base_a,
	      segment_length_a),
	    addr_base_b),
	  fold_build2 (LT_EXPR, boolean_type_node,
	    fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (addr_base_b),
	      addr_base_b,
	      segment_length_b),
	    addr_base_a));

      /* AND this pair's check onto the condition built so far.  */
      if (*cond_expr)
	*cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
				  *cond_expr, part_cond_expr);
      else
	*cond_expr = part_cond_expr;
    }

  if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))
    fprintf (vect_dump, "created %u versioning for alias checks.\n",
	     VEC_length (ddr_p, may_alias_ddrs));
}
2303 | |
2304 | |
/* Function vect_loop_versioning.

   If the loop has data references that may or may not be aligned or/and
   has data reference relations whose independence was not proven then
   two versions of the loop need to be generated, one which is vectorized
   and one which isn't.  A test is then generated to control which of the
   loops is executed.  The test checks for the alignment of all of the
   data references that may or may not be aligned.  An additional
   sequence of runtime tests is generated for each pairs of DDRs whose
   independence was not proven.  The vectorized version of loop is
   executed only if both alias and alignment tests are passed.

   The test generated to check which version of loop is executed
   is modified to also check for profitability as indicated by the
   cost model initially.

   The versioning precondition(s) are placed in *COND_EXPR and
   *COND_EXPR_STMT_LIST.  If DO_VERSIONING is true versioning is
   also performed, otherwise only the conditions are generated.  */

void
vect_loop_versioning (loop_vec_info loop_vinfo, bool do_versioning,
		      tree *cond_expr, gimple_seq *cond_expr_stmt_list)
{
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
  basic_block condition_bb;
  gimple_stmt_iterator gsi, cond_exp_gsi;
  basic_block merge_bb;
  basic_block new_exit_bb;
  edge new_exit_e, e;
  gimple orig_phi, new_phi;
  tree arg;
  unsigned prob = 4 * REG_BR_PROB_BASE / 5;
  gimple_seq gimplify_stmt_list = NULL;
  tree scalar_loop_iters = LOOP_VINFO_NITERS (loop_vinfo);
  int min_profitable_iters = 0;
  unsigned int th;

  /* Get profitability threshold for vectorized loop.  */
  min_profitable_iters = LOOP_VINFO_COST_MODEL_MIN_ITERS (loop_vinfo);

  th = conservative_cost_threshold (loop_vinfo,
				    min_profitable_iters);

  /* Start the versioning condition with the profitability test:
     scalar_loop_iters > th.  */
  *cond_expr =
    fold_build2 (GT_EXPR, boolean_type_node, scalar_loop_iters,
		 build_int_cst (TREE_TYPE (scalar_loop_iters), th));

  *cond_expr = force_gimple_operand (*cond_expr, cond_expr_stmt_list,
				     false, NULL_TREE);

  /* Chain on the alignment and alias checks, each ANDed into
     *COND_EXPR.  */
  if (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo))
    vect_create_cond_for_align_checks (loop_vinfo, cond_expr,
				       cond_expr_stmt_list);

  if (LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo))
    vect_create_cond_for_alias_checks (loop_vinfo, cond_expr,
				       cond_expr_stmt_list);

  /* Gimplify the combined condition into a boolean != 0 test.  */
  *cond_expr =
    fold_build2 (NE_EXPR, boolean_type_node, *cond_expr, integer_zero_node);
  *cond_expr =
    force_gimple_operand (*cond_expr, &gimplify_stmt_list, true, NULL_TREE);
  gimple_seq_add_seq (cond_expr_stmt_list, gimplify_stmt_list);

  /* If we only needed the extra conditions and a new loop copy
     bail out here.  */
  if (!do_versioning)
    return;

  initialize_original_copy_tables ();
  loop_version (loop, *cond_expr, &condition_bb,
		prob, prob, REG_BR_PROB_BASE - prob, true);
  free_original_copy_tables();

  /* Loop versioning violates an assumption we try to maintain during
     vectorization - that the loop exit block has a single predecessor.
     After versioning, the exit block of both loop versions is the same
     basic block (i.e. it has two predecessors).  Just in order to simplify
     following transformations in the vectorizer, we fix this situation
     here by adding a new (empty) block on the exit-edge of the loop,
     with the proper loop-exit phis to maintain loop-closed-form.  */

  merge_bb = single_exit (loop)->dest;
  gcc_assert (EDGE_COUNT (merge_bb->preds) == 2);
  new_exit_bb = split_edge (single_exit (loop));
  new_exit_e = single_exit (loop);
  e = EDGE_SUCC (new_exit_bb, 0);

  /* For every phi in the merge block, create a corresponding phi in the
     new exit block and reroute the merge-block argument through it.  */
  for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      orig_phi = gsi_stmt (gsi);
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
				 new_exit_bb);
      arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
      add_phi_arg (new_phi, arg, new_exit_e,
		   gimple_phi_arg_location_from_edge (orig_phi, e));
      SET_PHI_ARG_DEF (orig_phi, e->dest_idx, PHI_RESULT (new_phi));
    }

  /* End loop-exit-fixes after versioning.  */

  update_ssa (TODO_update_ssa);
  /* Emit the accumulated condition-setup statements just before the
     branch in the condition block, then hand ownership back empty.  */
  if (*cond_expr_stmt_list)
    {
      cond_exp_gsi = gsi_last_bb (condition_bb);
      gsi_insert_seq_before (&cond_exp_gsi, *cond_expr_stmt_list,
			     GSI_SAME_STMT);
      *cond_expr_stmt_list = NULL;
    }
  *cond_expr = NULL_TREE;
}
2417 |