gcc/tree-vectorizer.c @ changeset 0:a06113de4d67 (CbC_gcc)

first commit

author:   kent <kent@cr.ie.u-ryukyu.ac.jp>
date:     Fri, 17 Jul 2009 14:47:48 +0900
parents:  (none)
children: 77e2b8dfacca
/* Loop Vectorization
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Free Software
   Foundation, Inc.
   Contributed by Dorit Naishlos <dorit@il.ibm.com>

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* Loop Vectorization Pass.

   This pass tries to vectorize loops.  This first implementation focuses on
   simple inner-most loops, with no conditional control flow, and a set of
   simple operations whose vector form can be expressed using existing
   tree codes (PLUS, MULT etc).

   For example, the vectorizer transforms the following simple loop:

        short a[N]; short b[N]; short c[N]; int i;

        for (i=0; i<N; i++){
          a[i] = b[i] + c[i];
        }

   as if it were manually vectorized by rewriting the source code into:

        typedef int __attribute__((mode(V8HI))) v8hi;
        short a[N]; short b[N]; short c[N]; int i;
        v8hi *pa = (v8hi*)a, *pb = (v8hi*)b, *pc = (v8hi*)c;
        v8hi va, vb, vc;

        for (i=0; i<N/8; i++){
          vb = pb[i];
          vc = pc[i];
          va = vb + vc;
          pa[i] = va;
        }

   The main entry to this pass is vectorize_loops(), in which
   the vectorizer applies a set of analyses on a given set of loops,
   followed by the actual vectorization transformation for the loops that
   had successfully passed the analysis phase.

   Throughout this pass we make a distinction between two types of
   data: scalars (which are represented by SSA_NAMES), and memory references
   ("data-refs").  These two types of data require different handling both
   during analysis and transformation.  The types of data-refs that the
   vectorizer currently supports are ARRAY_REFS whose base is an array DECL
   (not a pointer), and INDIRECT_REFS through pointers; both array and pointer
   accesses are required to have a simple (consecutive) access pattern.

   Analysis phase:
   ===============
   The driver for the analysis phase is vect_analyze_loop_nest().
   It applies a set of analyses, some of which rely on the scalar evolution
   analyzer (scev) developed by Sebastian Pop.

   During the analysis phase the vectorizer records some information
   per stmt in a "stmt_vec_info" struct which is attached to each stmt in the
   loop, as well as general information about the loop as a whole, which is
   recorded in a "loop_vec_info" struct attached to each loop.

   Transformation phase:
   =====================
   The loop transformation phase scans all the stmts in the loop, and
   creates a vector stmt (or a sequence of stmts) for each scalar stmt S in
   the loop that needs to be vectorized.  It inserts the vector code sequence
   just before the scalar stmt S, and records a pointer to the vector code
   in STMT_VINFO_VEC_STMT (stmt_info) (stmt_info is the stmt_vec_info struct
   attached to S).  This pointer will be used for the vectorization of following
   stmts which use the def of stmt S.  Stmt S is removed if it writes to memory;
   otherwise, we rely on dead code elimination for removing it.

   For example, say stmt S1 was vectorized into stmt VS1:

   VS1: vb = px[i];
   S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   S2:  a = b;

   To vectorize stmt S2, the vectorizer first finds the stmt that defines
   the operand 'b' (S1), and gets the relevant vector def 'vb' from the
   vector stmt VS1 pointed to by STMT_VINFO_VEC_STMT (stmt_info (S1)).  The
   resulting sequence would be:

   VS1: vb = px[i];
   S1:  b = x[i];    STMT_VINFO_VEC_STMT (stmt_info (S1)) = VS1
   VS2: va = vb;
   S2:  a = b;       STMT_VINFO_VEC_STMT (stmt_info (S2)) = VS2

   Operands that are not SSA_NAMEs, are data-refs that appear in
   load/store operations (like 'x[i]' in S1), and are handled differently.

   Target modeling:
   =================
   Currently the only target specific information that is used is the
   size of the vector (in bytes) - "UNITS_PER_SIMD_WORD".  Targets that can
   support different sizes of vectors, for now will need to specify one value
   for "UNITS_PER_SIMD_WORD".  More flexibility will be added in the future.

   Since we only vectorize operations whose vector form can be
   expressed using existing tree codes, to verify that an operation is
   supported, the vectorizer checks the relevant optab at the relevant
   machine_mode (e.g, optab_handler (add_optab, V8HImode)->insn_code).  If
   the value found is CODE_FOR_nothing, then there's no target support, and
   we can't vectorize the stmt.

   For additional information on this project see:
   http://gcc.gnu.org/projects/tree-ssa/vectorization.html
*/
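
/* As an illustrative sketch of the optab check described above (not part
   of the pass itself; 'code' and 'vectype' are hypothetical stand-ins for
   a statement's tree code and vector type):

     optab op = optab_for_tree_code (code, vectype, optab_default);
     if (!op
         || optab_handler (op, TYPE_MODE (vectype))->insn_code
            == CODE_FOR_nothing)
       return false;   <-- no target support; the stmt can't be vectorized

   optab_for_tree_code maps a tree code to the optab to query for a given
   type, and optab_handler looks up the insn code for a machine mode.  */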

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "ggc.h"
#include "tree.h"
#include "target.h"
#include "rtl.h"
#include "basic-block.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "cfglayout.h"
#include "expr.h"
#include "recog.h"
#include "optabs.h"
#include "params.h"
#include "toplev.h"
#include "tree-chrec.h"
#include "tree-data-ref.h"
#include "tree-scalar-evolution.h"
#include "input.h"
#include "hashtab.h"
#include "tree-vectorizer.h"
#include "tree-pass.h"
#include "langhooks.h"

/*************************************************************************
  General Vectorization Utilities
 *************************************************************************/

/* vect_dump will be set to stderr or dump_file if it exists.  */
FILE *vect_dump;

/* vect_verbosity_level is set to an invalid value
   to mark that it's uninitialized.  */
enum verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL;

/* Loop location.  */
static LOC vect_loop_location;

/* Bitmap of virtual variables to be renamed.  */
bitmap vect_memsyms_to_rename;

/* Vector mapping GIMPLE stmt to stmt_vec_info.  */
VEC(vec_void_p,heap) *stmt_vec_info_vec;


/*************************************************************************
  Simple Loop Peeling Utilities

  Utilities to support loop peeling for vectorization purposes.
 *************************************************************************/


/* Renames the use *OP_P.  */

static void
rename_use_op (use_operand_p op_p)
{
  tree new_name;

  if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
    return;

  new_name = get_current_def (USE_FROM_PTR (op_p));

  /* Something defined outside of the loop.  */
  if (!new_name)
    return;

  /* An ordinary ssa name defined in the loop.  */

  SET_USE (op_p, new_name);
}


/* Renames the variables in basic block BB.  */

void
rename_variables_in_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  gimple stmt;
  use_operand_p use_p;
  ssa_op_iter iter;
  edge e;
  edge_iterator ei;
  struct loop *loop = bb->loop_father;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      stmt = gsi_stmt (gsi);
      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
        rename_use_op (use_p);
    }

  FOR_EACH_EDGE (e, ei, bb->succs)
    {
      if (!flow_bb_inside_loop_p (loop, e->dest))
        continue;
      for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
        rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi_stmt (gsi), e));
    }
}


/* Renames variables in the newly generated LOOP.  */

void
rename_variables_in_loop (struct loop *loop)
{
  unsigned i;
  basic_block *bbs;

  bbs = get_loop_body (loop);

  for (i = 0; i < loop->num_nodes; i++)
    rename_variables_in_bb (bbs[i]);

  free (bbs);
}


/* Update the PHI nodes of NEW_LOOP.

   NEW_LOOP is a duplicate of ORIG_LOOP.
   AFTER indicates whether NEW_LOOP executes before or after ORIG_LOOP:
   AFTER is true if NEW_LOOP executes after ORIG_LOOP, and false if it
   executes before it.  */

static void
slpeel_update_phis_for_duplicate_loop (struct loop *orig_loop,
                                       struct loop *new_loop, bool after)
{
  tree new_ssa_name;
  gimple phi_new, phi_orig;
  tree def;
  edge orig_loop_latch = loop_latch_edge (orig_loop);
  edge orig_entry_e = loop_preheader_edge (orig_loop);
  edge new_loop_exit_e = single_exit (new_loop);
  edge new_loop_entry_e = loop_preheader_edge (new_loop);
  edge entry_arg_e = (after ? orig_loop_latch : orig_entry_e);
  gimple_stmt_iterator gsi_new, gsi_orig;

  /*
     step 1. For each loop-header-phi:
             Add the first phi argument for the phi in NEW_LOOP
             (the one associated with the entry of NEW_LOOP)

     step 2. For each loop-header-phi:
             Add the second phi argument for the phi in NEW_LOOP
             (the one associated with the latch of NEW_LOOP)

     step 3. Update the phis in the successor block of NEW_LOOP.

        case 1: NEW_LOOP was placed before ORIG_LOOP:
                The successor block of NEW_LOOP is the header of ORIG_LOOP.
                Updating the phis in the successor block can therefore be done
                along with the scanning of the loop header phis, because the
                header blocks of ORIG_LOOP and NEW_LOOP have exactly the same
                phi nodes, organized in the same order.

        case 2: NEW_LOOP was placed after ORIG_LOOP:
                The successor block of NEW_LOOP is the original exit block of
                ORIG_LOOP - the phis to be updated are the loop-closed-ssa phis.
                We postpone updating these phis to a later stage (when
                loop guards are added).
   */
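
  /* For example (an illustrative sketch; the SSA names are made up):
     if ORIG_LOOP has the loop-header phi

       i_1 = PHI <0 (preheader), i_7 (latch)>

     then step 1 adds the entry argument of the corresponding phi in
     NEW_LOOP, taken from ENTRY_ARG_E (the original latch when AFTER is
     true, the original preheader otherwise), and step 2 adds the latch
     argument using get_current_def (i_7), falling back to the phi result
     when the name has no definition inside the loop.  */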


  /* Scan the phis in the headers of the old and new loops
     (they are organized in exactly the same order).  */

  for (gsi_new = gsi_start_phis (new_loop->header),
       gsi_orig = gsi_start_phis (orig_loop->header);
       !gsi_end_p (gsi_new) && !gsi_end_p (gsi_orig);
       gsi_next (&gsi_new), gsi_next (&gsi_orig))
    {
      phi_new = gsi_stmt (gsi_new);
      phi_orig = gsi_stmt (gsi_orig);

      /* step 1.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, entry_arg_e);
      add_phi_arg (phi_new, def, new_loop_entry_e);

      /* step 2.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi_orig, orig_loop_latch);
      if (TREE_CODE (def) != SSA_NAME)
        continue;

      new_ssa_name = get_current_def (def);
      if (!new_ssa_name)
        {
          /* This only happens if there are no definitions
             inside the loop.  Use the phi_result in this case.  */
          new_ssa_name = PHI_RESULT (phi_new);
        }

      /* An ordinary ssa name defined in the loop.  */
      add_phi_arg (phi_new, new_ssa_name, loop_latch_edge (new_loop));

      /* step 3 (case 1).  */
      if (!after)
        {
          gcc_assert (new_loop_exit_e == orig_entry_e);
          SET_PHI_ARG_DEF (phi_orig,
                           new_loop_exit_e->dest_idx,
                           new_ssa_name);
        }
    }
}


/* Update PHI nodes for a guard of the LOOP.

   Input:
   - LOOP, GUARD_EDGE: LOOP is a loop for which we added guard code that
        controls whether LOOP is to be executed.  GUARD_EDGE is the edge that
        originates from the guard-bb, skips LOOP and reaches the (unique) exit
        bb of LOOP.  This loop-exit-bb is an empty bb with one successor.
        We denote this bb NEW_MERGE_BB because before the guard code was added
        it had a single predecessor (the LOOP header), and now it became a merge
        point of two paths - the path that ends with the LOOP exit-edge, and
        the path that ends with GUARD_EDGE.
   - NEW_EXIT_BB: New basic block that is added by this function between LOOP
        and NEW_MERGE_BB.  It is used to place loop-closed-ssa-form exit-phis.

   ===> The CFG before the guard-code was added:
        LOOP_header_bb:
          loop_body
          if (exit_loop) goto update_bb
          else           goto LOOP_header_bb
        update_bb:

   ==> The CFG after the guard-code was added:
        guard_bb:
          if (LOOP_guard_condition) goto new_merge_bb
          else                      goto LOOP_header_bb
        LOOP_header_bb:
          loop_body
          if (exit_loop_condition) goto new_merge_bb
          else                     goto LOOP_header_bb
        new_merge_bb:
          goto update_bb
        update_bb:

   ==> The CFG after this function:
        guard_bb:
          if (LOOP_guard_condition) goto new_merge_bb
          else                      goto LOOP_header_bb
        LOOP_header_bb:
          loop_body
          if (exit_loop_condition) goto new_exit_bb
          else                     goto LOOP_header_bb
        new_exit_bb:
        new_merge_bb:
          goto update_bb
        update_bb:

   This function:
   1. creates and updates the relevant phi nodes to account for the new
      incoming edge (GUARD_EDGE) into NEW_MERGE_BB.  This involves:
      1.1. Create phi nodes at NEW_MERGE_BB.
      1.2. Update the phi nodes at the successor of NEW_MERGE_BB (denoted
           UPDATE_BB).  UPDATE_BB was the exit-bb of LOOP before NEW_MERGE_BB
           was added.
   2. preserves loop-closed-ssa-form by creating the required phi nodes
      at the exit of LOOP (i.e, in NEW_EXIT_BB).

   There are two flavors to this function:

   slpeel_update_phi_nodes_for_guard1:
     Here the guard controls whether we enter or skip LOOP, where LOOP is a
     prolog_loop (loop1 below), and the new phis created in NEW_MERGE_BB are
     for variables that have phis in the loop header.

   slpeel_update_phi_nodes_for_guard2:
     Here the guard controls whether we enter or skip LOOP, where LOOP is an
     epilog_loop (loop2 below), and the new phis created in NEW_MERGE_BB are
     for variables that have phis in the loop exit.

   I.e., the overall structure is:

        loop1_preheader_bb:
                guard1 (goto loop1/merge1_bb)
        loop1
        loop1_exit_bb:
                guard2 (goto merge1_bb/merge2_bb)
        merge1_bb
        loop2
        loop2_exit_bb
        merge2_bb
        next_bb

   slpeel_update_phi_nodes_for_guard1 takes care of creating phis in
   loop1_exit_bb and merge1_bb.  These are entry phis (phis for the vars
   that have phis in loop1->header).

   slpeel_update_phi_nodes_for_guard2 takes care of creating phis in
   loop2_exit_bb and merge2_bb.  These are exit phis (phis for the vars
   that have phis in next_bb).  It also adds some of these phis to
   loop1_exit_bb.

   slpeel_update_phi_nodes_for_guard1 is always called before
   slpeel_update_phi_nodes_for_guard2.  They are both needed in order
   to create correct data-flow and loop-closed-ssa-form.

   Generally slpeel_update_phi_nodes_for_guard1 creates phis for variables
   that change between iterations of a loop (and therefore have a phi-node
   at the loop entry), whereas slpeel_update_phi_nodes_for_guard2 creates
   phis for variables that are used out of the loop (and therefore have
   loop-closed exit phis).  Some variables may be both updated between
   iterations and used after the loop.  This is why in loop1_exit_bb we
   may need both entry_phis (created by slpeel_update_phi_nodes_for_guard1)
   and exit phis (created by slpeel_update_phi_nodes_for_guard2).

   - IS_NEW_LOOP: if IS_NEW_LOOP is true, then LOOP is a newly created copy of
     an original loop.  i.e., we have:

           orig_loop
           guard_bb (goto LOOP/new_merge)
           new_loop <-- LOOP
           new_exit
           new_merge
           next_bb

     If IS_NEW_LOOP is false, then LOOP is an original loop, in which case we
     have:

           new_loop
           guard_bb (goto LOOP/new_merge)
           orig_loop <-- LOOP
           new_exit
           new_merge
           next_bb

     The SSA names defined in the original loop have a current
     reaching definition that records the corresponding new
     ssa-name used in the new duplicated loop copy.
*/

/* Function slpeel_update_phi_nodes_for_guard1

   Input:
   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.
   - DEFS - a bitmap of ssa names to mark new names for which we recorded
            information.

   In the context of the overall structure, we have:

        loop1_preheader_bb:
                guard1 (goto loop1/merge1_bb)
LOOP->  loop1
        loop1_exit_bb:
                guard2 (goto merge1_bb/merge2_bb)
        merge1_bb
        loop2
        loop2_exit_bb
        merge2_bb
        next_bb

   For each name updated between loop iterations (i.e - for each name that has
   an entry (loop-header) phi in LOOP) we create a new phi in:
   1. merge1_bb (to account for the edge from guard1)
   2. loop1_exit_bb (an exit-phi to keep LOOP in loop-closed form)
*/

static void
slpeel_update_phi_nodes_for_guard1 (edge guard_edge, struct loop *loop,
                                    bool is_new_loop, basic_block *new_exit_bb,
                                    bitmap *defs)
{
  gimple orig_phi, new_phi;
  gimple update_phi, update_phi2;
  tree guard_arg, loop_arg;
  basic_block new_merge_bb = guard_edge->dest;
  edge e = EDGE_SUCC (new_merge_bb, 0);
  basic_block update_bb = e->dest;
  basic_block orig_bb = loop->header;
  edge new_exit_e;
  tree current_new_name;
  tree name;
  gimple_stmt_iterator gsi_orig, gsi_update;

  /* Create new bb between loop and new_merge_bb.  */
  *new_exit_bb = split_edge (single_exit (loop));

  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);

  for (gsi_orig = gsi_start_phis (orig_bb),
       gsi_update = gsi_start_phis (update_bb);
       !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
       gsi_next (&gsi_orig), gsi_next (&gsi_update))
    {
      orig_phi = gsi_stmt (gsi_orig);
      update_phi = gsi_stmt (gsi_update);

      /* Virtual phi; mark it for renaming.  We actually want to call
         mark_sym_for_renaming, but since all ssa renaming datastructures
         are going to be freed before we get to call ssa_update, we just
         record this name for now in a bitmap, and will mark it for
         renaming later.  */
      name = PHI_RESULT (orig_phi);
      if (!is_gimple_reg (SSA_NAME_VAR (name)))
        bitmap_set_bit (vect_memsyms_to_rename, DECL_UID (SSA_NAME_VAR (name)));

      /** 1. Handle new-merge-point phis  **/

      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                 new_merge_bb);

      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
            of LOOP.  Set the two phi args in NEW_PHI for these edges:  */
      loop_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, EDGE_SUCC (loop->latch, 0));
      guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, loop_preheader_edge (loop));

      add_phi_arg (new_phi, loop_arg, new_exit_e);
      add_phi_arg (new_phi, guard_arg, guard_edge);

      /* 1.3. Update phi in successor block.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == loop_arg
                  || PHI_ARG_DEF_FROM_EDGE (update_phi, e) == guard_arg);
      SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
      update_phi2 = new_phi;


      /** 2. Handle loop-closed-ssa-form phis  **/

      if (!is_gimple_reg (PHI_RESULT (orig_phi)))
        continue;

      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                 *new_exit_bb);

      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
      add_phi_arg (new_phi, loop_arg, single_exit (loop));

      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
      SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));

      /* 2.4. Record the newly created name with set_current_def.
         We want to find a name such that
                name = get_current_def (orig_loop_name)
         and to set its current definition as follows:
                set_current_def (name, new_phi_name)

         If LOOP is a new loop then loop_arg is already the name we're
         looking for.  If LOOP is the original loop, then loop_arg is
         the orig_loop_name and the relevant name is recorded in its
         current reaching definition.  */
      if (is_new_loop)
        current_new_name = loop_arg;
      else
        {
          current_new_name = get_current_def (loop_arg);
          /* current_def is not available only if the variable does not
             change inside the loop, in which case we also don't care
             about recording a current_def for it because we won't be
             trying to create loop-exit-phis for it.  */
          if (!current_new_name)
            continue;
        }
      gcc_assert (get_current_def (current_new_name) == NULL_TREE);

      set_current_def (current_new_name, PHI_RESULT (new_phi));
      bitmap_set_bit (*defs, SSA_NAME_VERSION (current_new_name));
    }
}


/* Function slpeel_update_phi_nodes_for_guard2

   Input:
   - GUARD_EDGE, LOOP, IS_NEW_LOOP, NEW_EXIT_BB - as explained above.

   In the context of the overall structure, we have:

        loop1_preheader_bb:
                guard1 (goto loop1/merge1_bb)
        loop1
        loop1_exit_bb:
                guard2 (goto merge1_bb/merge2_bb)
        merge1_bb
LOOP->  loop2
        loop2_exit_bb
        merge2_bb
        next_bb

   For each name used outside the loop (i.e - for each name that has an exit
   phi in next_bb) we create a new phi in:
   1. merge2_bb (to account for the edge from guard_bb)
   2. loop2_exit_bb (an exit-phi to keep LOOP in loop-closed form)
   3. guard2 bb (an exit phi to keep the preceding loop in loop-closed form),
      if needed (if it wasn't handled by slpeel_update_phi_nodes_for_guard1).
*/

static void
slpeel_update_phi_nodes_for_guard2 (edge guard_edge, struct loop *loop,
                                    bool is_new_loop, basic_block *new_exit_bb)
{
  gimple orig_phi, new_phi;
  gimple update_phi, update_phi2;
  tree guard_arg, loop_arg;
  basic_block new_merge_bb = guard_edge->dest;
  edge e = EDGE_SUCC (new_merge_bb, 0);
  basic_block update_bb = e->dest;
  edge new_exit_e;
  tree orig_def, orig_def_new_name;
  tree new_name, new_name2;
  tree arg;
  gimple_stmt_iterator gsi;

  /* Create new bb between loop and new_merge_bb.  */
  *new_exit_bb = split_edge (single_exit (loop));

  new_exit_e = EDGE_SUCC (*new_exit_bb, 0);

  for (gsi = gsi_start_phis (update_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      update_phi = gsi_stmt (gsi);
      orig_phi = update_phi;
      orig_def = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
      /* This loop-closed-phi actually doesn't represent a use
         out of the loop - the phi arg is a constant.  */
      if (TREE_CODE (orig_def) != SSA_NAME)
        continue;
      orig_def_new_name = get_current_def (orig_def);
      arg = NULL_TREE;

      /** 1. Handle new-merge-point phis  **/

      /* 1.1. Generate new phi node in NEW_MERGE_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                 new_merge_bb);

      /* 1.2. NEW_MERGE_BB has two incoming edges: GUARD_EDGE and the exit-edge
            of LOOP.  Set the two PHI args in NEW_PHI for these edges:  */
      new_name = orig_def;
      new_name2 = NULL_TREE;
      if (orig_def_new_name)
        {
          new_name = orig_def_new_name;
          /* Some variables have both loop-entry-phis and loop-exit-phis.
             Such variables were given yet newer names by phis placed in
             guard_bb by slpeel_update_phi_nodes_for_guard1.  I.e:
             new_name2 = get_current_def (get_current_def (orig_name)).  */
          new_name2 = get_current_def (new_name);
        }

      if (is_new_loop)
        {
          guard_arg = orig_def;
          loop_arg = new_name;
        }
      else
        {
          guard_arg = new_name;
          loop_arg = orig_def;
        }
      if (new_name2)
        guard_arg = new_name2;

      add_phi_arg (new_phi, loop_arg, new_exit_e);
      add_phi_arg (new_phi, guard_arg, guard_edge);

      /* 1.3. Update phi in successor block.  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi, e) == orig_def);
      SET_PHI_ARG_DEF (update_phi, e->dest_idx, PHI_RESULT (new_phi));
      update_phi2 = new_phi;


      /** 2. Handle loop-closed-ssa-form phis  **/

      /* 2.1. Generate new phi node in NEW_EXIT_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                 *new_exit_bb);

      /* 2.2. NEW_EXIT_BB has one incoming edge: the exit-edge of the loop.  */
      add_phi_arg (new_phi, loop_arg, single_exit (loop));

      /* 2.3. Update phi in successor of NEW_EXIT_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, new_exit_e) == loop_arg);
      SET_PHI_ARG_DEF (update_phi2, new_exit_e->dest_idx, PHI_RESULT (new_phi));


      /** 3. Handle loop-closed-ssa-form phis for first loop  **/

      /* 3.1. Find the relevant names that need an exit-phi in
         GUARD_BB, i.e. names for which
         slpeel_update_phi_nodes_for_guard1 had not already created a
         phi node.  This is the case for names that are used outside
         the loop (and therefore need an exit phi) but are not updated
         across loop iterations (and therefore don't have a
         loop-header-phi).

         slpeel_update_phi_nodes_for_guard1 is responsible for
         creating loop-exit phis in GUARD_BB for names that have a
         loop-header-phi.  When such a phi is created we also record
         the new name in its current definition.  If this new name
         exists, then guard_arg was set to this new name (see 1.2
         above).  Therefore, if guard_arg is not this new name, this
         is an indication that an exit-phi in GUARD_BB was not yet
         created, so we take care of it here.  */
      if (guard_arg == new_name2)
        continue;
      arg = guard_arg;

      /* 3.2. Generate new phi node in GUARD_BB:  */
      new_phi = create_phi_node (SSA_NAME_VAR (PHI_RESULT (orig_phi)),
                                 guard_edge->src);

      /* 3.3. GUARD_BB has one incoming edge:  */
      gcc_assert (EDGE_COUNT (guard_edge->src->preds) == 1);
      add_phi_arg (new_phi, arg, EDGE_PRED (guard_edge->src, 0));

      /* 3.4. Update phi in successor of GUARD_BB:  */
      gcc_assert (PHI_ARG_DEF_FROM_EDGE (update_phi2, guard_edge)
                  == guard_arg);
      SET_PHI_ARG_DEF (update_phi2, guard_edge->dest_idx, PHI_RESULT (new_phi));
    }
}


/* Make the LOOP iterate NITERS times.  This is done by adding a new IV
   that starts at zero, increases by one and its limit is NITERS.

   Assumption: the exit-condition of LOOP is the last stmt in the loop.  */
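
/* For example (an illustrative sketch; the SSA names are made up): a loop
   whose exit test is

     if (p_5 != q_8) goto latch; else goto exit;

   is given a fresh IV indx_1 = PHI <0, indx_2> with indx_2 = indx_1 + 1,
   and the old exit test is replaced by a comparison of the new IV against
   NITERS:

     if (indx_2 < niters_9) goto latch; else goto exit;

   (or >= when the exit edge is taken on the true branch).  */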

void
slpeel_make_loop_iterate_ntimes (struct loop *loop, tree niters)
{
  tree indx_before_incr, indx_after_incr;
  gimple cond_stmt;
  gimple orig_cond;
  edge exit_edge = single_exit (loop);
  gimple_stmt_iterator loop_cond_gsi;
  gimple_stmt_iterator incr_gsi;
  bool insert_after;
  tree init = build_int_cst (TREE_TYPE (niters), 0);
  tree step = build_int_cst (TREE_TYPE (niters), 1);
  LOC loop_loc;
  enum tree_code code;

  orig_cond = get_loop_exit_condition (loop);
  gcc_assert (orig_cond);
  loop_cond_gsi = gsi_for_stmt (orig_cond);

  standard_iv_increment_position (loop, &incr_gsi, &insert_after);
  create_iv (init, step, NULL_TREE, loop,
             &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);

  indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
                                              true, NULL_TREE, true,
                                              GSI_SAME_STMT);
  niters = force_gimple_operand_gsi (&loop_cond_gsi, niters, true, NULL_TREE,
                                     true, GSI_SAME_STMT);

  code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
  cond_stmt = gimple_build_cond (code, indx_after_incr, niters, NULL_TREE,
                                 NULL_TREE);

  gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);

  /* Remove old loop exit test:  */
  gsi_remove (&loop_cond_gsi, true);

  loop_loc = find_loop_location (loop);
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      if (loop_loc != UNKNOWN_LOC)
        fprintf (dump_file, "\nloop at %s:%d: ",
                 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
      print_gimple_stmt (dump_file, cond_stmt, 0, TDF_SLIM);
    }

  loop->nb_iterations = niters;
}


/* Given LOOP this function generates a new copy of it and puts it
   on E which is either the entry or exit of LOOP.  */
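
/* For example (an illustrative sketch, not from the code): to place the
   copy after the loop one would pass the loop's single exit edge:

     struct loop *copy
       = slpeel_tree_duplicate_loop_to_edge_cfg (loop, single_exit (loop));

   whereas passing loop_preheader_edge (loop) places the copy before it;
   any other edge makes the function return NULL.  */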

struct loop *
slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop, edge e)
{
  struct loop *new_loop;
  basic_block *new_bbs, *bbs;
  bool at_exit;
  bool was_imm_dom;
  basic_block exit_dest;
  gimple phi;
  tree phi_arg;
  edge exit, new_exit;
  gimple_stmt_iterator gsi;

  at_exit = (e == single_exit (loop));
  if (!at_exit && e != loop_preheader_edge (loop))
    return NULL;

  bbs = get_loop_body (loop);

  /* Check whether duplication is possible.  */
  if (!can_copy_bbs_p (bbs, loop->num_nodes))
    {
      free (bbs);
      return NULL;
    }

  /* Generate new loop structure.  */
  new_loop = duplicate_loop (loop, loop_outer (loop));
  if (!new_loop)
    {
      free (bbs);
      return NULL;
    }

  exit_dest = single_exit (loop)->dest;
  was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
                                          exit_dest) == loop->header ?
                 true : false);

  new_bbs = XNEWVEC (basic_block, loop->num_nodes);

  exit = single_exit (loop);
  copy_bbs (bbs, loop->num_nodes, new_bbs,
            &exit, 1, &new_exit, NULL,
            e->src);

  /* Duplicate the phi args at the exit bbs, as they also come
     from the exit of the duplicated loop.  */
  for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, single_exit (loop));
      if (phi_arg)
        {
          edge new_loop_exit_edge;

          if (EDGE_SUCC (new_loop->header, 0)->dest == new_loop->latch)
            new_loop_exit_edge = EDGE_SUCC (new_loop->header, 1);
          else
            new_loop_exit_edge = EDGE_SUCC (new_loop->header, 0);

          add_phi_arg (phi, phi_arg, new_loop_exit_edge);
        }
    }

  if (at_exit) /* Add the loop copy at exit.  */
    {
      redirect_edge_and_branch_force (e, new_loop->header);
      PENDING_STMT (e) = NULL;
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, e->src);
      if (was_imm_dom)
        set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_loop->header);
    }
  else /* Add the copy at entry.  */
    {
      edge new_exit_e;
      edge entry_e = loop_preheader_edge (loop);
      basic_block preheader = entry_e->src;

      if (!flow_bb_inside_loop_p (new_loop,
                                  EDGE_SUCC (new_loop->header, 0)->dest))
        new_exit_e = EDGE_SUCC (new_loop->header, 0);
      else
        new_exit_e = EDGE_SUCC (new_loop->header, 1);

      redirect_edge_and_branch_force (new_exit_e, loop->header);
      PENDING_STMT (new_exit_e) = NULL;
      set_immediate_dominator (CDI_DOMINATORS, loop->header,
                               new_exit_e->src);

      /* We have to add phi args to the loop->header here as coming
         from new_exit_e edge.  */
      for (gsi = gsi_start_phis (loop->header);
           !gsi_end_p (gsi);
           gsi_next (&gsi))
        {
          phi = gsi_stmt (gsi);
          phi_arg = PHI_ARG_DEF_FROM_EDGE (phi, entry_e);
          if (phi_arg)
            add_phi_arg (phi, phi_arg, new_exit_e);
        }

      redirect_edge_and_branch_force (entry_e, new_loop->header);
      PENDING_STMT (entry_e) = NULL;
      set_immediate_dominator (CDI_DOMINATORS, new_loop->header, preheader);
    }

  free (new_bbs);
  free (bbs);

  return new_loop;
}


/* Given the condition statement COND, put it as the last statement
   of GUARD_BB; EXIT_BB is the basic block reached when the guard
   skips the loop.  Assumes that this is the single exit of the
   guarded loop.  Returns the skip edge.  */
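
/* For example (an illustrative sketch, not from the code): with
   COND = (first_niters <= 0), the guard block ends up as

     guard_bb:
       ...
       if (first_niters <= 0) goto exit_bb;   <-- returned skip edge (TRUE)
       else fall through on the existing successor edge (now FALSE)

   i.e. the original fallthru successor edge of GUARD_BB becomes the
   false edge, and the newly created edge to EXIT_BB is the true edge.  */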

static edge
slpeel_add_loop_guard (basic_block guard_bb, tree cond, basic_block exit_bb,
                       basic_block dom_bb)
{
  gimple_stmt_iterator gsi;
  edge new_e, enter_e;
  gimple cond_stmt;
  gimple_seq gimplify_stmt_list = NULL;

  enter_e = EDGE_SUCC (guard_bb, 0);
  enter_e->flags &= ~EDGE_FALLTHRU;
  enter_e->flags |= EDGE_FALSE_VALUE;
  gsi = gsi_last_bb (guard_bb);

  cond = force_gimple_operand (cond, &gimplify_stmt_list, true, NULL_TREE);
  cond_stmt = gimple_build_cond (NE_EXPR,
                                 cond, build_int_cst (TREE_TYPE (cond), 0),
                                 NULL_TREE, NULL_TREE);
  if (gimplify_stmt_list)
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);

  gsi = gsi_last_bb (guard_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  /* Add new edge to connect guard block to the merge/loop-exit block.  */
  new_e = make_edge (guard_bb, exit_bb, EDGE_TRUE_VALUE);
  set_immediate_dominator (CDI_DOMINATORS, exit_bb, dom_bb);
  return new_e;
}


/* This function verifies that the following restrictions apply to LOOP:
   (1) it is innermost
   (2) it consists of exactly 2 basic blocks - header, and an empty latch.
   (3) it is single entry, single exit
   (4) its exit condition is the last stmt in the header
   (5) E is the entry/exit edge of LOOP.
 */
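
/* For example (an illustrative sketch; the SSA names are made up): a loop
   of the shape

     header: i_1 = PHI <0 (preheader), i_2 (latch)>
             ...
             i_2 = i_1 + 1;
             if (i_2 < n_3) goto latch; else goto exit;
     latch:  goto header;

   satisfies (1)-(4): it is innermost, has exactly two blocks with an
   empty latch, a single entry and exit, and its exit condition is the
   last stmt of the header.  */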

bool
slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
{
  edge exit_e = single_exit (loop);
  edge entry_e = loop_preheader_edge (loop);
  gimple orig_cond = get_loop_exit_condition (loop);
  gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);

  if (need_ssa_update_p ())
    return false;

  if (loop->inner
      /* All loops have an outer scope; the only case loop->outer is NULL is
         for the function itself.  */
      || !loop_outer (loop)
      || loop->num_nodes != 2
      || !empty_block_p (loop->latch)
      || !single_exit (loop)
      /* Verify that the new loop exit condition can be trivially modified.  */
      || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
      || (e != exit_e && e != entry_e))
    return false;

  return true;
}

#ifdef ENABLE_CHECKING
void
slpeel_verify_cfg_after_peeling (struct loop *first_loop,
                                 struct loop *second_loop)
{
  basic_block loop1_exit_bb = single_exit (first_loop)->dest;
  basic_block loop2_entry_bb = loop_preheader_edge (second_loop)->src;
  basic_block loop1_entry_bb = loop_preheader_edge (first_loop)->src;

  /* A guard that controls whether the second_loop is to be executed or
     skipped is placed in first_loop->exit.  first_loop->exit therefore has
     two successors - one is the preheader of second_loop, and the other is
     a bb after second_loop.  */
  gcc_assert (EDGE_COUNT (loop1_exit_bb->succs) == 2);

  /* 1. Verify that one of the successors of first_loop->exit is the
        preheader of second_loop.  */

  /* The preheader of new_loop is expected to have two predecessors:
     first_loop->exit and the block that precedes first_loop.  */

  gcc_assert (EDGE_COUNT (loop2_entry_bb->preds) == 2
              && ((EDGE_PRED (loop2_entry_bb, 0)->src == loop1_exit_bb
                   && EDGE_PRED (loop2_entry_bb, 1)->src == loop1_entry_bb)
                  || (EDGE_PRED (loop2_entry_bb, 1)->src == loop1_exit_bb
                      && EDGE_PRED (loop2_entry_bb, 0)->src == loop1_entry_bb)));

  /* Verify that the other successor of first_loop->exit is after the
     second_loop.  */
  /* TODO */
}
#endif

/* If the run time cost model check determines that vectorization is
   not profitable and hence a scalar loop should be generated, then set
   FIRST_NITERS to the prologue peeled iterations.  This will allow all the
   iterations to be executed in the prologue peeled scalar loop.  */

void
set_prologue_iterations (basic_block bb_before_first_loop,
                         tree first_niters,
                         struct loop *loop,
                         unsigned int th)
{
  edge e;
  basic_block cond_bb, then_bb;
  tree var, prologue_after_cost_adjust_name;
  gimple_stmt_iterator gsi;
  gimple newphi;
  edge e_true, e_false, e_fallthru;
  gimple cond_stmt;
  gimple_seq gimplify_stmt_list = NULL, stmts = NULL;
  tree cost_pre_condition = NULL_TREE;
  tree scalar_loop_iters =
    unshare_expr (LOOP_VINFO_NITERS_UNCHANGED (loop_vec_info_for_loop (loop)));

  e = single_pred_edge (bb_before_first_loop);
  cond_bb = split_edge (e);

  e = single_pred_edge (bb_before_first_loop);
  then_bb = split_edge (e);
  set_immediate_dominator (CDI_DOMINATORS, then_bb, cond_bb);

  e_false = make_single_succ_edge (cond_bb, bb_before_first_loop,
                                   EDGE_FALSE_VALUE);
  set_immediate_dominator (CDI_DOMINATORS, bb_before_first_loop, cond_bb);

  e_true = EDGE_PRED (then_bb, 0);
  e_true->flags &= ~EDGE_FALLTHRU;
  e_true->flags |= EDGE_TRUE_VALUE;

  e_fallthru = EDGE_SUCC (then_bb, 0);

  cost_pre_condition =
    fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
                 build_int_cst (TREE_TYPE (scalar_loop_iters), th));
  cost_pre_condition =
    force_gimple_operand (cost_pre_condition, &gimplify_stmt_list,
                          true, NULL_TREE);
  cond_stmt = gimple_build_cond (NE_EXPR, cost_pre_condition,
                                 build_int_cst (TREE_TYPE (cost_pre_condition),
                                                0), NULL_TREE, NULL_TREE);

  gsi = gsi_last_bb (cond_bb);
  if (gimplify_stmt_list)
    gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);

  gsi = gsi_last_bb (cond_bb);
  gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);

  var = create_tmp_var (TREE_TYPE (scalar_loop_iters),
                        "prologue_after_cost_adjust");
  add_referenced_var (var);
  prologue_after_cost_adjust_name =
    force_gimple_operand (scalar_loop_iters, &stmts, false, var);

  gsi = gsi_last_bb (then_bb);
  if (stmts)
    gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);

  newphi = create_phi_node (var, bb_before_first_loop);
  add_phi_arg (newphi, prologue_after_cost_adjust_name, e_fallthru);
  add_phi_arg (newphi, first_niters, e_false);

  first_niters = PHI_RESULT (newphi);
}
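
/* For example (an illustrative sketch, not from the code): the control
   flow built above effectively computes

     if (scalar_loop_iters <= th)
       first_niters = scalar_loop_iters;

   by inserting a cond_bb/then_bb diamond whose phi node in
   BB_BEFORE_FIRST_LOOP merges the adjusted count with the incoming
   FIRST_NITERS.  */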


/* Function slpeel_tree_peel_loop_to_edge.

   Peel the first (last) iterations of LOOP into a new prolog (epilog) loop
   that is placed on the entry (exit) edge E of LOOP.  After this transformation
   we have two loops one after the other - first-loop iterates FIRST_NITERS
   times, and second-loop iterates the remainder NITERS - FIRST_NITERS times.
   If the cost model indicates that it is profitable to emit a scalar
   loop instead of the vector one, then the prolog (epilog) loop will iterate
   for the entire unchanged scalar iterations of the loop.

   Input:
   - LOOP: the loop to be peeled.
   - E: the exit or entry edge of LOOP.
        If it is the entry edge, we peel the first iterations of LOOP.  In this
        case first-loop is LOOP, and second-loop is the newly created loop.
        If it is the exit edge, we peel the last iterations of LOOP.  In this
        case, first-loop is the newly created loop, and second-loop is LOOP.
   - NITERS: the number of iterations that LOOP iterates.
   - FIRST_NITERS: the number of iterations that the first-loop should iterate.
   - UPDATE_FIRST_LOOP_COUNT: specifies whether this function is responsible
        for updating the loop bound of the first-loop to FIRST_NITERS.  If it
        is false, the caller of this function may want to take care of this
        (this can be useful if we don't want new stmts added to first-loop).
   - TH: cost model profitability threshold of iterations for vectorization.
   - CHECK_PROFITABILITY: specifies whether the cost model check did not occur
        during versioning and hence needs to occur during
        prologue generation, or whether the cost model check
        did not occur during prologue generation and hence
        needs to occur during epilogue generation.


   Output:
   The function returns a pointer to the new loop-copy, or NULL if it failed
   to perform the transformation.

   The function generates two if-then-else guards: one before the first loop,
   and the other before the second loop:
   The first guard is:
     if (FIRST_NITERS == 0) then skip the first loop,
     and go directly to the second loop.
   The second guard is:
     if (FIRST_NITERS == NITERS) then skip the second loop.

   FORNOW only simple loops are supported (see slpeel_can_duplicate_loop_p).
   FORNOW the resulting code will not be in loop-closed-ssa form.
*/
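
/* For example (an illustrative sketch, not from the code): peeling a
   prologue of FIRST_NITERS iterations off LOOP and letting this function
   update the first loop's bound could look like

     new_loop
       = slpeel_tree_peel_loop_to_edge (loop, loop_preheader_edge (loop),
                                        first_niters, niters,
                                        true, th, check_profitability);

   while passing single_exit (loop) instead peels an epilogue.  */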

struct loop*
slpeel_tree_peel_loop_to_edge (struct loop *loop,
                               edge e, tree first_niters,
                               tree niters, bool update_first_loop_count,
                               unsigned int th, bool check_profitability)
{
  struct loop *new_loop = NULL, *first_loop, *second_loop;
  edge skip_e;
  tree pre_condition = NULL_TREE;
  bitmap definitions;
  basic_block bb_before_second_loop, bb_after_second_loop;
  basic_block bb_before_first_loop;
  basic_block bb_between_loops;
  basic_block new_exit_bb;
  edge exit_e = single_exit (loop);
  LOC loop_loc;
  tree cost_pre_condition = NULL_TREE;

  if (!slpeel_can_duplicate_loop_p (loop, e))
    return NULL;

  /* We have to initialize cfg_hooks.  Then, when calling
     cfg_hooks->split_edge, the function tree_split_edge
     is actually called and, when calling cfg_hooks->duplicate_block,
     the function tree_duplicate_bb is called.  */
  gimple_register_cfg_hooks ();


  /* 1. Generate a copy of LOOP and put it on E (E is the entry/exit of LOOP).
        Resulting CFG would be:

        first_loop:
        do {
        } while ...

        second_loop:
        do {
        } while ...

        orig_exit_bb:
   */

  if (!(new_loop = slpeel_tree_duplicate_loop_to_edge_cfg (loop, e)))
    {
      loop_loc = find_loop_location (loop);
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          if (loop_loc != UNKNOWN_LOC)
            fprintf (dump_file, "\n%s:%d: note: ",
                     LOC_FILE (loop_loc), LOC_LINE (loop_loc));
          fprintf (dump_file, "tree_duplicate_loop_to_edge_cfg failed.\n");
        }
      return NULL;
    }

  if (e == exit_e)
    {
      /* NEW_LOOP was placed after LOOP.  */
      first_loop = loop;
      second_loop = new_loop;
    }
  else
    {
      /* NEW_LOOP was placed before LOOP.  */
      first_loop = new_loop;
      second_loop = loop;
    }

  definitions = ssa_names_to_replace ();
  slpeel_update_phis_for_duplicate_loop (loop, new_loop, e == exit_e);
  rename_variables_in_loop (new_loop);


  /* 2. Add the guard code in one of the following ways:

     2.a Add the guard that controls whether the first loop is executed.
         This occurs when this function is invoked for prologue or epilogue
         generation and when the cost model check can be done at compile time.

         Resulting CFG would be:

         bb_before_first_loop:
         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                                GOTO first-loop

         first_loop:
         do {
         } while ...

         bb_before_second_loop:

         second_loop:
         do {
         } while ...

         orig_exit_bb:

     2.b Add the cost model check that allows the prologue
         to iterate for the entire unchanged scalar
         iterations of the loop in the event that the cost
         model indicates that the scalar loop is more
         profitable than the vector one.  This occurs when
         this function is invoked for prologue generation
         and the cost model check needs to be done at run
         time.

         Resulting CFG after prologue peeling would be:

         if (scalar_loop_iterations <= th)
           FIRST_NITERS = scalar_loop_iterations

         bb_before_first_loop:
         if (FIRST_NITERS == 0) GOTO bb_before_second_loop
                                GOTO first-loop

         first_loop:
         do {
         } while ...

         bb_before_second_loop:

         second_loop:
         do {
         } while ...

         orig_exit_bb:

     2.c Add the cost model check that allows the epilogue
         to iterate for the entire unchanged scalar
         iterations of the loop in the event that the cost
         model indicates that the scalar loop is more
         profitable than the vector one.  This occurs when
         this function is invoked for epilogue generation
         and the cost model check needs to be done at run
         time.

         Resulting CFG after epilogue peeling would be:

         bb_before_first_loop:
         if ((scalar_loop_iterations <= th)
             ||
             FIRST_NITERS == 0) GOTO bb_before_second_loop
                                GOTO first-loop

         first_loop:
         do {
         } while ...

         bb_before_second_loop:

         second_loop:
         do {
         } while ...

         orig_exit_bb:
   */

  bb_before_first_loop = split_edge (loop_preheader_edge (first_loop));
  bb_before_second_loop = split_edge (single_exit (first_loop));

  /* Epilogue peeling.  */
  if (!update_first_loop_count)
    {
      pre_condition =
        fold_build2 (LE_EXPR, boolean_type_node, first_niters,
                     build_int_cst (TREE_TYPE (first_niters), 0));
      if (check_profitability)
        {
          tree scalar_loop_iters
            = unshare_expr (LOOP_VINFO_NITERS_UNCHANGED
                            (loop_vec_info_for_loop (loop)));
          cost_pre_condition =
            fold_build2 (LE_EXPR, boolean_type_node, scalar_loop_iters,
                         build_int_cst (TREE_TYPE (scalar_loop_iters), th));

          pre_condition = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
                                       cost_pre_condition, pre_condition);
        }
    }

  /* Prologue peeling.  */
  else
    {
      if (check_profitability)
        set_prologue_iterations (bb_before_first_loop, first_niters,
                                 loop, th);

      pre_condition =
        fold_build2 (LE_EXPR, boolean_type_node, first_niters,
                     build_int_cst (TREE_TYPE (first_niters), 0));
    }

  skip_e = slpeel_add_loop_guard (bb_before_first_loop, pre_condition,
                                  bb_before_second_loop, bb_before_first_loop);
  slpeel_update_phi_nodes_for_guard1 (skip_e, first_loop,
                                      first_loop == new_loop,
                                      &new_exit_bb, &definitions);


  /* 3. Add the guard that controls whether the second loop is executed.
        Resulting CFG would be:

        bb_before_first_loop:
        if (FIRST_NITERS == 0) GOTO bb_before_second_loop (skip first loop)
                               GOTO first-loop

        first_loop:
        do {
        } while ...

        bb_between_loops:
        if (FIRST_NITERS == NITERS) GOTO bb_after_second_loop (skip second loop)
                                    GOTO bb_before_second_loop

        bb_before_second_loop:

        second_loop:
        do {
        } while ...

        bb_after_second_loop:

        orig_exit_bb:
   */

  bb_between_loops = new_exit_bb;
  bb_after_second_loop = split_edge (single_exit (second_loop));

  pre_condition =
    fold_build2 (EQ_EXPR, boolean_type_node, first_niters, niters);
  skip_e = slpeel_add_loop_guard (bb_between_loops, pre_condition,
                                  bb_after_second_loop, bb_before_first_loop);
  slpeel_update_phi_nodes_for_guard2 (skip_e, second_loop,
                                      second_loop == new_loop, &new_exit_bb);

  /* 4. Make first-loop iterate FIRST_NITERS times, if requested.  */
  if (update_first_loop_count)
    slpeel_make_loop_iterate_ntimes (first_loop, first_niters);

  BITMAP_FREE (definitions);
  delete_update_ssa ();

  return new_loop;
}

/* Function find_loop_location.

   Extract the location of the loop in the source code.
   If the loop is not well formed for vectorization, an estimated
   location is calculated.
   Return the loop location if it succeeds and UNKNOWN_LOC if not.  */

LOC
find_loop_location (struct loop *loop)
{
  gimple stmt = NULL;
  basic_block bb;
  gimple_stmt_iterator si;

  if (!loop)
    return UNKNOWN_LOC;

  stmt = get_loop_exit_condition (loop);

  if (stmt && gimple_location (stmt) != UNKNOWN_LOC)
    return gimple_location (stmt);

  /* If we got here the loop is probably not "well formed";
     try to estimate the loop location.  */

  if (!loop->header)
    return UNKNOWN_LOC;

  bb = loop->header;

  for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
    {
      stmt = gsi_stmt (si);
      if (gimple_location (stmt) != UNKNOWN_LOC)
        return gimple_location (stmt);
    }

  return UNKNOWN_LOC;
}


/*************************************************************************
  Vectorization Debug Information.
 *************************************************************************/

/* Function vect_set_verbosity_level.

   Called from toplev.c upon detection of the
   -ftree-vectorizer-verbose=N option.  */
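
/* For example (an illustrative sketch, not from the code): compiling with
   -ftree-vectorizer-verbose=3 results in the call
   vect_set_verbosity_level ("3"), setting vect_verbosity_level to 3;
   out-of-range values are clamped to MAX_VERBOSITY_LEVEL - 1.  */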
1445 | |
1446 void | |
1447 vect_set_verbosity_level (const char *val) | |
1448 { | |
1449 unsigned int vl; | |
1450 | |
1451 vl = atoi (val); | |
1452 if (vl < MAX_VERBOSITY_LEVEL) | |
1453 vect_verbosity_level = vl; | |
1454 else | |
1455 vect_verbosity_level = MAX_VERBOSITY_LEVEL - 1; | |
1456 } | |
1457 | |
1458 | |
1459 /* Function vect_set_dump_settings. | |
1460 | |
1461 Fix the verbosity level of the vectorizer if the | |
1462 requested level was not set explicitly using the flag | |
1463 -ftree-vectorizer-verbose=N. | |
1464 Decide where to print the debugging information (dump_file/stderr). | |
1465 If the user defined the verbosity level, but there is no dump file, | |
1466 print to stderr, otherwise print to the dump file. */ | |
1467 | |
1468 static void | |
1469 vect_set_dump_settings (void) | |
1470 { | |
1471 vect_dump = dump_file; | |
1472 | |
1473 /* Check if the verbosity level was defined by the user: */ | |
1474 if (vect_verbosity_level != MAX_VERBOSITY_LEVEL) | |
1475 { | |
1476 /* If there is no dump file, print to stderr. */ | |
1477 if (!dump_file) | |
1478 vect_dump = stderr; | |
1479 return; | |
1480 } | |
1481 | |
1482 /* User didn't specify verbosity level: */ | |
1483 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1484 vect_verbosity_level = REPORT_DETAILS; | |
1485 else if (dump_file && (dump_flags & TDF_STATS)) | |
1486 vect_verbosity_level = REPORT_UNVECTORIZED_LOOPS; | |
1487 else | |
1488 vect_verbosity_level = REPORT_NONE; | |
1489 | |
1490 gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE); | |
1491 } | |
1492 | |
1493 | |
1494 /* Function debug_loop_details. | |
1495 | |
1496 For vectorization debug dumps. */ | |
1497 | |
1498 bool | |
1499 vect_print_dump_info (enum verbosity_levels vl) | |
1500 { | |
1501 if (vl > vect_verbosity_level) | |
1502 return false; | |
1503 | |
1504 if (!current_function_decl || !vect_dump) | |
1505 return false; | |
1506 | |
1507 if (vect_loop_location == UNKNOWN_LOC) | |
1508 fprintf (vect_dump, "\n%s:%d: note: ", | |
1509 DECL_SOURCE_FILE (current_function_decl), | |
1510 DECL_SOURCE_LINE (current_function_decl)); | |
1511 else | |
1512 fprintf (vect_dump, "\n%s:%d: note: ", | |
1513 LOC_FILE (vect_loop_location), LOC_LINE (vect_loop_location)); | |
1514 | |
1515 return true; | |
1516 } | |


/*************************************************************************
  Vectorization Utilities.
 *************************************************************************/

/* Function new_stmt_vec_info.

   Create and initialize a new stmt_vec_info struct for STMT.  */

stmt_vec_info
new_stmt_vec_info (gimple stmt, loop_vec_info loop_vinfo)
{
  stmt_vec_info res;
  res = (stmt_vec_info) xcalloc (1, sizeof (struct _stmt_vec_info));

  STMT_VINFO_TYPE (res) = undef_vec_info_type;
  STMT_VINFO_STMT (res) = stmt;
  STMT_VINFO_LOOP_VINFO (res) = loop_vinfo;
  STMT_VINFO_RELEVANT (res) = 0;
  STMT_VINFO_LIVE_P (res) = false;
  STMT_VINFO_VECTYPE (res) = NULL;
  STMT_VINFO_VEC_STMT (res) = NULL;
  STMT_VINFO_IN_PATTERN_P (res) = false;
  STMT_VINFO_RELATED_STMT (res) = NULL;
  STMT_VINFO_DATA_REF (res) = NULL;

  STMT_VINFO_DR_BASE_ADDRESS (res) = NULL;
  STMT_VINFO_DR_OFFSET (res) = NULL;
  STMT_VINFO_DR_INIT (res) = NULL;
  STMT_VINFO_DR_STEP (res) = NULL;
  STMT_VINFO_DR_ALIGNED_TO (res) = NULL;

  if (gimple_code (stmt) == GIMPLE_PHI
      && is_loop_header_bb_p (gimple_bb (stmt)))
    STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type;
  else
    STMT_VINFO_DEF_TYPE (res) = vect_loop_def;
  STMT_VINFO_SAME_ALIGN_REFS (res) = VEC_alloc (dr_p, heap, 5);
  STMT_VINFO_INSIDE_OF_LOOP_COST (res) = 0;
  STMT_VINFO_OUTSIDE_OF_LOOP_COST (res) = 0;
  STMT_SLP_TYPE (res) = 0;
  DR_GROUP_FIRST_DR (res) = NULL;
  DR_GROUP_NEXT_DR (res) = NULL;
  DR_GROUP_SIZE (res) = 0;
  DR_GROUP_STORE_COUNT (res) = 0;
  DR_GROUP_GAP (res) = 0;
  DR_GROUP_SAME_DR_STMT (res) = NULL;
  DR_GROUP_READ_WRITE_DEPENDENCE (res) = false;

  return res;
}

/* Create the vector that holds the stmt_vec_info structs.  */

void
init_stmt_vec_info_vec (void)
{
  gcc_assert (!stmt_vec_info_vec);
  stmt_vec_info_vec = VEC_alloc (vec_void_p, heap, 50);
}

/* Free the stmt_vec_info vector.  */

void
free_stmt_vec_info_vec (void)
{
  gcc_assert (stmt_vec_info_vec);
  VEC_free (vec_void_p, heap, stmt_vec_info_vec);
}

/* Free stmt vectorization related info.  */

void
free_stmt_vec_info (gimple stmt)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);

  if (!stmt_info)
    return;

  VEC_free (dr_p, heap, STMT_VINFO_SAME_ALIGN_REFS (stmt_info));
  set_vinfo_for_stmt (stmt, NULL);
  free (stmt_info);
}


/* Function bb_in_loop_p.

   Used as a predicate for the dfs-order traversal of the loop bbs.  */

static bool
bb_in_loop_p (const_basic_block bb, const void *data)
{
  const struct loop *const loop = (const struct loop *) data;
  return flow_bb_inside_loop_p (loop, bb);
}


/* Function new_loop_vec_info.

   Create and initialize a new loop_vec_info struct for LOOP, as well as
   stmt_vec_info structs for all the stmts in LOOP.  */

loop_vec_info
new_loop_vec_info (struct loop *loop)
{
  loop_vec_info res;
  basic_block *bbs;
  gimple_stmt_iterator si;
  unsigned int i, nbbs;

  res = (loop_vec_info) xcalloc (1, sizeof (struct _loop_vec_info));
  LOOP_VINFO_LOOP (res) = loop;

  bbs = get_loop_body (loop);

  /* Create/Update stmt_info for all stmts in the loop.  */
  for (i = 0; i < loop->num_nodes; i++)
    {
      basic_block bb = bbs[i];

      /* BBs in a nested inner-loop will already have been processed (because
         we will have called vect_analyze_loop_form for any nested inner-loop).
         Therefore, for stmts in an inner-loop we just want to update the
         STMT_VINFO_LOOP_VINFO field of their stmt_info to point to the new
         loop_info of the outer-loop we are currently considering to vectorize
         (instead of the loop_info of the inner-loop).
         For stmts in other BBs we need to create a stmt_info from scratch.  */
      if (bb->loop_father != loop)
        {
          /* Inner-loop bb.  */
          gcc_assert (loop->inner && bb->loop_father == loop->inner);
          for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple phi = gsi_stmt (si);
              stmt_vec_info stmt_info = vinfo_for_stmt (phi);
              loop_vec_info inner_loop_vinfo =
                STMT_VINFO_LOOP_VINFO (stmt_info);
              gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
              STMT_VINFO_LOOP_VINFO (stmt_info) = res;
            }
          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple stmt = gsi_stmt (si);
              stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
              loop_vec_info inner_loop_vinfo =
                STMT_VINFO_LOOP_VINFO (stmt_info);
              gcc_assert (loop->inner == LOOP_VINFO_LOOP (inner_loop_vinfo));
              STMT_VINFO_LOOP_VINFO (stmt_info) = res;
            }
        }
      else
        {
          /* bb in current nest.  */
          for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple phi = gsi_stmt (si);
              gimple_set_uid (phi, 0);
              set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, res));
            }

          for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
            {
              gimple stmt = gsi_stmt (si);
              gimple_set_uid (stmt, 0);
              set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, res));
            }
        }
    }

  /* CHECKME: We want to visit all BBs before their successors (except for
     latch blocks, for which this assertion wouldn't hold).  In the simple
     case of the loop forms we allow, a dfs order of the BBs would be the
     same as reversed postorder traversal, so we are safe.  */

  free (bbs);
  bbs = XCNEWVEC (basic_block, loop->num_nodes);
  nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
                             bbs, loop->num_nodes, loop);
  gcc_assert (nbbs == loop->num_nodes);

  LOOP_VINFO_BBS (res) = bbs;
  LOOP_VINFO_NITERS (res) = NULL;
  LOOP_VINFO_NITERS_UNCHANGED (res) = NULL;
  LOOP_VINFO_COST_MODEL_MIN_ITERS (res) = 0;
  LOOP_VINFO_VECTORIZABLE_P (res) = 0;
  LOOP_PEELING_FOR_ALIGNMENT (res) = 0;
  LOOP_VINFO_VECT_FACTOR (res) = 0;
  LOOP_VINFO_DATAREFS (res) = VEC_alloc (data_reference_p, heap, 10);
  LOOP_VINFO_DDRS (res) = VEC_alloc (ddr_p, heap, 10 * 10);
  LOOP_VINFO_UNALIGNED_DR (res) = NULL;
  LOOP_VINFO_MAY_MISALIGN_STMTS (res) =
    VEC_alloc (gimple, heap,
               PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIGNMENT_CHECKS));
  LOOP_VINFO_MAY_ALIAS_DDRS (res) =
    VEC_alloc (ddr_p, heap,
               PARAM_VALUE (PARAM_VECT_MAX_VERSION_FOR_ALIAS_CHECKS));
  LOOP_VINFO_STRIDED_STORES (res) = VEC_alloc (gimple, heap, 10);
  LOOP_VINFO_SLP_INSTANCES (res) = VEC_alloc (slp_instance, heap, 10);
  LOOP_VINFO_SLP_UNROLLING_FACTOR (res) = 1;

  return res;
}


/* Function destroy_loop_vec_info.

   Free LOOP_VINFO struct, as well as all the stmt_vec_info structs of all
   the stmts in the loop.  */

void
destroy_loop_vec_info (loop_vec_info loop_vinfo, bool clean_stmts)
{
  struct loop *loop;
  basic_block *bbs;
  int nbbs;
  gimple_stmt_iterator si;
  int j;
  VEC (slp_instance, heap) *slp_instances;
  slp_instance instance;

  if (!loop_vinfo)
    return;

  loop = LOOP_VINFO_LOOP (loop_vinfo);

  bbs = LOOP_VINFO_BBS (loop_vinfo);
  nbbs = loop->num_nodes;

  if (!clean_stmts)
    {
      free (LOOP_VINFO_BBS (loop_vinfo));
      free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
      free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
      VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));

      free (loop_vinfo);
      loop->aux = NULL;
      return;
    }

  for (j = 0; j < nbbs; j++)
    {
      basic_block bb = bbs[j];

      for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
        free_stmt_vec_info (gsi_stmt (si));

      for (si = gsi_start_bb (bb); !gsi_end_p (si); )
        {
          gimple stmt = gsi_stmt (si);
          stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
          bool remove_stmt_p = false;

          if (stmt_info)
            {
              /* Check if this is a "pattern stmt" (introduced by the
                 vectorizer during the pattern recognition pass).  */
              gimple orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info);
              if (orig_stmt)
                {
                  stmt_vec_info orig_stmt_info = vinfo_for_stmt (orig_stmt);
                  if (orig_stmt_info
                      && STMT_VINFO_IN_PATTERN_P (orig_stmt_info))
                    remove_stmt_p = true;
                }

              /* Free stmt_vec_info.  */
              free_stmt_vec_info (stmt);
            }

          /* Remove dead "pattern stmts".  gsi_remove already advances SI,
             so only step the iterator explicitly when nothing was removed
             (otherwise the stmt following the removed one would be
             skipped).  */
          if (remove_stmt_p)
            gsi_remove (&si, true);
          else
            gsi_next (&si);
        }
    }

  free (LOOP_VINFO_BBS (loop_vinfo));
  free_data_refs (LOOP_VINFO_DATAREFS (loop_vinfo));
  free_dependence_relations (LOOP_VINFO_DDRS (loop_vinfo));
  VEC_free (gimple, heap, LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo));
  VEC_free (ddr_p, heap, LOOP_VINFO_MAY_ALIAS_DDRS (loop_vinfo));
  slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
  for (j = 0; VEC_iterate (slp_instance, slp_instances, j, instance); j++)
    vect_free_slp_instance (instance);

  VEC_free (slp_instance, heap, LOOP_VINFO_SLP_INSTANCES (loop_vinfo));
  VEC_free (gimple, heap, LOOP_VINFO_STRIDED_STORES (loop_vinfo));

  free (loop_vinfo);
  loop->aux = NULL;
}


/* Function vect_can_force_dr_alignment_p.

   Returns whether the alignment of a DECL can be forced to an
   ALIGNMENT-bit boundary.  */
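
/* A minimal illustration (not part of the pass): given

     static short a[256];    file-scope, !DECL_EXTERNAL, not yet emitted

   the alignment may be raised as long as it does not exceed
   MAX_OFILE_ALIGNMENT; an automatic (stack) variable is instead capped
   by MAX_STACK_ALIGNMENT.  Externally defined decls, and decls whose
   assembly has already been written out, cannot be realigned, which is
   what the early-return checks below implement.  */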

bool
vect_can_force_dr_alignment_p (const_tree decl, unsigned int alignment)
{
  if (TREE_CODE (decl) != VAR_DECL)
    return false;

  if (DECL_EXTERNAL (decl))
    return false;

  if (TREE_ASM_WRITTEN (decl))
    return false;

  if (TREE_STATIC (decl))
    return (alignment <= MAX_OFILE_ALIGNMENT);
  else
    return (alignment <= MAX_STACK_ALIGNMENT);
}


/* Function get_vectype_for_scalar_type.

   Returns the vector type corresponding to SCALAR_TYPE as supported
   by the target.  */
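
/* Worked example (assuming a target whose SIMD word is 16 bytes for the
   modes involved):

     scalar_type = short   -> nbytes = 2 -> nunits = 16/2 = 8
                           -> "vector of 8 shorts" (V8HImode)
     scalar_type = double  -> nbytes = 8 -> nunits = 16/8 = 2
                           -> V2DFmode

   A scalar type at least as wide as the SIMD word yields NULL_TREE.  */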

tree
get_vectype_for_scalar_type (tree scalar_type)
{
  enum machine_mode inner_mode = TYPE_MODE (scalar_type);
  int nbytes = GET_MODE_SIZE (inner_mode);
  int nunits;
  tree vectype;

  if (nbytes == 0 || nbytes >= UNITS_PER_SIMD_WORD (inner_mode))
    return NULL_TREE;

  /* FORNOW: Only a single vector size per mode (UNITS_PER_SIMD_WORD)
     is expected.  */
  nunits = UNITS_PER_SIMD_WORD (inner_mode) / nbytes;

  vectype = build_vector_type (scalar_type, nunits);
  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "get vectype with %d units of type ", nunits);
      print_generic_expr (vect_dump, scalar_type, TDF_SLIM);
    }

  if (!vectype)
    return NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vectype: ");
      print_generic_expr (vect_dump, vectype, TDF_SLIM);
    }

  if (!VECTOR_MODE_P (TYPE_MODE (vectype))
      && !INTEGRAL_MODE_P (TYPE_MODE (vectype)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "mode not supported by target.");
      return NULL_TREE;
    }

  return vectype;
}


/* Function vect_supportable_dr_alignment.

   Return whether the data reference DR is supported with respect to its
   alignment.  */

enum dr_alignment_support
vect_supportable_dr_alignment (struct data_reference *dr)
{
  gimple stmt = DR_STMT (dr);
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  tree vectype = STMT_VINFO_VECTYPE (stmt_info);
  enum machine_mode mode = (int) TYPE_MODE (vectype);
  struct loop *vect_loop = LOOP_VINFO_LOOP (STMT_VINFO_LOOP_VINFO (stmt_info));
  bool nested_in_vect_loop = nested_in_vect_loop_p (vect_loop, stmt);
  bool invariant_in_outerloop = false;

  if (aligned_access_p (dr))
    return dr_aligned;

  if (nested_in_vect_loop)
    {
      tree outerloop_step = STMT_VINFO_DR_STEP (stmt_info);
      invariant_in_outerloop =
        (tree_int_cst_compare (outerloop_step, size_zero_node) == 0);
    }

  /* Possibly unaligned access.  */

  /* We can choose between using the implicit realignment scheme (generating
     a misaligned_move stmt) and the explicit realignment scheme (generating
     aligned loads with a REALIGN_LOAD).  There are two variants of the
     explicit realignment scheme: optimized, and unoptimized.
     We can optimize the realignment only if the step between consecutive
     vector loads is equal to the vector size.  Since the vector memory
     accesses advance in steps of VS (Vector Size) in the vectorized loop, it
     is guaranteed that the misalignment amount remains the same throughout
     the execution of the vectorized loop.  Therefore, we can create the
     "realignment token" (the permutation mask that is passed to REALIGN_LOAD)
     at the loop preheader.

     However, in the case of outer-loop vectorization, when vectorizing a
     memory access in the inner-loop nested within the LOOP that is now being
     vectorized, while it is guaranteed that the misalignment of the
     vectorized memory access will remain the same in different outer-loop
     iterations, it is *not* guaranteed that it will remain the same
     throughout the execution of the inner-loop.  This is because the
     inner-loop advances with the original scalar step (and not in steps of
     VS).  If the inner-loop step happens to be a multiple of VS, then the
     misalignment remains fixed and we can use the optimized realignment
     scheme.  For example:

       for (i=0; i<N; i++)
         for (j=0; j<M; j++)
           s += a[i+j];

     When vectorizing the i-loop in the above example, the step between
     consecutive vector loads is 1, and so the misalignment does not remain
     fixed across the execution of the inner-loop, and the realignment cannot
     be optimized (as illustrated in the following pseudo vectorized loop):

       for (i=0; i<N; i+=4)
         for (j=0; j<M; j++){
           vs += vp[i+j]; // misalignment of &vp[i+j] is {0,1,2,3,0,1,2,3,...}
                          // when j is {0,1,2,3,4,5,6,7,...} respectively.
                          // (assuming that we start from an aligned address).
         }

     We therefore have to use the unoptimized realignment scheme:

       for (i=0; i<N; i+=4)
         for (j=k; j<M; j+=4)
           vs += vp[i+j]; // misalignment of &vp[i+j] is always k (assuming
                          // that the misalignment of the initial address is
                          // 0).

     The loop can then be vectorized as follows:

       for (k=0; k<4; k++){
         rt = get_realignment_token (&vp[k]);
         for (i=0; i<N; i+=4){
           v1 = vp[i+k];
           for (j=k; j<M; j+=4){
             v2 = vp[i+j+VS-1];
             va = REALIGN_LOAD <v1,v2,rt>;
             vs += va;
             v1 = v2;
           }
         }
       }  */

  if (DR_IS_READ (dr))
    {
      if (optab_handler (vec_realign_load_optab, mode)->insn_code
          != CODE_FOR_nothing
          && (!targetm.vectorize.builtin_mask_for_load
              || targetm.vectorize.builtin_mask_for_load ()))
        {
          tree vectype = STMT_VINFO_VECTYPE (stmt_info);
          if (nested_in_vect_loop
              && (TREE_INT_CST_LOW (DR_STEP (dr))
                  != GET_MODE_SIZE (TYPE_MODE (vectype))))
            return dr_explicit_realign;
          else
            return dr_explicit_realign_optimized;
        }

      if (optab_handler (movmisalign_optab, mode)->insn_code
          != CODE_FOR_nothing)
        /* Can't software pipeline the loads, but can at least do them.  */
        return dr_unaligned_supported;
    }

  /* Unsupported.  */
  return dr_unaligned_unsupported;
}


/* Function vect_is_simple_use.

   Input:
   LOOP - the loop that is being vectorized.
   OPERAND - operand of a stmt in LOOP.
   DEF - the defining stmt in case OPERAND is an SSA_NAME.

   Returns whether a stmt with OPERAND can be vectorized.
   Supportable operands are constants, loop invariants, and operands that
   are defined by the current iteration of the loop.  Unsupportable operands
   are those that are defined by a previous iteration of the loop (as is
   the case in reduction/induction computations).  */
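
/* For example, in

     for (i = 0; i < n; i++)
       a[i] = b[i] * x + 5;

   the constant 5 yields vect_constant_def, the loop-invariant x yields
   vect_invariant_def, and the SSA name holding the loaded b[i] yields
   vect_loop_def.  An accumulator updated across iterations (s += a[i])
   is defined by a loop-header phi and gets that phi's def type
   instead.  */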

bool
vect_is_simple_use (tree operand, loop_vec_info loop_vinfo, gimple *def_stmt,
                    tree *def, enum vect_def_type *dt)
{
  basic_block bb;
  stmt_vec_info stmt_vinfo;
  struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);

  *def_stmt = NULL;
  *def = NULL_TREE;

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "vect_is_simple_use: operand ");
      print_generic_expr (vect_dump, operand, TDF_SLIM);
    }

  if (TREE_CODE (operand) == INTEGER_CST || TREE_CODE (operand) == REAL_CST)
    {
      *dt = vect_constant_def;
      return true;
    }
  if (is_gimple_min_invariant (operand))
    {
      *def = operand;
      *dt = vect_invariant_def;
      return true;
    }

  if (TREE_CODE (operand) == PAREN_EXPR)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "non-associatable copy.");
      operand = TREE_OPERAND (operand, 0);
    }
  if (TREE_CODE (operand) != SSA_NAME)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "not ssa-name.");
      return false;
    }

  *def_stmt = SSA_NAME_DEF_STMT (operand);
  if (*def_stmt == NULL)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "no def_stmt.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "def_stmt: ");
      print_gimple_stmt (vect_dump, *def_stmt, 0, TDF_SLIM);
    }

  /* An empty stmt is expected only in the case of a function argument
     (otherwise we expect a phi node or a GIMPLE_ASSIGN).  */
  if (gimple_nop_p (*def_stmt))
    {
      *def = operand;
      *dt = vect_invariant_def;
      return true;
    }

  bb = gimple_bb (*def_stmt);
  if (!flow_bb_inside_loop_p (loop, bb))
    *dt = vect_invariant_def;
  else
    {
      stmt_vinfo = vinfo_for_stmt (*def_stmt);
      *dt = STMT_VINFO_DEF_TYPE (stmt_vinfo);
    }

  if (*dt == vect_unknown_def_type)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unsupported pattern.");
      return false;
    }

  if (vect_print_dump_info (REPORT_DETAILS))
    fprintf (vect_dump, "type of def: %d.", *dt);

  switch (gimple_code (*def_stmt))
    {
    case GIMPLE_PHI:
      *def = gimple_phi_result (*def_stmt);
      break;

    case GIMPLE_ASSIGN:
      *def = gimple_assign_lhs (*def_stmt);
      break;

    case GIMPLE_CALL:
      *def = gimple_call_lhs (*def_stmt);
      if (*def != NULL)
        break;
      /* FALLTHRU */
    default:
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "unsupported defining stmt: ");
      return false;
    }

  return true;
}


/* Function supportable_widening_operation.

   Check whether an operation represented by the code CODE is a
   widening operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE).

   Widening operations we currently support are NOP (CONVERT), FLOAT
   and WIDEN_MULT.  This function checks if these operations are supported
   by the target platform either directly (via vector tree-codes), or via
   target builtins.

   Output:
   - CODE1 and CODE2 are codes of vector operations to be used when
     vectorizing the operation, if available.
   - DECL1 and DECL2 are decls of target builtin functions to be used
     when vectorizing the operation, if available.  In this case,
     CODE1 and CODE2 are CALL_EXPR.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like char->short->int - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     widening operation (short in the above example).  */
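
/* Worked example (modes are illustrative, for a target with 16-byte
   vectors): widening chars to ints converts one V16QI into four V4SI
   vectors through an intermediate V8HI step:

     V16QI -(VEC_UNPACK_LO/HI)-> 2 x V8HI -(VEC_UNPACK_LO/HI)-> 4 x V4SI

   so CODE1/CODE2 would be VEC_UNPACK_LO_EXPR/VEC_UNPACK_HI_EXPR,
   MULTI_STEP_CVT would be 1, and INTERM_TYPES would hold the V8HI
   vector type, provided the target implements the optabs at each
   step.  */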

bool
supportable_widening_operation (enum tree_code code, gimple stmt, tree vectype,
                                tree *decl1, tree *decl2,
                                enum tree_code *code1, enum tree_code *code2,
                                int *multi_step_cvt,
                                VEC (tree, heap) **interm_types)
{
  stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
  loop_vec_info loop_info = STMT_VINFO_LOOP_VINFO (stmt_info);
  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  bool ordered_p;
  enum machine_mode vec_mode;
  enum insn_code icode1 = 0, icode2 = 0;
  optab optab1, optab2;
  tree type = gimple_expr_type (stmt);
  tree wide_vectype = get_vectype_for_scalar_type (type);
  enum tree_code c1, c2;

  /* The result of a vectorized widening operation usually requires two
     vectors (because the widened results do not fit into one vector).  The
     generated vector results would normally be expected to be generated in
     the same order as in the original scalar computation, i.e. if 8 results
     are generated in each vector iteration, they are to be organized as
     follows:
       vect1: [res1,res2,res3,res4], vect2: [res5,res6,res7,res8].

     However, in the special case that the result of the widening operation
     is used in a reduction computation only, the order doesn't matter
     (because when vectorizing a reduction we change the order of the
     computation).  Some targets can take advantage of this and generate
     more efficient code.  For example, targets like Altivec, that support
     widen_mult using a sequence of {mult_even,mult_odd}, generate the
     following vectors:
       vect1: [res1,res3,res5,res7], vect2: [res2,res4,res6,res8].

     When vectorizing outer-loops, we execute the inner-loop sequentially
     (each vectorized inner-loop iteration contributes to VF outer-loop
     iterations in parallel).  We therefore don't allow changing the order
     of the computation in the inner-loop during outer-loop vectorization.  */

  if (STMT_VINFO_RELEVANT (stmt_info) == vect_used_by_reduction
      && !nested_in_vect_loop_p (vect_loop, stmt))
    ordered_p = false;
  else
    ordered_p = true;

  if (!ordered_p
      && code == WIDEN_MULT_EXPR
      && targetm.vectorize.builtin_mul_widen_even
      && targetm.vectorize.builtin_mul_widen_even (vectype)
      && targetm.vectorize.builtin_mul_widen_odd
      && targetm.vectorize.builtin_mul_widen_odd (vectype))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "Unordered widening operation detected.");

      *code1 = *code2 = CALL_EXPR;
      *decl1 = targetm.vectorize.builtin_mul_widen_even (vectype);
      *decl2 = targetm.vectorize.builtin_mul_widen_odd (vectype);
      return true;
    }

  switch (code)
    {
    case WIDEN_MULT_EXPR:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_WIDEN_MULT_HI_EXPR;
          c2 = VEC_WIDEN_MULT_LO_EXPR;
        }
      else
        {
          c2 = VEC_WIDEN_MULT_HI_EXPR;
          c1 = VEC_WIDEN_MULT_LO_EXPR;
        }
      break;

    CASE_CONVERT:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_UNPACK_HI_EXPR;
          c2 = VEC_UNPACK_LO_EXPR;
        }
      else
        {
          c2 = VEC_UNPACK_HI_EXPR;
          c1 = VEC_UNPACK_LO_EXPR;
        }
      break;

    case FLOAT_EXPR:
      if (BYTES_BIG_ENDIAN)
        {
          c1 = VEC_UNPACK_FLOAT_HI_EXPR;
          c2 = VEC_UNPACK_FLOAT_LO_EXPR;
        }
      else
        {
          c2 = VEC_UNPACK_FLOAT_HI_EXPR;
          c1 = VEC_UNPACK_FLOAT_LO_EXPR;
        }
      break;

    case FIX_TRUNC_EXPR:
      /* ??? Not yet implemented due to missing VEC_UNPACK_FIX_TRUNC_HI_EXPR/
         VEC_UNPACK_FIX_TRUNC_LO_EXPR tree codes and optabs used for
         computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    {
      /* The signedness is determined from the output operand.  */
      optab1 = optab_for_tree_code (c1, type, optab_default);
      optab2 = optab_for_tree_code (c2, type, optab_default);
    }
  else
    {
      optab1 = optab_for_tree_code (c1, vectype, optab_default);
      optab2 = optab_for_tree_code (c2, vectype, optab_default);
    }

  if (!optab1 || !optab2)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)->insn_code)
      == CODE_FOR_nothing
      || (icode2 = optab_handler (optab2, vec_mode)->insn_code)
         == CODE_FOR_nothing)
    return false;

  /* Check if it's a multi-step conversion that can be done using
     intermediate types.  */
  if (insn_data[icode1].operand[0].mode != TYPE_MODE (wide_vectype)
      || insn_data[icode2].operand[0].mode != TYPE_MODE (wide_vectype))
    {
      int i;
      tree prev_type = vectype, intermediate_type;
      enum machine_mode intermediate_mode, prev_mode = vec_mode;
      optab optab3, optab4;

      if (!CONVERT_EXPR_CODE_P (code))
        return false;

      *code1 = c1;
      *code2 = c2;

      /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
         intermediate steps in the promotion sequence.  We try
         MAX_INTERM_CVT_STEPS to get to WIDE_VECTYPE, and fail if we do
         not.  */
      *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
      for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
        {
          intermediate_mode = insn_data[icode1].operand[0].mode;
          intermediate_type
            = lang_hooks.types.type_for_mode (intermediate_mode,
                                              TYPE_UNSIGNED (prev_type));
          optab3 = optab_for_tree_code (c1, intermediate_type, optab_default);
          optab4 = optab_for_tree_code (c2, intermediate_type, optab_default);

          if (!optab3 || !optab4
              || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
                 == CODE_FOR_nothing
              || insn_data[icode1].operand[0].mode != intermediate_mode
              || (icode2 = optab2->handlers[(int) prev_mode].insn_code)
                 == CODE_FOR_nothing
              || insn_data[icode2].operand[0].mode != intermediate_mode
              || (icode1 = optab3->handlers[(int) intermediate_mode].insn_code)
                 == CODE_FOR_nothing
              || (icode2 = optab4->handlers[(int) intermediate_mode].insn_code)
                 == CODE_FOR_nothing)
            return false;

          VEC_quick_push (tree, *interm_types, intermediate_type);
          (*multi_step_cvt)++;

          if (insn_data[icode1].operand[0].mode == TYPE_MODE (wide_vectype)
              && insn_data[icode2].operand[0].mode == TYPE_MODE (wide_vectype))
            return true;

          prev_type = intermediate_type;
          prev_mode = intermediate_mode;
        }

      return false;
    }

  *code1 = c1;
  *code2 = c2;
  return true;
}


/* Function supportable_narrowing_operation.

   Check whether an operation represented by the code CODE is a
   narrowing operation that is supported by the target platform in
   vector form (i.e., when operating on arguments of type VECTYPE).

   Narrowing operations we currently support are NOP (CONVERT) and
   FIX_TRUNC.  This function checks if these operations are supported by
   the target platform directly via vector tree-codes.

   Output:
   - CODE1 is the code of a vector operation to be used when
     vectorizing the operation, if available.
   - MULTI_STEP_CVT determines the number of required intermediate steps in
     case of multi-step conversion (like int->short->char - in that case
     MULTI_STEP_CVT will be 1).
   - INTERM_TYPES contains the intermediate type required to perform the
     narrowing operation (short in the above example).  */
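
/* Worked example (modes are illustrative): narrowing ints to chars
   packs pairs of vectors at each step,

     2 x V4SI -(VEC_PACK_TRUNC)-> V8HI,  2 x V8HI -(VEC_PACK_TRUNC)-> V16QI

   giving CODE1 = VEC_PACK_TRUNC_EXPR, MULTI_STEP_CVT = 1 and
   INTERM_TYPES = { V8HI }, provided the optab exists for each mode.  */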

bool
supportable_narrowing_operation (enum tree_code code,
                                 const_gimple stmt, tree vectype,
                                 enum tree_code *code1, int *multi_step_cvt,
                                 VEC (tree, heap) **interm_types)
{
  enum machine_mode vec_mode;
  enum insn_code icode1;
  optab optab1, interm_optab;
  tree type = gimple_expr_type (stmt);
  tree narrow_vectype = get_vectype_for_scalar_type (type);
  enum tree_code c1;
  tree intermediate_type, prev_type;
  int i;

  switch (code)
    {
    CASE_CONVERT:
      c1 = VEC_PACK_TRUNC_EXPR;
      break;

    case FIX_TRUNC_EXPR:
      c1 = VEC_PACK_FIX_TRUNC_EXPR;
      break;

    case FLOAT_EXPR:
      /* ??? Not yet implemented due to missing VEC_PACK_FLOAT_EXPR
         tree code and optabs used for computing the operation.  */
      return false;

    default:
      gcc_unreachable ();
    }

  if (code == FIX_TRUNC_EXPR)
    /* The signedness is determined from the output operand.  */
    optab1 = optab_for_tree_code (c1, type, optab_default);
  else
    optab1 = optab_for_tree_code (c1, vectype, optab_default);

  if (!optab1)
    return false;

  vec_mode = TYPE_MODE (vectype);
  if ((icode1 = optab_handler (optab1, vec_mode)->insn_code)
      == CODE_FOR_nothing)
    return false;

  /* Check if it's a multi-step conversion that can be done using
     intermediate types.  */
  if (insn_data[icode1].operand[0].mode != TYPE_MODE (narrow_vectype))
    {
      enum machine_mode intermediate_mode, prev_mode = vec_mode;

      *code1 = c1;
      prev_type = vectype;
      /* We assume here that there will not be more than MAX_INTERM_CVT_STEPS
         intermediate steps in the narrowing sequence.  We try
         MAX_INTERM_CVT_STEPS to get to NARROW_VECTYPE, and fail if we do
         not.  */
      *interm_types = VEC_alloc (tree, heap, MAX_INTERM_CVT_STEPS);
      for (i = 0; i < MAX_INTERM_CVT_STEPS; i++)
        {
          intermediate_mode = insn_data[icode1].operand[0].mode;
          intermediate_type
            = lang_hooks.types.type_for_mode (intermediate_mode,
                                              TYPE_UNSIGNED (prev_type));
          interm_optab = optab_for_tree_code (c1, intermediate_type,
                                              optab_default);
          if (!interm_optab
              || (icode1 = optab1->handlers[(int) prev_mode].insn_code)
                 == CODE_FOR_nothing
              || insn_data[icode1].operand[0].mode != intermediate_mode
              || (icode1
                  = interm_optab->handlers[(int) intermediate_mode].insn_code)
                 == CODE_FOR_nothing)
            return false;

          VEC_quick_push (tree, *interm_types, intermediate_type);
          (*multi_step_cvt)++;

          if (insn_data[icode1].operand[0].mode == TYPE_MODE (narrow_vectype))
            return true;

          prev_type = intermediate_type;
          prev_mode = intermediate_mode;
        }

      return false;
    }

  *code1 = c1;
  return true;
}


/* Function reduction_code_for_scalar_code.

   Input:
   CODE - tree_code of a reduction operation.

   Output:
   REDUC_CODE - the corresponding tree-code to be used to reduce the
      vector of partial results into a single scalar result (which
      will also reside in a vector).

   Return TRUE if a corresponding REDUC_CODE was found, FALSE otherwise.  */

bool
reduction_code_for_scalar_code (enum tree_code code,
                                enum tree_code *reduc_code)
{
  switch (code)
    {
    case MAX_EXPR:
      *reduc_code = REDUC_MAX_EXPR;
      return true;

    case MIN_EXPR:
      *reduc_code = REDUC_MIN_EXPR;
      return true;

    case PLUS_EXPR:
      *reduc_code = REDUC_PLUS_EXPR;
      return true;

    default:
      return false;
    }
}

/* Reporting helper for vect_is_simple_reduction below: the GIMPLE
   statement STMT is printed, preceded by the message MSG.  */

static void
report_vect_op (gimple stmt, const char *msg)
{
  fprintf (vect_dump, "%s", msg);
  print_gimple_stmt (vect_dump, stmt, 0, TDF_SLIM);
}

/* Function vect_is_simple_reduction.

   Detect a cross-iteration def-use cycle that represents a simple
   reduction computation.  We look for the following pattern:

     loop_header:
       a1 = phi < a0, a2 >
       a3 = ...
       a2 = operation (a3, a1)

   such that:
   1. the operation is commutative and associative, and it is safe to
      change the order of the computation;
   2. there are no uses of a2 in the loop (a2 is used out of the loop);
   3. there are no uses of a1 in the loop besides the reduction operation.

   Condition 1 is tested here.
   Conditions 2,3 are tested in vect_mark_stmts_to_be_vectorized.  */
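
/* Concretely, the C loop

     for (i = 0; i < n; i++)
       sum += a[i];

   yields in SSA form a loop-header phi and an update statement

     sum_1 = PHI <sum_0, sum_2>
     sum_2 = a[i] + sum_1;

   matching the pattern above with a1 = sum_1, a2 = sum_2, a3 = a[i].  */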

gimple
vect_is_simple_reduction (loop_vec_info loop_info, gimple phi)
{
  struct loop *loop = (gimple_bb (phi))->loop_father;
  struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
  edge latch_e = loop_latch_edge (loop);
  tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
  gimple def_stmt, def1, def2;
  enum tree_code code;
  tree op1, op2;
  tree type;
  int nloop_uses;
  tree name;
  imm_use_iterator imm_iter;
  use_operand_p use_p;

  gcc_assert (loop == vect_loop || flow_loop_nested_p (vect_loop, loop));

  name = PHI_RESULT (phi);
  nloop_uses = 0;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
          && vinfo_for_stmt (use_stmt)
          && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
        nloop_uses++;
      if (nloop_uses > 1)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduction used in loop.");
          return NULL;
        }
    }

  if (TREE_CODE (loop_arg) != SSA_NAME)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "reduction: not ssa_name: ");
          print_generic_expr (vect_dump, loop_arg, TDF_SLIM);
        }
      return NULL;
    }

  def_stmt = SSA_NAME_DEF_STMT (loop_arg);
  if (!def_stmt)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "reduction: no def_stmt.");
      return NULL;
    }

  if (!is_gimple_assign (def_stmt))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        print_gimple_stmt (vect_dump, def_stmt, 0, TDF_SLIM);
      return NULL;
    }

  name = gimple_assign_lhs (def_stmt);
  nloop_uses = 0;
  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
    {
      gimple use_stmt = USE_STMT (use_p);
      if (flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))
          && vinfo_for_stmt (use_stmt)
          && !is_pattern_stmt_p (vinfo_for_stmt (use_stmt)))
        nloop_uses++;
      if (nloop_uses > 1)
        {
          if (vect_print_dump_info (REPORT_DETAILS))
            fprintf (vect_dump, "reduction used in loop.");
          return NULL;
        }
    }

  code = gimple_assign_rhs_code (def_stmt);

  if (!commutative_tree_code (code) || !associative_tree_code (code))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt, "reduction: not commutative/associative: ");
      return NULL;
    }

  if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt, "reduction: not binary operation: ");
      return NULL;
    }

  op1 = gimple_assign_rhs1 (def_stmt);
  op2 = gimple_assign_rhs2 (def_stmt);
  if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt, "reduction: uses not ssa_names: ");
      return NULL;
    }

  /* Check that it's ok to change the order of the computation.  */
  type = TREE_TYPE (gimple_assign_lhs (def_stmt));
  if (TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op1))
      || TYPE_MAIN_VARIANT (type) != TYPE_MAIN_VARIANT (TREE_TYPE (op2)))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        {
          fprintf (vect_dump, "reduction: multiple types: operation type: ");
          print_generic_expr (vect_dump, type, TDF_SLIM);
          fprintf (vect_dump, ", operands types: ");
          print_generic_expr (vect_dump, TREE_TYPE (op1), TDF_SLIM);
          fprintf (vect_dump, ",");
          print_generic_expr (vect_dump, TREE_TYPE (op2), TDF_SLIM);
        }
      return NULL;
    }

  /* Generally, when vectorizing a reduction we change the order of the
     computation.  This may change the behavior of the program in some
     cases, so we need to check that this is ok.  One exception is when
     vectorizing an outer-loop: the inner-loop is executed sequentially,
     and therefore vectorizing reductions in the inner-loop during
     outer-loop vectorization is safe.  */

  /* CHECKME: check for !flag_finite_math_only too?  */
  if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math
      && !nested_in_vect_loop_p (vect_loop, def_stmt))
    {
      /* Changing the order of operations changes the semantics.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt, "reduction: unsafe fp math optimization: ");
      return NULL;
    }
  else if (INTEGRAL_TYPE_P (type) && TYPE_OVERFLOW_TRAPS (type)
           && !nested_in_vect_loop_p (vect_loop, def_stmt))
    {
      /* Changing the order of operations changes the semantics.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt, "reduction: unsafe int math optimization: ");
      return NULL;
    }
  else if (SAT_FIXED_POINT_TYPE_P (type))
    {
      /* Changing the order of operations changes the semantics.  */
      if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt,
                        "reduction: unsafe fixed-point math optimization: ");
      return NULL;
    }

  /* The reduction is safe.  We're dealing with one of the following:
     1) integer arithmetic that does not trap on overflow (no trapv), or
     2) floating point arithmetic, where special flags permit this
        optimization.  */
  def1 = SSA_NAME_DEF_STMT (op1);
  def2 = SSA_NAME_DEF_STMT (op2);
  if (!def1 || !def2 || gimple_nop_p (def1) || gimple_nop_p (def2))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt, "reduction: no defs for operands: ");
      return NULL;
    }


  /* Check that one def is the reduction def, defined by PHI, and that
     the other def is either defined in the loop ("vect_loop_def") or is
     an induction (defined by a loop-header phi-node).  */

  if (def2 == phi
      && flow_bb_inside_loop_p (loop, gimple_bb (def1))
      && (is_gimple_assign (def1)
          || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_induction_def
          || (gimple_code (def1) == GIMPLE_PHI
              && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) == vect_loop_def
              && !is_loop_header_bb_p (gimple_bb (def1)))))
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt, "detected reduction:");
      return def_stmt;
    }
  else if (def1 == phi
           && flow_bb_inside_loop_p (loop, gimple_bb (def2))
           && (is_gimple_assign (def2)
               || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
                  == vect_induction_def
               || (gimple_code (def2) == GIMPLE_PHI
                   && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
                      == vect_loop_def
                   && !is_loop_header_bb_p (gimple_bb (def2)))))
    {
      /* Swap operands (just for simplicity - so that the rest of the code
         can assume that the reduction variable is always the last (second)
         argument).  */
      if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt,
                        "detected reduction: need to swap operands:");
      swap_tree_operands (def_stmt, gimple_assign_rhs1_ptr (def_stmt),
                          gimple_assign_rhs2_ptr (def_stmt));
      return def_stmt;
    }
  else
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        report_vect_op (def_stmt, "reduction: unknown pattern.");
      return NULL;
    }
}


/* Function vect_is_simple_iv_evolution.

   FORNOW: A simple evolution of an induction variable in the loop is
   considered a polynomial evolution with a constant step.  */
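
/* For example, a pointer advanced by 4 bytes each iteration, as in

     for (i = 0; i < n; i++, p += 4)
       ...

   has the chrec access function {p_0, +, 4}_loop, for which *INIT is
   set to p_0 and *STEP to 4.  An evolution whose step is itself a
   chrec (a polynomial of degree >= 2) is rejected as not "simple".  */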

bool
vect_is_simple_iv_evolution (unsigned loop_nb, tree access_fn, tree *init,
                             tree *step)
{
  tree init_expr;
  tree step_expr;
  tree evolution_part = evolution_part_in_loop_num (access_fn, loop_nb);

  /* When there is no evolution in this loop, the evolution function
     is not "simple".  */
  if (evolution_part == NULL_TREE)
    return false;

  /* When the evolution is a polynomial of degree >= 2
     the evolution function is not "simple".  */
  if (tree_is_chrec (evolution_part))
    return false;

  step_expr = evolution_part;
  init_expr = unshare_expr (initial_condition_in_loop_num (access_fn,
                                                           loop_nb));

  if (vect_print_dump_info (REPORT_DETAILS))
    {
      fprintf (vect_dump, "step: ");
      print_generic_expr (vect_dump, step_expr, TDF_SLIM);
      fprintf (vect_dump, ", init: ");
      print_generic_expr (vect_dump, init_expr, TDF_SLIM);
    }

  *init = init_expr;
  *step = step_expr;

  if (TREE_CODE (step_expr) != INTEGER_CST)
    {
      if (vect_print_dump_info (REPORT_DETAILS))
        fprintf (vect_dump, "step unknown.");
      return false;
    }

  return true;
}


/* Function vectorize_loops.

   Entry point to the loop vectorization phase.  */

unsigned
vectorize_loops (void)
{
  unsigned int i;
  unsigned int num_vectorized_loops = 0;
  unsigned int vect_loops_num;
  loop_iterator li;
  struct loop *loop;

  vect_loops_num = number_of_loops ();

  /* Bail out if there are no loops (number_of_loops also counts the
     function body as the outermost "loop").  */
  if (vect_loops_num <= 1)
    return 0;

  /* Fix the verbosity level if not defined explicitly by the user.  */
  vect_set_dump_settings ();

  /* Allocate the bitmap that records which virtual variables need to be
     renamed.  */
  vect_memsyms_to_rename = BITMAP_ALLOC (NULL);

  init_stmt_vec_info_vec ();

  /* ----------- Analyze loops. -----------  */

  /* If some loop was duplicated, it gets a bigger number
     than all previously defined loops.  This fact allows us to run
     only over the initial loops, skipping newly generated ones.  */
  FOR_EACH_LOOP (li, loop, 0)
    if (optimize_loop_nest_for_speed_p (loop))
      {
        loop_vec_info loop_vinfo;

        vect_loop_location = find_loop_location (loop);
        loop_vinfo = vect_analyze_loop (loop);
        loop->aux = loop_vinfo;

        if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo))
          continue;

        vect_transform_loop (loop_vinfo);
        num_vectorized_loops++;
      }
  vect_loop_location = UNKNOWN_LOC;

  statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops);
  if (vect_print_dump_info (REPORT_UNVECTORIZED_LOOPS)
      || (vect_print_dump_info (REPORT_VECTORIZED_LOOPS)
          && num_vectorized_loops > 0))
    fprintf (vect_dump, "vectorized %u loops in function.\n",
             num_vectorized_loops);

  /* ----------- Finalize. -----------  */

  BITMAP_FREE (vect_memsyms_to_rename);

  for (i = 1; i < vect_loops_num; i++)
    {
      loop_vec_info loop_vinfo;

      loop = get_loop (i);
      if (!loop)
        continue;
      loop_vinfo = (loop_vec_info) loop->aux;
      destroy_loop_vec_info (loop_vinfo, true);
      loop->aux = NULL;
    }

  free_stmt_vec_info_vec ();

  return num_vectorized_loops > 0 ? TODO_cleanup_cfg : 0;
}

/* Increase the alignment of global arrays to improve vectorization
   potential.
   TODO:
   - Consider also structs that have an array field.
   - Use ipa analysis to prune arrays that can't be vectorized?
     This should involve global alignment analysis and in the future also
     array padding.  */
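
/* For instance (illustrative, assuming shorts map to a 128-bit V8HI
   vector type on the target), a global

     short a[N];      DECL_ALIGN == 16 bits

   would be promoted to DECL_ALIGN == 128 bits, so that vector accesses
   to a[] can be assumed aligned.  */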

static unsigned int
increase_alignment (void)
{
  struct varpool_node *vnode;

  /* Increase the alignment of all global arrays for vectorization.  */
  for (vnode = varpool_nodes_queue;
       vnode;
       vnode = vnode->next_needed)
    {
      tree vectype, decl = vnode->decl;
      unsigned int alignment;

      if (TREE_CODE (TREE_TYPE (decl)) != ARRAY_TYPE)
        continue;
      vectype = get_vectype_for_scalar_type (TREE_TYPE (TREE_TYPE (decl)));
      if (!vectype)
        continue;
      alignment = TYPE_ALIGN (vectype);
      if (DECL_ALIGN (decl) >= alignment)
        continue;

      if (vect_can_force_dr_alignment_p (decl, alignment))
        {
          DECL_ALIGN (decl) = TYPE_ALIGN (vectype);
          DECL_USER_ALIGN (decl) = 1;
          if (dump_file)
            {
              fprintf (dump_file, "Increasing alignment of decl: ");
              print_generic_expr (dump_file, decl, TDF_SLIM);
            }
        }
    }
  return 0;
}

static bool
gate_increase_alignment (void)
{
  return flag_section_anchors && flag_tree_vectorize;
}

struct simple_ipa_opt_pass pass_ipa_increase_alignment =
{
 {
  SIMPLE_IPA_PASS,
  "increase_alignment",                 /* name */
  gate_increase_alignment,              /* gate */
  increase_alignment,                   /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  0,                                    /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  0                                     /* todo_flags_finish */
 }
};