Mercurial > hg > CbC > CbC_gcc
annotate gcc/tree-vectorizer.c @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | f6334be47118 |
children | 84e7813d76e9 |
rev | line source |
---|---|
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1 /* Vectorizer |
111 | 2 Copyright (C) 2003-2017 Free Software Foundation, Inc. |
0 | 3 Contributed by Dorit Naishlos <dorit@il.ibm.com> |
4 | |
5 This file is part of GCC. | |
6 | |
7 GCC is free software; you can redistribute it and/or modify it under | |
8 the terms of the GNU General Public License as published by the Free | |
9 Software Foundation; either version 3, or (at your option) any later | |
10 version. | |
11 | |
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with GCC; see the file COPYING3. If not see | |
19 <http://www.gnu.org/licenses/>. */ | |
20 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
21 /* Loop and basic block vectorizer. |
0 | 22 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
23 This file contains drivers for the three vectorizers: |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
24 (1) loop vectorizer (inter-iteration parallelism), |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
25 (2) loop-aware SLP (intra-iteration parallelism) (invoked by the loop |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
26 vectorizer) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
27 (3) BB vectorizer (out-of-loops), aka SLP |
0 | 28 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
29 The rest of the vectorizer's code is organized as follows: |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
30 - tree-vect-loop.c - loop specific parts such as reductions, etc. These are |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
31 used by drivers (1) and (2). |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
32 - tree-vect-loop-manip.c - vectorizer's loop control-flow utilities, used by |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
33 drivers (1) and (2). |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
34 - tree-vect-slp.c - BB vectorization specific analysis and transformation, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
35 used by drivers (2) and (3). |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
36 - tree-vect-stmts.c - statements analysis and transformation (used by all). |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
37 - tree-vect-data-refs.c - vectorizer specific data-refs analysis and |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
38 manipulations (used by all). |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
39 - tree-vect-patterns.c - vectorizable code patterns detector (used by all) |
0 | 40 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
41 Here's a poor attempt at illustrating that: |
0 | 42 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
43 tree-vectorizer.c: |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
44 loop_vect() loop_aware_slp() slp_vect() |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
45 | / \ / |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
46 | / \ / |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
47 tree-vect-loop.c tree-vect-slp.c |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
48 | \ \ / / | |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
49 | \ \/ / | |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
50 | \ /\ / | |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
51 | \ / \ / | |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
52 tree-vect-stmts.c tree-vect-data-refs.c |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
53 \ / |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
54 tree-vect-patterns.c |
0 | 55 */ |
56 | |
57 #include "config.h" | |
58 #include "system.h" | |
59 #include "coretypes.h" | |
111 | 60 #include "backend.h" |
0 | 61 #include "tree.h" |
111 | 62 #include "gimple.h" |
63 #include "predict.h" | |
64 #include "tree-pass.h" | |
65 #include "ssa.h" | |
66 #include "cgraph.h" | |
67 #include "fold-const.h" | |
68 #include "stor-layout.h" | |
69 #include "gimple-iterator.h" | |
70 #include "gimple-walk.h" | |
71 #include "tree-ssa-loop-manip.h" | |
72 #include "tree-ssa-loop-niter.h" | |
73 #include "tree-cfg.h" | |
0 | 74 #include "cfgloop.h" |
75 #include "tree-vectorizer.h" | |
111 | 76 #include "tree-ssa-propagate.h" |
77 #include "dbgcnt.h" | |
78 #include "tree-scalar-evolution.h" | |
79 #include "stringpool.h" | |
80 #include "attribs.h" | |
0 | 81 |
82 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
83 /* Loop or bb location. */ |
111 | 84 source_location vect_location; |
0 | 85 |
86 /* Vector mapping GIMPLE stmt to stmt_vec_info. */ | |
111 | 87 vec<stmt_vec_info> stmt_vec_info_vec; |
0 | 88 |
111 | 89 /* For mapping simduid to vectorization factor. */ |
0 | 90 |
111 | 91 struct simduid_to_vf : free_ptr_hash<simduid_to_vf> |
92 { | |
93 unsigned int simduid; | |
94 int vf; | |
95 | |
96 /* hash_table support. */ | |
97 static inline hashval_t hash (const simduid_to_vf *); | |
98 static inline int equal (const simduid_to_vf *, const simduid_to_vf *); | |
99 }; | |
100 | |
101 inline hashval_t | |
102 simduid_to_vf::hash (const simduid_to_vf *p) | |
103 { | |
104 return p->simduid; | |
105 } | |
106 | |
107 inline int | |
108 simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2) | |
109 { | |
110 return p1->simduid == p2->simduid; | |
111 } | |
112 | |
113 /* This hash maps the OMP simd array to the corresponding simduid used | |
114 to index into it. Like thus, | |
0 | 115 |
111 | 116 _7 = GOMP_SIMD_LANE (simduid.0) |
117 ... | |
118 ... | |
119 D.1737[_7] = stuff; | |
120 | |
121 | |
122 This hash maps from the OMP simd array (D.1737[]) to DECL_UID of | |
123 simduid.0. */ | |
124 | |
125 struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid> | |
126 { | |
127 tree decl; | |
128 unsigned int simduid; | |
129 | |
130 /* hash_table support. */ | |
131 static inline hashval_t hash (const simd_array_to_simduid *); | |
132 static inline int equal (const simd_array_to_simduid *, | |
133 const simd_array_to_simduid *); | |
134 }; | |
135 | |
136 inline hashval_t | |
137 simd_array_to_simduid::hash (const simd_array_to_simduid *p) | |
138 { | |
139 return DECL_UID (p->decl); | |
140 } | |
141 | |
142 inline int | |
143 simd_array_to_simduid::equal (const simd_array_to_simduid *p1, | |
144 const simd_array_to_simduid *p2) | |
145 { | |
146 return p1->decl == p2->decl; | |
147 } | |
148 | |
149 /* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LAST_LANE, | |
150 into their corresponding constants and remove | |
151 IFN_GOMP_SIMD_ORDERED_{START,END}. */ | |
0 | 152 |
153 static void | |
111 | 154 adjust_simduid_builtins (hash_table<simduid_to_vf> *htab) |
0 | 155 { |
111 | 156 basic_block bb; |
0 | 157 |
111 | 158 FOR_EACH_BB_FN (bb, cfun) |
0 | 159 { |
111 | 160 gimple_stmt_iterator i; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
161 |
111 | 162 for (i = gsi_start_bb (bb); !gsi_end_p (i); ) |
163 { | |
164 unsigned int vf = 1; | |
165 enum internal_fn ifn; | |
166 gimple *stmt = gsi_stmt (i); | |
167 tree t; | |
168 if (!is_gimple_call (stmt) | |
169 || !gimple_call_internal_p (stmt)) | |
170 { | |
171 gsi_next (&i); | |
172 continue; | |
173 } | |
174 ifn = gimple_call_internal_fn (stmt); | |
175 switch (ifn) | |
176 { | |
177 case IFN_GOMP_SIMD_LANE: | |
178 case IFN_GOMP_SIMD_VF: | |
179 case IFN_GOMP_SIMD_LAST_LANE: | |
180 break; | |
181 case IFN_GOMP_SIMD_ORDERED_START: | |
182 case IFN_GOMP_SIMD_ORDERED_END: | |
183 if (integer_onep (gimple_call_arg (stmt, 0))) | |
184 { | |
185 enum built_in_function bcode | |
186 = (ifn == IFN_GOMP_SIMD_ORDERED_START | |
187 ? BUILT_IN_GOMP_ORDERED_START | |
188 : BUILT_IN_GOMP_ORDERED_END); | |
189 gimple *g | |
190 = gimple_build_call (builtin_decl_explicit (bcode), 0); | |
191 tree vdef = gimple_vdef (stmt); | |
192 gimple_set_vdef (g, vdef); | |
193 SSA_NAME_DEF_STMT (vdef) = g; | |
194 gimple_set_vuse (g, gimple_vuse (stmt)); | |
195 gsi_replace (&i, g, true); | |
196 continue; | |
197 } | |
198 gsi_remove (&i, true); | |
199 unlink_stmt_vdef (stmt); | |
200 continue; | |
201 default: | |
202 gsi_next (&i); | |
203 continue; | |
204 } | |
205 tree arg = gimple_call_arg (stmt, 0); | |
206 gcc_assert (arg != NULL_TREE); | |
207 gcc_assert (TREE_CODE (arg) == SSA_NAME); | |
208 simduid_to_vf *p = NULL, data; | |
209 data.simduid = DECL_UID (SSA_NAME_VAR (arg)); | |
210 /* Need to nullify loop safelen field since it's value is not | |
211 valid after transformation. */ | |
212 if (bb->loop_father && bb->loop_father->safelen > 0) | |
213 bb->loop_father->safelen = 0; | |
214 if (htab) | |
215 { | |
216 p = htab->find (&data); | |
217 if (p) | |
218 vf = p->vf; | |
219 } | |
220 switch (ifn) | |
221 { | |
222 case IFN_GOMP_SIMD_VF: | |
223 t = build_int_cst (unsigned_type_node, vf); | |
224 break; | |
225 case IFN_GOMP_SIMD_LANE: | |
226 t = build_int_cst (unsigned_type_node, 0); | |
227 break; | |
228 case IFN_GOMP_SIMD_LAST_LANE: | |
229 t = gimple_call_arg (stmt, 1); | |
230 break; | |
231 default: | |
232 gcc_unreachable (); | |
233 } | |
234 tree lhs = gimple_call_lhs (stmt); | |
235 if (lhs) | |
236 replace_uses_by (lhs, t); | |
237 release_defs (stmt); | |
238 gsi_remove (&i, true); | |
239 } | |
0 | 240 } |
111 | 241 } |
0 | 242 |
111 | 243 /* Helper structure for note_simd_array_uses. */ |
244 | |
245 struct note_simd_array_uses_struct | |
246 { | |
247 hash_table<simd_array_to_simduid> **htab; | |
248 unsigned int simduid; | |
249 }; | |
250 | |
251 /* Callback for note_simd_array_uses, called through walk_gimple_op. */ | |
252 | |
253 static tree | |
254 note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data) | |
255 { | |
256 struct walk_stmt_info *wi = (struct walk_stmt_info *) data; | |
257 struct note_simd_array_uses_struct *ns | |
258 = (struct note_simd_array_uses_struct *) wi->info; | |
0 | 259 |
111 | 260 if (TYPE_P (*tp)) |
261 *walk_subtrees = 0; | |
262 else if (VAR_P (*tp) | |
263 && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp)) | |
264 && DECL_CONTEXT (*tp) == current_function_decl) | |
265 { | |
266 simd_array_to_simduid data; | |
267 if (!*ns->htab) | |
268 *ns->htab = new hash_table<simd_array_to_simduid> (15); | |
269 data.decl = *tp; | |
270 data.simduid = ns->simduid; | |
271 simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT); | |
272 if (*slot == NULL) | |
273 { | |
274 simd_array_to_simduid *p = XNEW (simd_array_to_simduid); | |
275 *p = data; | |
276 *slot = p; | |
277 } | |
278 else if ((*slot)->simduid != ns->simduid) | |
279 (*slot)->simduid = -1U; | |
280 *walk_subtrees = 0; | |
281 } | |
282 return NULL_TREE; | |
0 | 283 } |
284 | |
111 | 285 /* Find "omp simd array" temporaries and map them to corresponding |
286 simduid. */ | |
0 | 287 |
111 | 288 static void |
289 note_simd_array_uses (hash_table<simd_array_to_simduid> **htab) | |
290 { | |
291 basic_block bb; | |
292 gimple_stmt_iterator gsi; | |
293 struct walk_stmt_info wi; | |
294 struct note_simd_array_uses_struct ns; | |
295 | |
296 memset (&wi, 0, sizeof (wi)); | |
297 wi.info = &ns; | |
298 ns.htab = htab; | |
299 | |
300 FOR_EACH_BB_FN (bb, cfun) | |
301 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
302 { | |
303 gimple *stmt = gsi_stmt (gsi); | |
304 if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt)) | |
305 continue; | |
306 switch (gimple_call_internal_fn (stmt)) | |
307 { | |
308 case IFN_GOMP_SIMD_LANE: | |
309 case IFN_GOMP_SIMD_VF: | |
310 case IFN_GOMP_SIMD_LAST_LANE: | |
311 break; | |
312 default: | |
313 continue; | |
314 } | |
315 tree lhs = gimple_call_lhs (stmt); | |
316 if (lhs == NULL_TREE) | |
317 continue; | |
318 imm_use_iterator use_iter; | |
319 gimple *use_stmt; | |
320 ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0))); | |
321 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs) | |
322 if (!is_gimple_debug (use_stmt)) | |
323 walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi); | |
324 } | |
325 } | |
326 | |
327 /* Shrink arrays with "omp simd array" attribute to the corresponding | |
328 vectorization factor. */ | |
0 | 329 |
111 | 330 static void |
331 shrink_simd_arrays | |
332 (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab, | |
333 hash_table<simduid_to_vf> *simduid_to_vf_htab) | |
334 { | |
335 for (hash_table<simd_array_to_simduid>::iterator iter | |
336 = simd_array_to_simduid_htab->begin (); | |
337 iter != simd_array_to_simduid_htab->end (); ++iter) | |
338 if ((*iter)->simduid != -1U) | |
339 { | |
340 tree decl = (*iter)->decl; | |
341 int vf = 1; | |
342 if (simduid_to_vf_htab) | |
343 { | |
344 simduid_to_vf *p = NULL, data; | |
345 data.simduid = (*iter)->simduid; | |
346 p = simduid_to_vf_htab->find (&data); | |
347 if (p) | |
348 vf = p->vf; | |
349 } | |
350 tree atype | |
351 = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf); | |
352 TREE_TYPE (decl) = atype; | |
353 relayout_decl (decl); | |
354 } | |
355 | |
356 delete simd_array_to_simduid_htab; | |
357 } | |
358 | |
359 /* Initialize the vec_info with kind KIND_IN and target cost data | |
360 TARGET_COST_DATA_IN. */ | |
361 | |
362 vec_info::vec_info (vec_info::vec_kind kind_in, void *target_cost_data_in) | |
363 : kind (kind_in), | |
364 datarefs (vNULL), | |
365 ddrs (vNULL), | |
366 target_cost_data (target_cost_data_in) | |
367 { | |
368 } | |
369 | |
370 vec_info::~vec_info () | |
371 { | |
372 slp_instance instance; | |
373 struct data_reference *dr; | |
374 unsigned int i; | |
375 | |
376 FOR_EACH_VEC_ELT (datarefs, i, dr) | |
377 if (dr->aux) | |
378 { | |
379 free (dr->aux); | |
380 dr->aux = NULL; | |
381 } | |
382 | |
383 FOR_EACH_VEC_ELT (slp_instances, i, instance) | |
384 vect_free_slp_instance (instance); | |
385 | |
386 free_data_refs (datarefs); | |
387 free_dependence_relations (ddrs); | |
388 destroy_cost_data (target_cost_data); | |
389 } | |
390 | |
391 /* A helper function to free scev and LOOP niter information, as well as | |
392 clear loop constraint LOOP_C_FINITE. */ | |
393 | |
394 void | |
395 vect_free_loop_info_assumptions (struct loop *loop) | |
396 { | |
397 scev_reset_htab (); | |
398 /* We need to explicitly reset upper bound information since they are | |
399 used even after free_numbers_of_iterations_estimates. */ | |
400 loop->any_upper_bound = false; | |
401 loop->any_likely_upper_bound = false; | |
402 free_numbers_of_iterations_estimates (loop); | |
403 loop_constraint_clear (loop, LOOP_C_FINITE); | |
404 } | |
405 | |
406 /* Return whether STMT is inside the region we try to vectorize. */ | |
0 | 407 |
408 bool | |
111 | 409 vect_stmt_in_region_p (vec_info *vinfo, gimple *stmt) |
0 | 410 { |
111 | 411 if (!gimple_bb (stmt)) |
0 | 412 return false; |
413 | |
111 | 414 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) |
415 { | |
416 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
417 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt))) | |
418 return false; | |
419 } | |
0 | 420 else |
111 | 421 { |
422 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo); | |
423 if (gimple_bb (stmt) != BB_VINFO_BB (bb_vinfo) | |
424 || gimple_uid (stmt) == -1U | |
425 || gimple_code (stmt) == GIMPLE_PHI) | |
426 return false; | |
427 } | |
0 | 428 |
429 return true; | |
430 } | |
431 | |
432 | |
111 | 433 /* If LOOP has been versioned during ifcvt, return the internal call |
434 guarding it. */ | |
435 | |
436 static gimple * | |
437 vect_loop_vectorized_call (struct loop *loop) | |
438 { | |
439 basic_block bb = loop_preheader_edge (loop)->src; | |
440 gimple *g; | |
441 do | |
442 { | |
443 g = last_stmt (bb); | |
444 if (g) | |
445 break; | |
446 if (!single_pred_p (bb)) | |
447 break; | |
448 bb = single_pred (bb); | |
449 } | |
450 while (1); | |
451 if (g && gimple_code (g) == GIMPLE_COND) | |
452 { | |
453 gimple_stmt_iterator gsi = gsi_for_stmt (g); | |
454 gsi_prev (&gsi); | |
455 if (!gsi_end_p (gsi)) | |
456 { | |
457 g = gsi_stmt (gsi); | |
458 if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED) | |
459 && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num | |
460 || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num)) | |
461 return g; | |
462 } | |
463 } | |
464 return NULL; | |
465 } | |
466 | |
467 /* Fold loop internal call G like IFN_LOOP_VECTORIZED/IFN_LOOP_DIST_ALIAS | |
468 to VALUE and update any immediate uses of it's LHS. */ | |
469 | |
470 static void | |
471 fold_loop_internal_call (gimple *g, tree value) | |
472 { | |
473 tree lhs = gimple_call_lhs (g); | |
474 use_operand_p use_p; | |
475 imm_use_iterator iter; | |
476 gimple *use_stmt; | |
477 gimple_stmt_iterator gsi = gsi_for_stmt (g); | |
478 | |
479 update_call_from_tree (&gsi, value); | |
480 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) | |
481 { | |
482 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) | |
483 SET_USE (use_p, value); | |
484 update_stmt (use_stmt); | |
485 } | |
486 } | |
487 | |
488 /* If LOOP has been versioned during loop distribution, return the gurading | |
489 internal call. */ | |
490 | |
491 static gimple * | |
492 vect_loop_dist_alias_call (struct loop *loop) | |
493 { | |
494 basic_block bb; | |
495 basic_block entry; | |
496 struct loop *outer, *orig; | |
497 gimple_stmt_iterator gsi; | |
498 gimple *g; | |
499 | |
500 if (loop->orig_loop_num == 0) | |
501 return NULL; | |
502 | |
503 orig = get_loop (cfun, loop->orig_loop_num); | |
504 if (orig == NULL) | |
505 { | |
506 /* The original loop is somehow destroyed. Clear the information. */ | |
507 loop->orig_loop_num = 0; | |
508 return NULL; | |
509 } | |
510 | |
511 if (loop != orig) | |
512 bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header); | |
513 else | |
514 bb = loop_preheader_edge (loop)->src; | |
515 | |
516 outer = bb->loop_father; | |
517 entry = ENTRY_BLOCK_PTR_FOR_FN (cfun); | |
518 | |
519 /* Look upward in dominance tree. */ | |
520 for (; bb != entry && flow_bb_inside_loop_p (outer, bb); | |
521 bb = get_immediate_dominator (CDI_DOMINATORS, bb)) | |
522 { | |
523 g = last_stmt (bb); | |
524 if (g == NULL || gimple_code (g) != GIMPLE_COND) | |
525 continue; | |
526 | |
527 gsi = gsi_for_stmt (g); | |
528 gsi_prev (&gsi); | |
529 if (gsi_end_p (gsi)) | |
530 continue; | |
531 | |
532 g = gsi_stmt (gsi); | |
533 /* The guarding internal function call must have the same distribution | |
534 alias id. */ | |
535 if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS) | |
536 && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num)) | |
537 return g; | |
538 } | |
539 return NULL; | |
540 } | |
541 | |
542 /* Set the uids of all the statements in basic blocks inside loop | |
543 represented by LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal | |
544 call guarding the loop which has been if converted. */ | |
545 static void | |
546 set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) | |
547 { | |
548 tree arg = gimple_call_arg (loop_vectorized_call, 1); | |
549 basic_block *bbs; | |
550 unsigned int i; | |
551 struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg)); | |
552 | |
553 LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop; | |
554 gcc_checking_assert (vect_loop_vectorized_call (scalar_loop) | |
555 == loop_vectorized_call); | |
556 /* If we are going to vectorize outer loop, prevent vectorization | |
557 of the inner loop in the scalar loop - either the scalar loop is | |
558 thrown away, so it is a wasted work, or is used only for | |
559 a few iterations. */ | |
560 if (scalar_loop->inner) | |
561 { | |
562 gimple *g = vect_loop_vectorized_call (scalar_loop->inner); | |
563 if (g) | |
564 { | |
565 arg = gimple_call_arg (g, 0); | |
566 get_loop (cfun, tree_to_shwi (arg))->dont_vectorize = true; | |
567 fold_loop_internal_call (g, boolean_false_node); | |
568 } | |
569 } | |
570 bbs = get_loop_body (scalar_loop); | |
571 for (i = 0; i < scalar_loop->num_nodes; i++) | |
572 { | |
573 basic_block bb = bbs[i]; | |
574 gimple_stmt_iterator gsi; | |
575 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
576 { | |
577 gimple *phi = gsi_stmt (gsi); | |
578 gimple_set_uid (phi, 0); | |
579 } | |
580 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
581 { | |
582 gimple *stmt = gsi_stmt (gsi); | |
583 gimple_set_uid (stmt, 0); | |
584 } | |
585 } | |
586 free (bbs); | |
587 } | |
588 | |
0 | 589 /* Function vectorize_loops. |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
590 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
591 Entry point to loop vectorization phase. */ |
0 | 592 |
593 unsigned | |
594 vectorize_loops (void) | |
595 { | |
596 unsigned int i; | |
597 unsigned int num_vectorized_loops = 0; | |
598 unsigned int vect_loops_num; | |
599 struct loop *loop; | |
111 | 600 hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL; |
601 hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL; | |
602 bool any_ifcvt_loops = false; | |
603 unsigned ret = 0; | |
604 struct loop *new_loop; | |
0 | 605 |
111 | 606 vect_loops_num = number_of_loops (cfun); |
0 | 607 |
608 /* Bail out if there are no loops. */ | |
609 if (vect_loops_num <= 1) | |
610 return 0; | |
611 | |
111 | 612 if (cfun->has_simduid_loops) |
613 note_simd_array_uses (&simd_array_to_simduid_htab); | |
0 | 614 |
615 init_stmt_vec_info_vec (); | |
616 | |
617 /* ----------- Analyze loops. ----------- */ | |
618 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
619 /* If some loop was duplicated, it gets bigger number |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
620 than all previously defined loops. This fact allows us to run |
0 | 621 only over initial loops skipping newly generated ones. */ |
111 | 622 FOR_EACH_LOOP (loop, 0) |
623 if (loop->dont_vectorize) | |
0 | 624 { |
111 | 625 any_ifcvt_loops = true; |
626 /* If-conversion sometimes versions both the outer loop | |
627 (for the case when outer loop vectorization might be | |
628 desirable) as well as the inner loop in the scalar version | |
629 of the loop. So we have: | |
630 if (LOOP_VECTORIZED (1, 3)) | |
631 { | |
632 loop1 | |
633 loop2 | |
634 } | |
635 else | |
636 loop3 (copy of loop1) | |
637 if (LOOP_VECTORIZED (4, 5)) | |
638 loop4 (copy of loop2) | |
639 else | |
640 loop5 (copy of loop4) | |
641 If FOR_EACH_LOOP gives us loop3 first (which has | |
642 dont_vectorize set), make sure to process loop1 before loop4; | |
643 so that we can prevent vectorization of loop4 if loop1 | |
644 is successfully vectorized. */ | |
645 if (loop->inner) | |
646 { | |
647 gimple *loop_vectorized_call | |
648 = vect_loop_vectorized_call (loop); | |
649 if (loop_vectorized_call | |
650 && vect_loop_vectorized_call (loop->inner)) | |
651 { | |
652 tree arg = gimple_call_arg (loop_vectorized_call, 0); | |
653 struct loop *vector_loop | |
654 = get_loop (cfun, tree_to_shwi (arg)); | |
655 if (vector_loop && vector_loop != loop) | |
656 { | |
657 loop = vector_loop; | |
658 /* Make sure we don't vectorize it twice. */ | |
659 loop->dont_vectorize = true; | |
660 goto try_vectorize; | |
661 } | |
662 } | |
663 } | |
664 } | |
665 else | |
666 { | |
667 loop_vec_info loop_vinfo, orig_loop_vinfo; | |
668 gimple *loop_vectorized_call, *loop_dist_alias_call; | |
669 try_vectorize: | |
670 if (!((flag_tree_loop_vectorize | |
671 && optimize_loop_nest_for_speed_p (loop)) | |
672 || loop->force_vectorize)) | |
673 continue; | |
674 orig_loop_vinfo = NULL; | |
675 loop_vectorized_call = vect_loop_vectorized_call (loop); | |
676 loop_dist_alias_call = vect_loop_dist_alias_call (loop); | |
677 vectorize_epilogue: | |
678 vect_location = find_loop_location (loop); | |
679 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION | |
680 && dump_enabled_p ()) | |
681 dump_printf (MSG_NOTE, "\nAnalyzing loop at %s:%d\n", | |
682 LOCATION_FILE (vect_location), | |
683 LOCATION_LINE (vect_location)); | |
0 | 684 |
111 | 685 loop_vinfo = vect_analyze_loop (loop, orig_loop_vinfo); |
0 | 686 loop->aux = loop_vinfo; |
687 | |
688 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) | |
111 | 689 { |
690 /* Free existing information if loop is analyzed with some | |
691 assumptions. */ | |
692 if (loop_constraint_set_p (loop, LOOP_C_FINITE)) | |
693 vect_free_loop_info_assumptions (loop); | |
694 | |
695 /* If we applied if-conversion then try to vectorize the | |
696 BB of innermost loops. | |
697 ??? Ideally BB vectorization would learn to vectorize | |
698 control flow by applying if-conversion on-the-fly, the | |
699 following retains the if-converted loop body even when | |
700 only non-if-converted parts took part in BB vectorization. */ | |
701 if (flag_tree_slp_vectorize != 0 | |
702 && loop_vectorized_call | |
703 && ! loop->inner) | |
704 { | |
705 basic_block bb = loop->header; | |
706 bool has_mask_load_store = false; | |
707 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); | |
708 !gsi_end_p (gsi); gsi_next (&gsi)) | |
709 { | |
710 gimple *stmt = gsi_stmt (gsi); | |
711 if (is_gimple_call (stmt) | |
712 && gimple_call_internal_p (stmt) | |
713 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD | |
714 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE)) | |
715 { | |
716 has_mask_load_store = true; | |
717 break; | |
718 } | |
719 gimple_set_uid (stmt, -1); | |
720 gimple_set_visited (stmt, false); | |
721 } | |
722 if (! has_mask_load_store && vect_slp_bb (bb)) | |
723 { | |
724 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, | |
725 "basic block vectorized\n"); | |
726 fold_loop_internal_call (loop_vectorized_call, | |
727 boolean_true_node); | |
728 loop_vectorized_call = NULL; | |
729 ret |= TODO_cleanup_cfg; | |
730 } | |
731 } | |
732 /* If outer loop vectorization fails for LOOP_VECTORIZED guarded | |
733 loop, don't vectorize its inner loop; we'll attempt to | |
734 vectorize LOOP_VECTORIZED guarded inner loop of the scalar | |
735 loop version. */ | |
736 if (loop_vectorized_call && loop->inner) | |
737 loop->inner->dont_vectorize = true; | |
738 continue; | |
739 } | |
0 | 740 |
111 | 741 if (!dbg_cnt (vect_loop)) |
742 { | |
743 /* We may miss some if-converted loops due to | |
744 debug counter. Set any_ifcvt_loops to visit | |
745 them at finalization. */ | |
746 any_ifcvt_loops = true; | |
747 /* Free existing information if loop is analyzed with some | |
748 assumptions. */ | |
749 if (loop_constraint_set_p (loop, LOOP_C_FINITE)) | |
750 vect_free_loop_info_assumptions (loop); | |
751 | |
752 break; | |
753 } | |
754 | |
755 if (loop_vectorized_call) | |
756 set_uid_loop_bbs (loop_vinfo, loop_vectorized_call); | |
757 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION | |
758 && dump_enabled_p ()) | |
759 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, | |
760 "loop vectorized\n"); | |
761 new_loop = vect_transform_loop (loop_vinfo); | |
0 | 762 num_vectorized_loops++; |
111 | 763 /* Now that the loop has been vectorized, allow it to be unrolled |
764 etc. */ | |
765 loop->force_vectorize = false; | |
766 | |
767 if (loop->simduid) | |
768 { | |
769 simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf); | |
770 if (!simduid_to_vf_htab) | |
771 simduid_to_vf_htab = new hash_table<simduid_to_vf> (15); | |
772 simduid_to_vf_data->simduid = DECL_UID (loop->simduid); | |
773 simduid_to_vf_data->vf = loop_vinfo->vectorization_factor; | |
774 *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT) | |
775 = simduid_to_vf_data; | |
776 } | |
777 | |
778 if (loop_vectorized_call) | |
779 { | |
780 fold_loop_internal_call (loop_vectorized_call, boolean_true_node); | |
781 loop_vectorized_call = NULL; | |
782 ret |= TODO_cleanup_cfg; | |
783 } | |
784 if (loop_dist_alias_call) | |
785 { | |
786 tree value = gimple_call_arg (loop_dist_alias_call, 1); | |
787 fold_loop_internal_call (loop_dist_alias_call, value); | |
788 loop_dist_alias_call = NULL; | |
789 ret |= TODO_cleanup_cfg; | |
790 } | |
791 | |
792 if (new_loop) | |
793 { | |
794 /* Epilogue of vectorized loop must be vectorized too. */ | |
795 vect_loops_num = number_of_loops (cfun); | |
796 loop = new_loop; | |
797 orig_loop_vinfo = loop_vinfo; /* To pass vect_analyze_loop. */ | |
798 goto vectorize_epilogue; | |
799 } | |
0 | 800 } |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
801 |
111 | 802 vect_location = UNKNOWN_LOCATION; |
0 | 803 |
804 statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops); | |
111 | 805 if (dump_enabled_p () |
806 || (num_vectorized_loops > 0 && dump_enabled_p ())) | |
807 dump_printf_loc (MSG_NOTE, vect_location, | |
808 "vectorized %u loops in function.\n", | |
809 num_vectorized_loops); | |
0 | 810 |
811 /* ----------- Finalize. ----------- */ | |
812 | |
111 | 813 if (any_ifcvt_loops) |
814 for (i = 1; i < vect_loops_num; i++) | |
815 { | |
816 loop = get_loop (cfun, i); | |
817 if (loop && loop->dont_vectorize) | |
818 { | |
819 gimple *g = vect_loop_vectorized_call (loop); | |
820 if (g) | |
821 { | |
822 fold_loop_internal_call (g, boolean_false_node); | |
823 ret |= TODO_cleanup_cfg; | |
824 g = NULL; | |
825 } | |
826 else | |
827 g = vect_loop_dist_alias_call (loop); | |
828 | |
829 if (g) | |
830 { | |
831 fold_loop_internal_call (g, boolean_false_node); | |
832 ret |= TODO_cleanup_cfg; | |
833 } | |
834 } | |
835 } | |
0 | 836 |
837 for (i = 1; i < vect_loops_num; i++) | |
838 { | |
839 loop_vec_info loop_vinfo; | |
111 | 840 bool has_mask_store; |
0 | 841 |
111 | 842 loop = get_loop (cfun, i); |
0 | 843 if (!loop) |
844 continue; | |
845 loop_vinfo = (loop_vec_info) loop->aux; | |
111 | 846 has_mask_store = false; |
847 if (loop_vinfo) | |
848 has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo); | |
849 delete loop_vinfo; | |
850 if (has_mask_store) | |
851 optimize_mask_stores (loop); | |
0 | 852 loop->aux = NULL; |
853 } | |
854 | |
855 free_stmt_vec_info_vec (); | |
856 | |
111 | 857 /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */ |
858 if (cfun->has_simduid_loops) | |
859 adjust_simduid_builtins (simduid_to_vf_htab); | |
860 | |
861 /* Shrink any "omp array simd" temporary arrays to the | |
862 actual vectorization factors. */ | |
863 if (simd_array_to_simduid_htab) | |
864 shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab); | |
865 delete simduid_to_vf_htab; | |
866 cfun->has_simduid_loops = false; | |
867 | |
868 if (num_vectorized_loops > 0) | |
869 { | |
870 /* If we vectorized any loop only virtual SSA form needs to be updated. | |
871 ??? Also while we try hard to update loop-closed SSA form we fail | |
872 to properly do this in some corner-cases (see PR56286). */ | |
873 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals); | |
874 return TODO_cleanup_cfg; | |
875 } | |
876 | |
877 return ret; | |
878 } | |
879 | |
880 | |
881 /* Entry point to the simduid cleanup pass. */ | |
882 | |
883 namespace { | |
884 | |
885 const pass_data pass_data_simduid_cleanup = | |
886 { | |
887 GIMPLE_PASS, /* type */ | |
888 "simduid", /* name */ | |
889 OPTGROUP_NONE, /* optinfo_flags */ | |
890 TV_NONE, /* tv_id */ | |
891 ( PROP_ssa | PROP_cfg ), /* properties_required */ | |
892 0, /* properties_provided */ | |
893 0, /* properties_destroyed */ | |
894 0, /* todo_flags_start */ | |
895 0, /* todo_flags_finish */ | |
896 }; | |
897 | |
898 class pass_simduid_cleanup : public gimple_opt_pass | |
899 { | |
900 public: | |
901 pass_simduid_cleanup (gcc::context *ctxt) | |
902 : gimple_opt_pass (pass_data_simduid_cleanup, ctxt) | |
903 {} | |
904 | |
905 /* opt_pass methods: */ | |
906 opt_pass * clone () { return new pass_simduid_cleanup (m_ctxt); } | |
907 virtual bool gate (function *fun) { return fun->has_simduid_loops; } | |
908 virtual unsigned int execute (function *); | |
909 | |
910 }; // class pass_simduid_cleanup | |
911 | |
912 unsigned int | |
913 pass_simduid_cleanup::execute (function *fun) | |
914 { | |
915 hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL; | |
916 | |
917 note_simd_array_uses (&simd_array_to_simduid_htab); | |
918 | |
919 /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */ | |
920 adjust_simduid_builtins (NULL); | |
921 | |
922 /* Shrink any "omp array simd" temporary arrays to the | |
923 actual vectorization factors. */ | |
924 if (simd_array_to_simduid_htab) | |
925 shrink_simd_arrays (simd_array_to_simduid_htab, NULL); | |
926 fun->has_simduid_loops = false; | |
927 return 0; | |
928 } | |
929 | |
930 } // anon namespace | |
931 | |
932 gimple_opt_pass * | |
933 make_pass_simduid_cleanup (gcc::context *ctxt) | |
934 { | |
935 return new pass_simduid_cleanup (ctxt); | |
0 | 936 } |
937 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
938 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
939 /* Entry point to basic block SLP phase. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
940 |
111 | 941 namespace { |
942 | |
943 const pass_data pass_data_slp_vectorize = | |
944 { | |
945 GIMPLE_PASS, /* type */ | |
946 "slp", /* name */ | |
947 OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */ | |
948 TV_TREE_SLP_VECTORIZATION, /* tv_id */ | |
949 ( PROP_ssa | PROP_cfg ), /* properties_required */ | |
950 0, /* properties_provided */ | |
951 0, /* properties_destroyed */ | |
952 0, /* todo_flags_start */ | |
953 TODO_update_ssa, /* todo_flags_finish */ | |
954 }; | |
955 | |
956 class pass_slp_vectorize : public gimple_opt_pass | |
957 { | |
958 public: | |
959 pass_slp_vectorize (gcc::context *ctxt) | |
960 : gimple_opt_pass (pass_data_slp_vectorize, ctxt) | |
961 {} | |
962 | |
963 /* opt_pass methods: */ | |
964 opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); } | |
965 virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; } | |
966 virtual unsigned int execute (function *); | |
967 | |
968 }; // class pass_slp_vectorize | |
969 | |
970 unsigned int | |
971 pass_slp_vectorize::execute (function *fun) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
972 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
973 basic_block bb; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
974 |
111 | 975 bool in_loop_pipeline = scev_initialized_p (); |
976 if (!in_loop_pipeline) | |
977 { | |
978 loop_optimizer_init (LOOPS_NORMAL); | |
979 scev_initialize (); | |
980 } | |
981 | |
982 /* Mark all stmts as not belonging to the current region and unvisited. */ | |
983 FOR_EACH_BB_FN (bb, fun) | |
984 { | |
985 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); | |
986 gsi_next (&gsi)) | |
987 { | |
988 gimple *stmt = gsi_stmt (gsi); | |
989 gimple_set_uid (stmt, -1); | |
990 gimple_set_visited (stmt, false); | |
991 } | |
992 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
993 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
994 init_stmt_vec_info_vec (); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
995 |
111 | 996 FOR_EACH_BB_FN (bb, fun) |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
997 { |
111 | 998 if (vect_slp_bb (bb)) |
999 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, | |
1000 "basic block vectorized\n"); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1001 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1002 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1003 free_stmt_vec_info_vec (); |
111 | 1004 |
1005 if (!in_loop_pipeline) | |
1006 { | |
1007 scev_finalize (); | |
1008 loop_optimizer_finalize (); | |
1009 } | |
1010 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1011 return 0; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1012 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1013 |
111 | 1014 } // anon namespace |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1015 |
111 | 1016 gimple_opt_pass * |
1017 make_pass_slp_vectorize (gcc::context *ctxt) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1018 { |
111 | 1019 return new pass_slp_vectorize (ctxt); |
1020 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1021 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1022 |
0 | 1023 /* Increase alignment of global arrays to improve vectorization potential. |
1024 TODO: | |
1025 - Consider also structs that have an array field. | |
1026 - Use ipa analysis to prune arrays that can't be vectorized? | |
1027 This should involve global alignment analysis and in the future also | |
1028 array padding. */ | |
1029 | |
/* Forward declaration; defined below after its two type-specific helpers.  */
static unsigned get_vec_alignment_for_type (tree);
/* Memoization cache: RECORD_TYPE -> computed vector alignment, allocated
   and freed by increase_alignment ().  */
static hash_map<tree, unsigned> *type_align_map;
1032 | |
1033 /* Return alignment of array's vector type corresponding to scalar type. | |
1034 0 if no vector type exists. */ | |
1035 static unsigned | |
1036 get_vec_alignment_for_array_type (tree type) | |
1037 { | |
1038 gcc_assert (TREE_CODE (type) == ARRAY_TYPE); | |
1039 | |
1040 tree vectype = get_vectype_for_scalar_type (strip_array_types (type)); | |
1041 if (!vectype | |
1042 || !TYPE_SIZE (type) | |
1043 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST | |
1044 || tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (vectype))) | |
1045 return 0; | |
1046 | |
1047 return TYPE_ALIGN (vectype); | |
1048 } | |
1049 | |
1050 /* Return alignment of field having maximum alignment of vector type | |
1051 corresponding to it's scalar type. For now, we only consider fields whose | |
1052 offset is a multiple of it's vector alignment. | |
1053 0 if no suitable field is found. */ | |
1054 static unsigned | |
1055 get_vec_alignment_for_record_type (tree type) | |
1056 { | |
1057 gcc_assert (TREE_CODE (type) == RECORD_TYPE); | |
1058 | |
1059 unsigned max_align = 0, alignment; | |
1060 HOST_WIDE_INT offset; | |
1061 tree offset_tree; | |
1062 | |
1063 if (TYPE_PACKED (type)) | |
1064 return 0; | |
1065 | |
1066 unsigned *slot = type_align_map->get (type); | |
1067 if (slot) | |
1068 return *slot; | |
1069 | |
1070 for (tree field = first_field (type); | |
1071 field != NULL_TREE; | |
1072 field = DECL_CHAIN (field)) | |
1073 { | |
1074 /* Skip if not FIELD_DECL or if alignment is set by user. */ | |
1075 if (TREE_CODE (field) != FIELD_DECL | |
1076 || DECL_USER_ALIGN (field) | |
1077 || DECL_ARTIFICIAL (field)) | |
1078 continue; | |
1079 | |
1080 /* We don't need to process the type further if offset is variable, | |
1081 since the offsets of remaining members will also be variable. */ | |
1082 if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST | |
1083 || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST) | |
1084 break; | |
1085 | |
1086 /* Similarly stop processing the type if offset_tree | |
1087 does not fit in unsigned HOST_WIDE_INT. */ | |
1088 offset_tree = bit_position (field); | |
1089 if (!tree_fits_uhwi_p (offset_tree)) | |
1090 break; | |
1091 | |
1092 offset = tree_to_uhwi (offset_tree); | |
1093 alignment = get_vec_alignment_for_type (TREE_TYPE (field)); | |
1094 | |
1095 /* Get maximum alignment of vectorized field/array among those members | |
1096 whose offset is multiple of the vector alignment. */ | |
1097 if (alignment | |
1098 && (offset % alignment == 0) | |
1099 && (alignment > max_align)) | |
1100 max_align = alignment; | |
1101 } | |
1102 | |
1103 type_align_map->put (type, max_align); | |
1104 return max_align; | |
1105 } | |
1106 | |
1107 /* Return alignment of vector type corresponding to decl's scalar type | |
1108 or 0 if it doesn't exist or the vector alignment is lesser than | |
1109 decl's alignment. */ | |
1110 static unsigned | |
1111 get_vec_alignment_for_type (tree type) | |
1112 { | |
1113 if (type == NULL_TREE) | |
1114 return 0; | |
1115 | |
1116 gcc_assert (TYPE_P (type)); | |
1117 | |
1118 static unsigned alignment = 0; | |
1119 switch (TREE_CODE (type)) | |
1120 { | |
1121 case ARRAY_TYPE: | |
1122 alignment = get_vec_alignment_for_array_type (type); | |
1123 break; | |
1124 case RECORD_TYPE: | |
1125 alignment = get_vec_alignment_for_record_type (type); | |
1126 break; | |
1127 default: | |
1128 alignment = 0; | |
1129 break; | |
1130 } | |
1131 | |
1132 return (alignment > TYPE_ALIGN (type)) ? alignment : 0; | |
1133 } | |
1134 | |
1135 /* Entry point to increase_alignment pass. */ | |
0 | 1136 static unsigned int |
1137 increase_alignment (void) | |
1138 { | |
111 | 1139 varpool_node *vnode; |
1140 | |
1141 vect_location = UNKNOWN_LOCATION; | |
1142 type_align_map = new hash_map<tree, unsigned>; | |
0 | 1143 |
1144 /* Increase the alignment of all global arrays for vectorization. */ | |
111 | 1145 FOR_EACH_DEFINED_VARIABLE (vnode) |
0 | 1146 { |
111 | 1147 tree decl = vnode->decl; |
0 | 1148 unsigned int alignment; |
1149 | |
111 | 1150 if ((decl_in_symtab_p (decl) |
1151 && !symtab_node::get (decl)->can_increase_alignment_p ()) | |
1152 || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl)) | |
1153 continue; | |
0 | 1154 |
111 | 1155 alignment = get_vec_alignment_for_type (TREE_TYPE (decl)); |
1156 if (alignment && vect_can_force_dr_alignment_p (decl, alignment)) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1157 { |
111 | 1158 vnode->increase_alignment (alignment); |
1159 dump_printf (MSG_NOTE, "Increasing alignment of decl: "); | |
1160 dump_generic_expr (MSG_NOTE, TDF_SLIM, decl); | |
1161 dump_printf (MSG_NOTE, "\n"); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1162 } |
0 | 1163 } |
111 | 1164 |
1165 delete type_align_map; | |
0 | 1166 return 0; |
1167 } | |
1168 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1169 |
111 | 1170 namespace { |
1171 | |
1172 const pass_data pass_data_ipa_increase_alignment = | |
0 | 1173 { |
111 | 1174 SIMPLE_IPA_PASS, /* type */ |
1175 "increase_alignment", /* name */ | |
1176 OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */ | |
1177 TV_IPA_OPT, /* tv_id */ | |
1178 0, /* properties_required */ | |
1179 0, /* properties_provided */ | |
1180 0, /* properties_destroyed */ | |
1181 0, /* todo_flags_start */ | |
1182 0, /* todo_flags_finish */ | |
1183 }; | |
0 | 1184 |
111 | 1185 class pass_ipa_increase_alignment : public simple_ipa_opt_pass |
0 | 1186 { |
111 | 1187 public: |
1188 pass_ipa_increase_alignment (gcc::context *ctxt) | |
1189 : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt) | |
1190 {} | |
1191 | |
1192 /* opt_pass methods: */ | |
1193 virtual bool gate (function *) | |
1194 { | |
1195 return flag_section_anchors && flag_tree_loop_vectorize; | |
1196 } | |
1197 | |
1198 virtual unsigned int execute (function *) { return increase_alignment (); } | |
1199 | |
1200 }; // class pass_ipa_increase_alignment | |
1201 | |
1202 } // anon namespace | |
1203 | |
1204 simple_ipa_opt_pass * | |
1205 make_pass_ipa_increase_alignment (gcc::context *ctxt) | |
1206 { | |
1207 return new pass_ipa_increase_alignment (ctxt); | |
1208 } |