comparison gcc/tree-vectorizer.c @ 111:04ced10e8804
gcc 7
| author | kono |
|---|---|
| date | Fri, 27 Oct 2017 22:46:09 +0900 |
| parents | f6334be47118 |
| children | 84e7813d76e9 |
| 68:561a7518be6b | 111:04ced10e8804 |
|---|---|
1 /* Vectorizer | 1 /* Vectorizer |
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 | 2 Copyright (C) 2003-2017 Free Software Foundation, Inc. |
3 Free Software Foundation, Inc. | |
4 Contributed by Dorit Naishlos <dorit@il.ibm.com> | 3 Contributed by Dorit Naishlos <dorit@il.ibm.com> |
5 | 4 |
6 This file is part of GCC. | 5 This file is part of GCC. |
7 | 6 |
8 GCC is free software; you can redistribute it and/or modify it under | 7 GCC is free software; you can redistribute it and/or modify it under |
56 */ | 55 */ |
57 | 56 |
58 #include "config.h" | 57 #include "config.h" |
59 #include "system.h" | 58 #include "system.h" |
60 #include "coretypes.h" | 59 #include "coretypes.h" |
61 #include "tm.h" | 60 #include "backend.h" |
62 #include "ggc.h" | |
63 #include "tree.h" | 61 #include "tree.h" |
64 #include "tree-pretty-print.h" | 62 #include "gimple.h" |
65 #include "tree-flow.h" | 63 #include "predict.h" |
66 #include "tree-dump.h" | 64 #include "tree-pass.h" |
65 #include "ssa.h" | |
66 #include "cgraph.h" | |
67 #include "fold-const.h" | |
68 #include "stor-layout.h" | |
69 #include "gimple-iterator.h" | |
70 #include "gimple-walk.h" | |
71 #include "tree-ssa-loop-manip.h" | |
72 #include "tree-ssa-loop-niter.h" | |
73 #include "tree-cfg.h" | |
67 #include "cfgloop.h" | 74 #include "cfgloop.h" |
68 #include "cfglayout.h" | |
69 #include "tree-vectorizer.h" | 75 #include "tree-vectorizer.h" |
70 #include "tree-pass.h" | 76 #include "tree-ssa-propagate.h" |
71 #include "timevar.h" | 77 #include "dbgcnt.h" |
72 | 78 #include "tree-scalar-evolution.h" |
73 /* vect_dump will be set to stderr or dump_file if exist. */ | 79 #include "stringpool.h" |
74 FILE *vect_dump; | 80 #include "attribs.h" |
75 | 81 |
76 /* vect_verbosity_level set to an invalid value | |
77 to mark that it's uninitialized. */ | |
78 static enum vect_verbosity_levels vect_verbosity_level = MAX_VERBOSITY_LEVEL; | |
79 | 82 |
80 /* Loop or bb location. */ | 83 /* Loop or bb location. */ |
81 LOC vect_location; | 84 source_location vect_location; |
82 | 85 |
83 /* Vector mapping GIMPLE stmt to stmt_vec_info. */ | 86 /* Vector mapping GIMPLE stmt to stmt_vec_info. */ |
84 VEC(vec_void_p,heap) *stmt_vec_info_vec; | 87 vec<stmt_vec_info> stmt_vec_info_vec; |
85 | |
86 | 88 |
87 | 89 /* For mapping simduid to vectorization factor. */ |
88 /* Function vect_set_dump_settings. | 90 |
89 | 91 struct simduid_to_vf : free_ptr_hash<simduid_to_vf> |
90 Fix the verbosity level of the vectorizer if the | 92 { |
91 requested level was not set explicitly using the flag | 93 unsigned int simduid; |
92 -ftree-vectorizer-verbose=N. | 94 int vf; |
93 Decide where to print the debugging information (dump_file/stderr). | 95 |
94 If the user defined the verbosity level, but there is no dump file, | 96 /* hash_table support. */ |
95 print to stderr, otherwise print to the dump file. */ | 97 static inline hashval_t hash (const simduid_to_vf *); |
98 static inline int equal (const simduid_to_vf *, const simduid_to_vf *); | |
99 }; | |
100 | |
101 inline hashval_t | |
102 simduid_to_vf::hash (const simduid_to_vf *p) | |
103 { | |
104 return p->simduid; | |
105 } | |
106 | |
107 inline int | |
108 simduid_to_vf::equal (const simduid_to_vf *p1, const simduid_to_vf *p2) | |
109 { | |
110 return p1->simduid == p2->simduid; | |
111 } | |
112 | |
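The traits above hash a `simduid_to_vf` entry by its simduid alone; the identity hash suffices because simduids are `DECL_UID`s, which are already unique small integers. A minimal sketch of how these traits drive GCC's `hash_table` (the helpers `record_vf` and `lookup_vf` are hypothetical; the `find_slot`/`find` calls mirror the ones used later in this file):

```cpp
/* Sketch only: record and query a simduid -> VF mapping through the
   simduid_to_vf traits defined above.  */
static void
record_vf (hash_table<simduid_to_vf> *&htab, unsigned int simduid, int vf)
{
  if (!htab)
    htab = new hash_table<simduid_to_vf> (15);
  simduid_to_vf *entry = XNEW (simduid_to_vf);
  entry->simduid = simduid;
  entry->vf = vf;
  /* free_ptr_hash stores pointers; the slot owns the allocation.  */
  *htab->find_slot (entry, INSERT) = entry;
}

static int
lookup_vf (hash_table<simduid_to_vf> *htab, unsigned int simduid)
{
  simduid_to_vf data;
  data.simduid = simduid;
  simduid_to_vf *p = htab ? htab->find (&data) : NULL;
  return p ? p->vf : 1;  /* Unknown simduid: assume a VF of 1.  */
}
```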
113 /* This hash maps the OMP simd array to the corresponding simduid used | |
114 to index into it, like this: |
115 | |
116 _7 = GOMP_SIMD_LANE (simduid.0) | |
117 ... | |
118 ... | |
119 D.1737[_7] = stuff; | |
120 | |
121 | |
122 This hash maps from the OMP simd array (D.1737[]) to DECL_UID of | |
123 simduid.0. */ | |
124 | |
125 struct simd_array_to_simduid : free_ptr_hash<simd_array_to_simduid> | |
126 { | |
127 tree decl; | |
128 unsigned int simduid; | |
129 | |
130 /* hash_table support. */ | |
131 static inline hashval_t hash (const simd_array_to_simduid *); | |
132 static inline int equal (const simd_array_to_simduid *, | |
133 const simd_array_to_simduid *); | |
134 }; | |
135 | |
136 inline hashval_t | |
137 simd_array_to_simduid::hash (const simd_array_to_simduid *p) | |
138 { | |
139 return DECL_UID (p->decl); | |
140 } | |
141 | |
142 inline int | |
143 simd_array_to_simduid::equal (const simd_array_to_simduid *p1, | |
144 const simd_array_to_simduid *p2) | |
145 { | |
146 return p1->decl == p2->decl; | |
147 } | |
148 | |
149 /* Fold IFN_GOMP_SIMD_LANE, IFN_GOMP_SIMD_VF, IFN_GOMP_SIMD_LAST_LANE, | |
150 into their corresponding constants and remove | |
151 IFN_GOMP_SIMD_ORDERED_{START,END}. */ | |
96 | 152 |
97 static void | 153 static void |
98 vect_set_dump_settings (bool slp) | 154 adjust_simduid_builtins (hash_table<simduid_to_vf> *htab) |
99 { | 155 { |
100 vect_dump = dump_file; | 156 basic_block bb; |
101 | 157 |
102 /* Check if the verbosity level was defined by the user: */ | 158 FOR_EACH_BB_FN (bb, cfun) |
103 if (user_vect_verbosity_level != MAX_VERBOSITY_LEVEL) | 159 { |
104 { | 160 gimple_stmt_iterator i; |
105 vect_verbosity_level = user_vect_verbosity_level; | 161 |
106 /* Ignore user defined verbosity if dump flags require higher level of | 162 for (i = gsi_start_bb (bb); !gsi_end_p (i); ) |
107 verbosity. */ | 163 { |
108 if (dump_file) | 164 unsigned int vf = 1; |
109 { | 165 enum internal_fn ifn; |
110 if (((dump_flags & TDF_DETAILS) | 166 gimple *stmt = gsi_stmt (i); |
111 && vect_verbosity_level >= REPORT_DETAILS) | 167 tree t; |
112 || ((dump_flags & TDF_STATS) | 168 if (!is_gimple_call (stmt) |
113 && vect_verbosity_level >= REPORT_UNVECTORIZED_LOCATIONS)) | 169 || !gimple_call_internal_p (stmt)) |
114 return; | 170 { |
115 } | 171 gsi_next (&i); |
116 else | 172 continue; |
117 { | 173 } |
118 /* If there is no dump file, print to stderr in case of loop | 174 ifn = gimple_call_internal_fn (stmt); |
119 vectorization. */ | 175 switch (ifn) |
120 if (!slp) | 176 { |
121 vect_dump = stderr; | 177 case IFN_GOMP_SIMD_LANE: |
122 | 178 case IFN_GOMP_SIMD_VF: |
123 return; | 179 case IFN_GOMP_SIMD_LAST_LANE: |
124 } | 180 break; |
125 } | 181 case IFN_GOMP_SIMD_ORDERED_START: |
126 | 182 case IFN_GOMP_SIMD_ORDERED_END: |
127 /* User didn't specify verbosity level: */ | 183 if (integer_onep (gimple_call_arg (stmt, 0))) |
128 if (dump_file && (dump_flags & TDF_DETAILS)) | 184 { |
129 vect_verbosity_level = REPORT_DETAILS; | 185 enum built_in_function bcode |
130 else if (dump_file && (dump_flags & TDF_STATS)) | 186 = (ifn == IFN_GOMP_SIMD_ORDERED_START |
131 vect_verbosity_level = REPORT_UNVECTORIZED_LOCATIONS; | 187 ? BUILT_IN_GOMP_ORDERED_START |
188 : BUILT_IN_GOMP_ORDERED_END); | |
189 gimple *g | |
190 = gimple_build_call (builtin_decl_explicit (bcode), 0); | |
191 tree vdef = gimple_vdef (stmt); | |
192 gimple_set_vdef (g, vdef); | |
193 SSA_NAME_DEF_STMT (vdef) = g; | |
194 gimple_set_vuse (g, gimple_vuse (stmt)); | |
195 gsi_replace (&i, g, true); | |
196 continue; | |
197 } | |
198 gsi_remove (&i, true); | |
199 unlink_stmt_vdef (stmt); | |
200 continue; | |
201 default: | |
202 gsi_next (&i); | |
203 continue; | |
204 } | |
205 tree arg = gimple_call_arg (stmt, 0); | |
206 gcc_assert (arg != NULL_TREE); | |
207 gcc_assert (TREE_CODE (arg) == SSA_NAME); | |
208 simduid_to_vf *p = NULL, data; | |
209 data.simduid = DECL_UID (SSA_NAME_VAR (arg)); | |
210 /* Need to nullify the loop safelen field since its value is not |
211 valid after transformation. */ | |
212 if (bb->loop_father && bb->loop_father->safelen > 0) | |
213 bb->loop_father->safelen = 0; | |
214 if (htab) | |
215 { | |
216 p = htab->find (&data); | |
217 if (p) | |
218 vf = p->vf; | |
219 } | |
220 switch (ifn) | |
221 { | |
222 case IFN_GOMP_SIMD_VF: | |
223 t = build_int_cst (unsigned_type_node, vf); | |
224 break; | |
225 case IFN_GOMP_SIMD_LANE: | |
226 t = build_int_cst (unsigned_type_node, 0); | |
227 break; | |
228 case IFN_GOMP_SIMD_LAST_LANE: | |
229 t = gimple_call_arg (stmt, 1); | |
230 break; | |
231 default: | |
232 gcc_unreachable (); | |
233 } | |
234 tree lhs = gimple_call_lhs (stmt); | |
235 if (lhs) | |
236 replace_uses_by (lhs, t); | |
237 release_defs (stmt); | |
238 gsi_remove (&i, true); | |
239 } | |
240 } | |
241 } | |
242 | |
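For context, the internal calls folded above originate from OpenMP simd lowering; a hypothetical source fragment that produces them (the GIMPLE names in the trailing comment are illustrative):

```cpp
/* Compile with -fopenmp-simd (or -fopenmp).  The reduction needs
   per-lane storage, which omp-lowering expresses as an
   "omp simd array" indexed by GOMP_SIMD_LANE.  */
float
sum_all (const float *x, int n)
{
  float sum = 0.0f;
#pragma omp simd reduction (+:sum)
  for (int i = 0; i < n; i++)
    sum += x[i];
  return sum;
}

/* Lowered loop body (roughly):
     _7 = GOMP_SIMD_LANE (simduid.0);
     D.1737[_7] = D.1737[_7] + x[i];
   adjust_simduid_builtins then folds GOMP_SIMD_VF to the actual
   vectorization factor (or 1 if the loop was not vectorized),
   GOMP_SIMD_LANE to 0, GOMP_SIMD_LAST_LANE to its second argument,
   and turns GOMP_SIMD_ORDERED_{START,END} markers into real
   GOMP_ordered_{start,end} calls or removes them.  */
```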
243 /* Helper structure for note_simd_array_uses. */ | |
244 | |
245 struct note_simd_array_uses_struct | |
246 { | |
247 hash_table<simd_array_to_simduid> **htab; | |
248 unsigned int simduid; | |
249 }; | |
250 | |
251 /* Callback for note_simd_array_uses, called through walk_gimple_op. */ | |
252 | |
253 static tree | |
254 note_simd_array_uses_cb (tree *tp, int *walk_subtrees, void *data) | |
255 { | |
256 struct walk_stmt_info *wi = (struct walk_stmt_info *) data; | |
257 struct note_simd_array_uses_struct *ns | |
258 = (struct note_simd_array_uses_struct *) wi->info; | |
259 | |
260 if (TYPE_P (*tp)) | |
261 *walk_subtrees = 0; | |
262 else if (VAR_P (*tp) | |
263 && lookup_attribute ("omp simd array", DECL_ATTRIBUTES (*tp)) | |
264 && DECL_CONTEXT (*tp) == current_function_decl) | |
265 { | |
266 simd_array_to_simduid data; | |
267 if (!*ns->htab) | |
268 *ns->htab = new hash_table<simd_array_to_simduid> (15); | |
269 data.decl = *tp; | |
270 data.simduid = ns->simduid; | |
271 simd_array_to_simduid **slot = (*ns->htab)->find_slot (&data, INSERT); | |
272 if (*slot == NULL) | |
273 { | |
274 simd_array_to_simduid *p = XNEW (simd_array_to_simduid); | |
275 *p = data; | |
276 *slot = p; | |
277 } | |
278 else if ((*slot)->simduid != ns->simduid) | |
279 (*slot)->simduid = -1U; | |
280 *walk_subtrees = 0; | |
281 } | |
282 return NULL_TREE; | |
283 } | |
284 | |
285 /* Find "omp simd array" temporaries and map them to corresponding | |
286 simduid. */ | |
287 | |
288 static void | |
289 note_simd_array_uses (hash_table<simd_array_to_simduid> **htab) | |
290 { | |
291 basic_block bb; | |
292 gimple_stmt_iterator gsi; | |
293 struct walk_stmt_info wi; | |
294 struct note_simd_array_uses_struct ns; | |
295 | |
296 memset (&wi, 0, sizeof (wi)); | |
297 wi.info = &ns; | |
298 ns.htab = htab; | |
299 | |
300 FOR_EACH_BB_FN (bb, cfun) | |
301 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
302 { | |
303 gimple *stmt = gsi_stmt (gsi); | |
304 if (!is_gimple_call (stmt) || !gimple_call_internal_p (stmt)) | |
305 continue; | |
306 switch (gimple_call_internal_fn (stmt)) | |
307 { | |
308 case IFN_GOMP_SIMD_LANE: | |
309 case IFN_GOMP_SIMD_VF: | |
310 case IFN_GOMP_SIMD_LAST_LANE: | |
311 break; | |
312 default: | |
313 continue; | |
314 } | |
315 tree lhs = gimple_call_lhs (stmt); | |
316 if (lhs == NULL_TREE) | |
317 continue; | |
318 imm_use_iterator use_iter; | |
319 gimple *use_stmt; | |
320 ns.simduid = DECL_UID (SSA_NAME_VAR (gimple_call_arg (stmt, 0))); | |
321 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, lhs) | |
322 if (!is_gimple_debug (use_stmt)) | |
323 walk_gimple_op (use_stmt, note_simd_array_uses_cb, &wi); | |
324 } | |
325 } | |
326 | |
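`note_simd_array_uses_cb` follows GCC's generic operand-walk idiom; a stripped-down sketch of the same pattern (`find_decl_cb` and `stmt_mentions_decl_p` are hypothetical helpers):

```cpp
/* Sketch: test whether STMT mentions DECL among its operands, using
   walk_gimple_op the same way note_simd_array_uses does.  User data
   travels in walk_stmt_info::info; zeroing *walk_subtrees prunes the
   walk, and a non-NULL return stops it early.  */
static tree
find_decl_cb (tree *tp, int *walk_subtrees, void *data)
{
  struct walk_stmt_info *wi = (struct walk_stmt_info *) data;
  if (TYPE_P (*tp))
    *walk_subtrees = 0;                 /* Never descend into types.  */
  else if (*tp == (tree) wi->info)
    return *tp;                         /* Found: abort the walk.  */
  return NULL_TREE;
}

static bool
stmt_mentions_decl_p (gimple *stmt, tree decl)
{
  struct walk_stmt_info wi;
  memset (&wi, 0, sizeof (wi));
  wi.info = decl;
  return walk_gimple_op (stmt, find_decl_cb, &wi) != NULL_TREE;
}
```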
327 /* Shrink arrays with "omp simd array" attribute to the corresponding | |
328 vectorization factor. */ | |
329 | |
330 static void | |
331 shrink_simd_arrays | |
332 (hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab, | |
333 hash_table<simduid_to_vf> *simduid_to_vf_htab) | |
334 { | |
335 for (hash_table<simd_array_to_simduid>::iterator iter | |
336 = simd_array_to_simduid_htab->begin (); | |
337 iter != simd_array_to_simduid_htab->end (); ++iter) | |
338 if ((*iter)->simduid != -1U) | |
339 { | |
340 tree decl = (*iter)->decl; | |
341 int vf = 1; | |
342 if (simduid_to_vf_htab) | |
343 { | |
344 simduid_to_vf *p = NULL, data; | |
345 data.simduid = (*iter)->simduid; | |
346 p = simduid_to_vf_htab->find (&data); | |
347 if (p) | |
348 vf = p->vf; | |
349 } | |
350 tree atype | |
351 = build_array_type_nelts (TREE_TYPE (TREE_TYPE (decl)), vf); | |
352 TREE_TYPE (decl) = atype; | |
353 relayout_decl (decl); | |
354 } | |
355 | |
356 delete simd_array_to_simduid_htab; | |
357 } | |
358 | |
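The type surgery at the heart of the loop above reads well in isolation; a standalone sketch (the helper name is hypothetical, the decl in the comment made up):

```cpp
/* Shrink DECL, e.g. an "omp simd array" created as float D.1737[16]
   for safelen(16), down to NELTS elements (float D.1737[4] when the
   loop was vectorized with VF 4): rebuild the array type, then
   recompute DECL_SIZE, DECL_ALIGN and friends.  */
static void
shrink_array_to (tree decl, int nelts)
{
  tree elt_type = TREE_TYPE (TREE_TYPE (decl));  /* element type */
  TREE_TYPE (decl) = build_array_type_nelts (elt_type, nelts);
  relayout_decl (decl);
}
```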
359 /* Initialize the vec_info with kind KIND_IN and target cost data | |
360 TARGET_COST_DATA_IN. */ | |
361 | |
362 vec_info::vec_info (vec_info::vec_kind kind_in, void *target_cost_data_in) | |
363 : kind (kind_in), | |
364 datarefs (vNULL), | |
365 ddrs (vNULL), | |
366 target_cost_data (target_cost_data_in) | |
367 { | |
368 } | |
369 | |
370 vec_info::~vec_info () | |
371 { | |
372 slp_instance instance; | |
373 struct data_reference *dr; | |
374 unsigned int i; | |
375 | |
376 FOR_EACH_VEC_ELT (datarefs, i, dr) | |
377 if (dr->aux) | |
378 { | |
379 free (dr->aux); | |
380 dr->aux = NULL; | |
381 } | |
382 | |
383 FOR_EACH_VEC_ELT (slp_instances, i, instance) | |
384 vect_free_slp_instance (instance); | |
385 | |
386 free_data_refs (datarefs); | |
387 free_dependence_relations (ddrs); | |
388 destroy_cost_data (target_cost_data); | |
389 } | |
390 | |
391 /* A helper function to free scev and LOOP niter information, as well as | |
392 clear loop constraint LOOP_C_FINITE. */ | |
393 | |
394 void | |
395 vect_free_loop_info_assumptions (struct loop *loop) | |
396 { | |
397 scev_reset_htab (); | |
398 /* We need to explicitly reset upper bound information since it is |
399 used even after free_numbers_of_iterations_estimates. */ | |
400 loop->any_upper_bound = false; | |
401 loop->any_likely_upper_bound = false; | |
402 free_numbers_of_iterations_estimates (loop); | |
403 loop_constraint_clear (loop, LOOP_C_FINITE); | |
404 } | |
405 | |
406 /* Return whether STMT is inside the region we try to vectorize. */ | |
407 | |
408 bool | |
409 vect_stmt_in_region_p (vec_info *vinfo, gimple *stmt) | |
410 { | |
411 if (!gimple_bb (stmt)) | |
412 return false; | |
413 | |
414 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) | |
415 { | |
416 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
417 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt))) | |
418 return false; | |
419 } | |
132 else | 420 else |
133 vect_verbosity_level = REPORT_NONE; | 421 { |
134 | 422 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo); |
135 gcc_assert (dump_file || vect_verbosity_level == REPORT_NONE); | 423 if (gimple_bb (stmt) != BB_VINFO_BB (bb_vinfo) |
136 } | 424 || gimple_uid (stmt) == -1U |
137 | 425 || gimple_code (stmt) == GIMPLE_PHI) |
138 | 426 return false; |
139 /* Function debug_loop_details. | 427 } |
140 | 428 |
141 For vectorization debug dumps. */ | 429 return true; |
142 | 430 } |
143 bool | 431 |
144 vect_print_dump_info (enum vect_verbosity_levels vl) | 432 |
145 { | 433 /* If LOOP has been versioned during ifcvt, return the internal call |
146 if (vl > vect_verbosity_level) | 434 guarding it. */ |
147 return false; | 435 |
148 | 436 static gimple * |
149 if (!current_function_decl || !vect_dump) | 437 vect_loop_vectorized_call (struct loop *loop) |
150 return false; | 438 { |
151 | 439 basic_block bb = loop_preheader_edge (loop)->src; |
152 if (vect_location == UNKNOWN_LOC) | 440 gimple *g; |
153 fprintf (vect_dump, "\n%s:%d: note: ", | 441 do |
154 DECL_SOURCE_FILE (current_function_decl), | 442 { |
155 DECL_SOURCE_LINE (current_function_decl)); | 443 g = last_stmt (bb); |
444 if (g) | |
445 break; | |
446 if (!single_pred_p (bb)) | |
447 break; | |
448 bb = single_pred (bb); | |
449 } | |
450 while (1); | |
451 if (g && gimple_code (g) == GIMPLE_COND) | |
452 { | |
453 gimple_stmt_iterator gsi = gsi_for_stmt (g); | |
454 gsi_prev (&gsi); | |
455 if (!gsi_end_p (gsi)) | |
456 { | |
457 g = gsi_stmt (gsi); | |
458 if (gimple_call_internal_p (g, IFN_LOOP_VECTORIZED) | |
459 && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->num | |
460 || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num)) | |
461 return g; | |
462 } | |
463 } | |
464 return NULL; | |
465 } | |
466 | |
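The shape being matched is what if-conversion leaves behind when it versions a loop (sketch; block numbers and SSA names are illustrative):

```cpp
/* CFG pattern searched by vect_loop_vectorized_call:

     <bb dominating the preheader>:
       _1 = .LOOP_VECTORIZED (2, 3);   // two loop numbers
       if (_1 != 0)
         goto <loop 2>;   // if-converted copy, vectorization candidate
       else
         goto <loop 3>;   // scalar copy

   Either version may be queried, which is why the match accepts
   loop->num in either argument.  Once the vectorizer decides,
   fold_loop_internal_call below replaces _1 with boolean_true_node
   or boolean_false_node, and CFG cleanup removes the dead copy.  */
```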
467 /* Fold loop internal call G like IFN_LOOP_VECTORIZED/IFN_LOOP_DIST_ALIAS | |
468 to VALUE and update any immediate uses of its LHS. */ |
469 | |
470 static void | |
471 fold_loop_internal_call (gimple *g, tree value) | |
472 { | |
473 tree lhs = gimple_call_lhs (g); | |
474 use_operand_p use_p; | |
475 imm_use_iterator iter; | |
476 gimple *use_stmt; | |
477 gimple_stmt_iterator gsi = gsi_for_stmt (g); | |
478 | |
479 update_call_from_tree (&gsi, value); | |
480 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) | |
481 { | |
482 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) | |
483 SET_USE (use_p, value); | |
484 update_stmt (use_stmt); | |
485 } | |
486 } | |
487 | |
488 /* If LOOP has been versioned during loop distribution, return the guarding |
489 internal call. */ | |
490 | |
491 static gimple * | |
492 vect_loop_dist_alias_call (struct loop *loop) | |
493 { | |
494 basic_block bb; | |
495 basic_block entry; | |
496 struct loop *outer, *orig; | |
497 gimple_stmt_iterator gsi; | |
498 gimple *g; | |
499 | |
500 if (loop->orig_loop_num == 0) | |
501 return NULL; | |
502 | |
503 orig = get_loop (cfun, loop->orig_loop_num); | |
504 if (orig == NULL) | |
505 { | |
506 /* The original loop has somehow been destroyed. Clear the information. */ |
507 loop->orig_loop_num = 0; | |
508 return NULL; | |
509 } | |
510 | |
511 if (loop != orig) | |
512 bb = nearest_common_dominator (CDI_DOMINATORS, loop->header, orig->header); | |
156 else | 513 else |
157 fprintf (vect_dump, "\n%s:%d: note: ", | 514 bb = loop_preheader_edge (loop)->src; |
158 LOC_FILE (vect_location), LOC_LINE (vect_location)); | 515 |
159 | 516 outer = bb->loop_father; |
160 return true; | 517 entry = ENTRY_BLOCK_PTR_FOR_FN (cfun); |
161 } | 518 |
162 | 519 /* Look upward in dominance tree. */ |
520 for (; bb != entry && flow_bb_inside_loop_p (outer, bb); | |
521 bb = get_immediate_dominator (CDI_DOMINATORS, bb)) | |
522 { | |
523 g = last_stmt (bb); | |
524 if (g == NULL || gimple_code (g) != GIMPLE_COND) | |
525 continue; | |
526 | |
527 gsi = gsi_for_stmt (g); | |
528 gsi_prev (&gsi); | |
529 if (gsi_end_p (gsi)) | |
530 continue; | |
531 | |
532 g = gsi_stmt (gsi); | |
533 /* The guarding internal function call must have the same distribution | |
534 alias id. */ | |
535 if (gimple_call_internal_p (g, IFN_LOOP_DIST_ALIAS) | |
536 && (tree_to_shwi (gimple_call_arg (g, 0)) == loop->orig_loop_num)) | |
537 return g; | |
538 } | |
539 return NULL; | |
540 } | |
541 | |
542 /* Set the uids of all the statements in basic blocks inside loop | |
543 represented by LOOP_VINFO. LOOP_VECTORIZED_CALL is the internal | |
544 call guarding the loop which has been if converted. */ | |
545 static void | |
546 set_uid_loop_bbs (loop_vec_info loop_vinfo, gimple *loop_vectorized_call) | |
547 { | |
548 tree arg = gimple_call_arg (loop_vectorized_call, 1); | |
549 basic_block *bbs; | |
550 unsigned int i; | |
551 struct loop *scalar_loop = get_loop (cfun, tree_to_shwi (arg)); | |
552 | |
553 LOOP_VINFO_SCALAR_LOOP (loop_vinfo) = scalar_loop; | |
554 gcc_checking_assert (vect_loop_vectorized_call (scalar_loop) | |
555 == loop_vectorized_call); | |
556 /* If we are going to vectorize the outer loop, prevent vectorization |
557 of the inner loop in the scalar loop - either the scalar loop is |
558 thrown away, so it is wasted work, or is used only for |
559 a few iterations. */ | |
560 if (scalar_loop->inner) | |
561 { | |
562 gimple *g = vect_loop_vectorized_call (scalar_loop->inner); | |
563 if (g) | |
564 { | |
565 arg = gimple_call_arg (g, 0); | |
566 get_loop (cfun, tree_to_shwi (arg))->dont_vectorize = true; | |
567 fold_loop_internal_call (g, boolean_false_node); | |
568 } | |
569 } | |
570 bbs = get_loop_body (scalar_loop); | |
571 for (i = 0; i < scalar_loop->num_nodes; i++) | |
572 { | |
573 basic_block bb = bbs[i]; | |
574 gimple_stmt_iterator gsi; | |
575 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
576 { | |
577 gimple *phi = gsi_stmt (gsi); | |
578 gimple_set_uid (phi, 0); | |
579 } | |
580 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
581 { | |
582 gimple *stmt = gsi_stmt (gsi); | |
583 gimple_set_uid (stmt, 0); | |
584 } | |
585 } | |
586 free (bbs); | |
587 } | |
163 | 588 |
164 /* Function vectorize_loops. | 589 /* Function vectorize_loops. |
165 | 590 |
166 Entry point to loop vectorization phase. */ | 591 Entry point to loop vectorization phase. */ |
167 | 592 |
169 vectorize_loops (void) | 594 vectorize_loops (void) |
170 { | 595 { |
171 unsigned int i; | 596 unsigned int i; |
172 unsigned int num_vectorized_loops = 0; | 597 unsigned int num_vectorized_loops = 0; |
173 unsigned int vect_loops_num; | 598 unsigned int vect_loops_num; |
174 loop_iterator li; | |
175 struct loop *loop; | 599 struct loop *loop; |
176 | 600 hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL; |
177 vect_loops_num = number_of_loops (); | 601 hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL; |
602 bool any_ifcvt_loops = false; | |
603 unsigned ret = 0; | |
604 struct loop *new_loop; | |
605 | |
606 vect_loops_num = number_of_loops (cfun); | |
178 | 607 |
179 /* Bail out if there are no loops. */ | 608 /* Bail out if there are no loops. */ |
180 if (vect_loops_num <= 1) | 609 if (vect_loops_num <= 1) |
181 return 0; | 610 return 0; |
182 | 611 |
183 /* Fix the verbosity level if not defined explicitly by the user. */ | 612 if (cfun->has_simduid_loops) |
184 vect_set_dump_settings (false); | 613 note_simd_array_uses (&simd_array_to_simduid_htab); |
185 | 614 |
186 init_stmt_vec_info_vec (); | 615 init_stmt_vec_info_vec (); |
187 | 616 |
188 /* ----------- Analyze loops. ----------- */ | 617 /* ----------- Analyze loops. ----------- */ |
189 | 618 |
190 /* If some loop was duplicated, it gets bigger number | 619 /* If some loop was duplicated, it gets bigger number |
191 than all previously defined loops. This fact allows us to run | 620 than all previously defined loops. This fact allows us to run |
192 only over initial loops skipping newly generated ones. */ | 621 only over initial loops skipping newly generated ones. */ |
193 FOR_EACH_LOOP (li, loop, 0) | 622 FOR_EACH_LOOP (loop, 0) |
194 if (optimize_loop_nest_for_speed_p (loop)) | 623 if (loop->dont_vectorize) |
195 { | 624 { |
196 loop_vec_info loop_vinfo; | 625 any_ifcvt_loops = true; |
197 | 626 /* If-conversion sometimes versions both the outer loop |
627 (for the case when outer loop vectorization might be | |
628 desirable) as well as the inner loop in the scalar version | |
629 of the loop. So we have: | |
630 if (LOOP_VECTORIZED (1, 3)) | |
631 { | |
632 loop1 | |
633 loop2 | |
634 } | |
635 else | |
636 loop3 (copy of loop1) | |
637 if (LOOP_VECTORIZED (4, 5)) | |
638 loop4 (copy of loop2) | |
639 else | |
640 loop5 (copy of loop4) | |
641 If FOR_EACH_LOOP gives us loop3 first (which has | |
642 dont_vectorize set), make sure to process loop1 before loop4; | |
643 so that we can prevent vectorization of loop4 if loop1 | |
644 is successfully vectorized. */ | |
645 if (loop->inner) | |
646 { | |
647 gimple *loop_vectorized_call | |
648 = vect_loop_vectorized_call (loop); | |
649 if (loop_vectorized_call | |
650 && vect_loop_vectorized_call (loop->inner)) | |
651 { | |
652 tree arg = gimple_call_arg (loop_vectorized_call, 0); | |
653 struct loop *vector_loop | |
654 = get_loop (cfun, tree_to_shwi (arg)); | |
655 if (vector_loop && vector_loop != loop) | |
656 { | |
657 loop = vector_loop; | |
658 /* Make sure we don't vectorize it twice. */ | |
659 loop->dont_vectorize = true; | |
660 goto try_vectorize; | |
661 } | |
662 } | |
663 } | |
664 } | |
665 else | |
666 { | |
667 loop_vec_info loop_vinfo, orig_loop_vinfo; | |
668 gimple *loop_vectorized_call, *loop_dist_alias_call; | |
669 try_vectorize: | |
670 if (!((flag_tree_loop_vectorize | |
671 && optimize_loop_nest_for_speed_p (loop)) | |
672 || loop->force_vectorize)) | |
673 continue; | |
674 orig_loop_vinfo = NULL; | |
675 loop_vectorized_call = vect_loop_vectorized_call (loop); | |
676 loop_dist_alias_call = vect_loop_dist_alias_call (loop); | |
677 vectorize_epilogue: | |
198 vect_location = find_loop_location (loop); | 678 vect_location = find_loop_location (loop); |
199 loop_vinfo = vect_analyze_loop (loop); | 679 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION |
680 && dump_enabled_p ()) | |
681 dump_printf (MSG_NOTE, "\nAnalyzing loop at %s:%d\n", | |
682 LOCATION_FILE (vect_location), | |
683 LOCATION_LINE (vect_location)); | |
684 | |
685 loop_vinfo = vect_analyze_loop (loop, orig_loop_vinfo); | |
200 loop->aux = loop_vinfo; | 686 loop->aux = loop_vinfo; |
201 | 687 |
202 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) | 688 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) |
203 continue; | 689 { |
204 | 690 /* Free existing information if loop is analyzed with some |
205 vect_transform_loop (loop_vinfo); | 691 assumptions. */ |
692 if (loop_constraint_set_p (loop, LOOP_C_FINITE)) | |
693 vect_free_loop_info_assumptions (loop); | |
694 | |
695 /* If we applied if-conversion then try to vectorize the | |
696 BB of innermost loops. | |
697 ??? Ideally BB vectorization would learn to vectorize | |
698 control flow by applying if-conversion on-the-fly, the | |
699 following retains the if-converted loop body even when | |
700 only non-if-converted parts took part in BB vectorization. */ | |
701 if (flag_tree_slp_vectorize != 0 | |
702 && loop_vectorized_call | |
703 && ! loop->inner) | |
704 { | |
705 basic_block bb = loop->header; | |
706 bool has_mask_load_store = false; | |
707 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); | |
708 !gsi_end_p (gsi); gsi_next (&gsi)) | |
709 { | |
710 gimple *stmt = gsi_stmt (gsi); | |
711 if (is_gimple_call (stmt) | |
712 && gimple_call_internal_p (stmt) | |
713 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD | |
714 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE)) | |
715 { | |
716 has_mask_load_store = true; | |
717 break; | |
718 } | |
719 gimple_set_uid (stmt, -1); | |
720 gimple_set_visited (stmt, false); | |
721 } | |
722 if (! has_mask_load_store && vect_slp_bb (bb)) | |
723 { | |
724 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, | |
725 "basic block vectorized\n"); | |
726 fold_loop_internal_call (loop_vectorized_call, | |
727 boolean_true_node); | |
728 loop_vectorized_call = NULL; | |
729 ret |= TODO_cleanup_cfg; | |
730 } | |
731 } | |
732 /* If outer loop vectorization fails for LOOP_VECTORIZED guarded | |
733 loop, don't vectorize its inner loop; we'll attempt to | |
734 vectorize LOOP_VECTORIZED guarded inner loop of the scalar | |
735 loop version. */ | |
736 if (loop_vectorized_call && loop->inner) | |
737 loop->inner->dont_vectorize = true; | |
738 continue; | |
739 } | |
740 | |
741 if (!dbg_cnt (vect_loop)) | |
742 { | |
743 /* We may miss some if-converted loops due to | |
744 debug counter. Set any_ifcvt_loops to visit | |
745 them at finalization. */ | |
746 any_ifcvt_loops = true; | |
747 /* Free existing information if loop is analyzed with some | |
748 assumptions. */ | |
749 if (loop_constraint_set_p (loop, LOOP_C_FINITE)) | |
750 vect_free_loop_info_assumptions (loop); | |
751 | |
752 break; | |
753 } | |
754 | |
755 if (loop_vectorized_call) | |
756 set_uid_loop_bbs (loop_vinfo, loop_vectorized_call); | |
757 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION | |
758 && dump_enabled_p ()) | |
759 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, | |
760 "loop vectorized\n"); | |
761 new_loop = vect_transform_loop (loop_vinfo); | |
206 num_vectorized_loops++; | 762 num_vectorized_loops++; |
763 /* Now that the loop has been vectorized, allow it to be unrolled | |
764 etc. */ | |
765 loop->force_vectorize = false; | |
766 | |
767 if (loop->simduid) | |
768 { | |
769 simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf); | |
770 if (!simduid_to_vf_htab) | |
771 simduid_to_vf_htab = new hash_table<simduid_to_vf> (15); | |
772 simduid_to_vf_data->simduid = DECL_UID (loop->simduid); | |
773 simduid_to_vf_data->vf = loop_vinfo->vectorization_factor; | |
774 *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT) | |
775 = simduid_to_vf_data; | |
776 } | |
777 | |
778 if (loop_vectorized_call) | |
779 { | |
780 fold_loop_internal_call (loop_vectorized_call, boolean_true_node); | |
781 loop_vectorized_call = NULL; | |
782 ret |= TODO_cleanup_cfg; | |
783 } | |
784 if (loop_dist_alias_call) | |
785 { | |
786 tree value = gimple_call_arg (loop_dist_alias_call, 1); | |
787 fold_loop_internal_call (loop_dist_alias_call, value); | |
788 loop_dist_alias_call = NULL; | |
789 ret |= TODO_cleanup_cfg; | |
790 } | |
791 | |
792 if (new_loop) | |
793 { | |
794 /* Epilogue of vectorized loop must be vectorized too. */ | |
795 vect_loops_num = number_of_loops (cfun); | |
796 loop = new_loop; | |
797 orig_loop_vinfo = loop_vinfo; /* To pass vect_analyze_loop. */ | |
798 goto vectorize_epilogue; | |
799 } | |
207 } | 800 } |
208 | 801 |
209 vect_location = UNKNOWN_LOC; | 802 vect_location = UNKNOWN_LOCATION; |
210 | 803 |
211 statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops); | 804 statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops); |
212 if (vect_print_dump_info (REPORT_UNVECTORIZED_LOCATIONS) | 805 if (dump_enabled_p () |
213 || (num_vectorized_loops > 0 | 806 || (num_vectorized_loops > 0 && dump_enabled_p ())) |
214 && vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS))) | 807 dump_printf_loc (MSG_NOTE, vect_location, |
215 fprintf (vect_dump, "vectorized %u loops in function.\n", | 808 "vectorized %u loops in function.\n", |
216 num_vectorized_loops); | 809 num_vectorized_loops); |
217 | 810 |
218 /* ----------- Finalize. ----------- */ | 811 /* ----------- Finalize. ----------- */ |
219 | 812 |
220 mark_sym_for_renaming (gimple_vop (cfun)); | 813 if (any_ifcvt_loops) |
814 for (i = 1; i < vect_loops_num; i++) | |
815 { | |
816 loop = get_loop (cfun, i); | |
817 if (loop && loop->dont_vectorize) | |
818 { | |
819 gimple *g = vect_loop_vectorized_call (loop); | |
820 if (g) | |
821 { | |
822 fold_loop_internal_call (g, boolean_false_node); | |
823 ret |= TODO_cleanup_cfg; | |
824 g = NULL; | |
825 } | |
826 else | |
827 g = vect_loop_dist_alias_call (loop); | |
828 | |
829 if (g) | |
830 { | |
831 fold_loop_internal_call (g, boolean_false_node); | |
832 ret |= TODO_cleanup_cfg; | |
833 } | |
834 } | |
835 } | |
221 | 836 |
222 for (i = 1; i < vect_loops_num; i++) | 837 for (i = 1; i < vect_loops_num; i++) |
223 { | 838 { |
224 loop_vec_info loop_vinfo; | 839 loop_vec_info loop_vinfo; |
225 | 840 bool has_mask_store; |
226 loop = get_loop (i); | 841 |
842 loop = get_loop (cfun, i); | |
227 if (!loop) | 843 if (!loop) |
228 continue; | 844 continue; |
229 loop_vinfo = (loop_vec_info) loop->aux; | 845 loop_vinfo = (loop_vec_info) loop->aux; |
230 destroy_loop_vec_info (loop_vinfo, true); | 846 has_mask_store = false; |
847 if (loop_vinfo) | |
848 has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo); | |
849 delete loop_vinfo; | |
850 if (has_mask_store) | |
851 optimize_mask_stores (loop); | |
231 loop->aux = NULL; | 852 loop->aux = NULL; |
232 } | 853 } |
233 | 854 |
234 free_stmt_vec_info_vec (); | 855 free_stmt_vec_info_vec (); |
235 | 856 |
236 return num_vectorized_loops > 0 ? TODO_cleanup_cfg : 0; | 857 /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */ |
858 if (cfun->has_simduid_loops) | |
859 adjust_simduid_builtins (simduid_to_vf_htab); | |
860 | |
861 /* Shrink any "omp simd array" temporary arrays to the |
862 actual vectorization factors. */ | |
863 if (simd_array_to_simduid_htab) | |
864 shrink_simd_arrays (simd_array_to_simduid_htab, simduid_to_vf_htab); | |
865 delete simduid_to_vf_htab; | |
866 cfun->has_simduid_loops = false; | |
867 | |
868 if (num_vectorized_loops > 0) | |
869 { | |
870 /* If we vectorized any loop only virtual SSA form needs to be updated. | |
871 ??? Also while we try hard to update loop-closed SSA form we fail | |
872 to properly do this in some corner-cases (see PR56286). */ | |
873 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_only_virtuals); | |
874 return TODO_cleanup_cfg; | |
875 } | |
876 | |
877 return ret; | |
878 } | |
879 | |
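Given its length, the driver above is easier to follow as an outline (summary only, not code):

```cpp
/* Condensed flow of vectorize_loops:

   1. Collect "omp simd array" uses if the function has simduid loops.
   2. For each pre-existing loop:
      - dont_vectorize set (scalar copy from if-conversion): if an
        outer vectorized version exists, redirect to it first, so that
        loop1 is processed before loop4 in the comment's example;
      - otherwise analyze the loop; on failure, optionally try SLP on
        the if-converted body and mark the duplicated inner loop
        dont_vectorize; on success, transform it, record simduid -> VF,
        fold its .LOOP_VECTORIZED / .LOOP_DIST_ALIAS guard to keep this
        version, and jump back to vectorize the epilogue loop if one
        was produced.
   3. Fold the guards of never-vectorized loops to false, run
      optimize_mask_stores where a mask store survived, fold the
      GOMP_SIMD_* builtins, shrink the simd arrays, and update
      virtual SSA form.  */
```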
880 | |
881 /* Entry point to the simduid cleanup pass. */ | |
882 | |
883 namespace { | |
884 | |
885 const pass_data pass_data_simduid_cleanup = | |
886 { | |
887 GIMPLE_PASS, /* type */ | |
888 "simduid", /* name */ | |
889 OPTGROUP_NONE, /* optinfo_flags */ | |
890 TV_NONE, /* tv_id */ | |
891 ( PROP_ssa | PROP_cfg ), /* properties_required */ | |
892 0, /* properties_provided */ | |
893 0, /* properties_destroyed */ | |
894 0, /* todo_flags_start */ | |
895 0, /* todo_flags_finish */ | |
896 }; | |
897 | |
898 class pass_simduid_cleanup : public gimple_opt_pass | |
899 { | |
900 public: | |
901 pass_simduid_cleanup (gcc::context *ctxt) | |
902 : gimple_opt_pass (pass_data_simduid_cleanup, ctxt) | |
903 {} | |
904 | |
905 /* opt_pass methods: */ | |
906 opt_pass * clone () { return new pass_simduid_cleanup (m_ctxt); } | |
907 virtual bool gate (function *fun) { return fun->has_simduid_loops; } | |
908 virtual unsigned int execute (function *); | |
909 | |
910 }; // class pass_simduid_cleanup | |
911 | |
912 unsigned int | |
913 pass_simduid_cleanup::execute (function *fun) | |
914 { | |
915 hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL; | |
916 | |
917 note_simd_array_uses (&simd_array_to_simduid_htab); | |
918 | |
919 /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */ | |
920 adjust_simduid_builtins (NULL); | |
921 | |
922 /* Shrink any "omp simd array" temporary arrays to the |
923 actual vectorization factors. */ | |
924 if (simd_array_to_simduid_htab) | |
925 shrink_simd_arrays (simd_array_to_simduid_htab, NULL); | |
926 fun->has_simduid_loops = false; | |
927 return 0; | |
928 } | |
929 | |
930 } // anon namespace | |
931 | |
932 gimple_opt_pass * | |
933 make_pass_simduid_cleanup (gcc::context *ctxt) | |
934 { | |
935 return new pass_simduid_cleanup (ctxt); | |
237 } | 936 } |
238 | 937 |
239 | 938 |
240 /* Entry point to basic block SLP phase. */ | 939 /* Entry point to basic block SLP phase. */ |
241 | 940 |
242 static unsigned int | 941 namespace { |
243 execute_vect_slp (void) | 942 |
943 const pass_data pass_data_slp_vectorize = | |
944 { | |
945 GIMPLE_PASS, /* type */ | |
946 "slp", /* name */ | |
947 OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */ | |
948 TV_TREE_SLP_VECTORIZATION, /* tv_id */ | |
949 ( PROP_ssa | PROP_cfg ), /* properties_required */ | |
950 0, /* properties_provided */ | |
951 0, /* properties_destroyed */ | |
952 0, /* todo_flags_start */ | |
953 TODO_update_ssa, /* todo_flags_finish */ | |
954 }; | |
955 | |
956 class pass_slp_vectorize : public gimple_opt_pass | |
957 { | |
958 public: | |
959 pass_slp_vectorize (gcc::context *ctxt) | |
960 : gimple_opt_pass (pass_data_slp_vectorize, ctxt) | |
961 {} | |
962 | |
963 /* opt_pass methods: */ | |
964 opt_pass * clone () { return new pass_slp_vectorize (m_ctxt); } | |
965 virtual bool gate (function *) { return flag_tree_slp_vectorize != 0; } | |
966 virtual unsigned int execute (function *); | |
967 | |
968 }; // class pass_slp_vectorize | |
969 | |
970 unsigned int | |
971 pass_slp_vectorize::execute (function *fun) | |
244 { | 972 { |
245 basic_block bb; | 973 basic_block bb; |
246 | 974 |
247 /* Fix the verbosity level if not defined explicitly by the user. */ | 975 bool in_loop_pipeline = scev_initialized_p (); |
248 vect_set_dump_settings (true); | 976 if (!in_loop_pipeline) |
977 { | |
978 loop_optimizer_init (LOOPS_NORMAL); | |
979 scev_initialize (); | |
980 } | |
981 | |
982 /* Mark all stmts as not belonging to the current region and unvisited. */ | |
983 FOR_EACH_BB_FN (bb, fun) | |
984 { | |
985 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); | |
986 gsi_next (&gsi)) | |
987 { | |
988 gimple *stmt = gsi_stmt (gsi); | |
989 gimple_set_uid (stmt, -1); | |
990 gimple_set_visited (stmt, false); | |
991 } | |
992 } | |
249 | 993 |
250 init_stmt_vec_info_vec (); | 994 init_stmt_vec_info_vec (); |
251 | 995 |
252 FOR_EACH_BB (bb) | 996 FOR_EACH_BB_FN (bb, fun) |
253 { | 997 { |
254 vect_location = find_bb_location (bb); | 998 if (vect_slp_bb (bb)) |
255 | 999 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, |
256 if (vect_slp_analyze_bb (bb)) | 1000 "basic block vectorized\n"); |
257 { | |
258 vect_slp_transform_bb (bb); | |
259 | |
260 if (vect_print_dump_info (REPORT_VECTORIZED_LOCATIONS)) | |
261 fprintf (vect_dump, "basic block vectorized using SLP\n"); | |
262 } | |
263 } | 1001 } |
264 | 1002 |
265 free_stmt_vec_info_vec (); | 1003 free_stmt_vec_info_vec (); |
1004 | |
1005 if (!in_loop_pipeline) | |
1006 { | |
1007 scev_finalize (); | |
1008 loop_optimizer_finalize (); | |
1009 } | |
1010 | |
266 return 0; | 1011 return 0; |
267 } | 1012 } |
268 | 1013 |
269 static bool | 1014 } // anon namespace |
270 gate_vect_slp (void) | 1015 |
271 { | 1016 gimple_opt_pass * |
272 /* Apply SLP either if the vectorizer is on and the user didn't specify | 1017 make_pass_slp_vectorize (gcc::context *ctxt) |
273 whether to run SLP or not, or if the SLP flag was set by the user. */ | 1018 { |
274 return ((flag_tree_vectorize != 0 && flag_tree_slp_vectorize != 0) | 1019 return new pass_slp_vectorize (ctxt); |
275 || flag_tree_slp_vectorize == 1); | 1020 } |
276 } | |
277 | |
278 struct gimple_opt_pass pass_slp_vectorize = | |
279 { | |
280 { | |
281 GIMPLE_PASS, | |
282 "slp", /* name */ | |
283 gate_vect_slp, /* gate */ | |
284 execute_vect_slp, /* execute */ | |
285 NULL, /* sub */ | |
286 NULL, /* next */ | |
287 0, /* static_pass_number */ | |
288 TV_TREE_SLP_VECTORIZATION, /* tv_id */ | |
289 PROP_ssa | PROP_cfg, /* properties_required */ | |
290 0, /* properties_provided */ | |
291 0, /* properties_destroyed */ | |
292 0, /* todo_flags_start */ | |
293 TODO_ggc_collect | |
294 | TODO_verify_ssa | |
295 | TODO_dump_func | |
296 | TODO_update_ssa | |
297 | TODO_verify_stmts /* todo_flags_finish */ | |
298 } | |
299 }; | |
300 | 1021 |
301 | 1022 |
302 /* Increase alignment of global arrays to improve vectorization potential. | 1023 /* Increase alignment of global arrays to improve vectorization potential. |
303 TODO: | 1024 TODO: |
304 - Consider also structs that have an array field. | 1025 - Consider also structs that have an array field. |
305 - Use ipa analysis to prune arrays that can't be vectorized? | 1026 - Use ipa analysis to prune arrays that can't be vectorized? |
306 This should involve global alignment analysis and in the future also | 1027 This should involve global alignment analysis and in the future also |
307 array padding. */ | 1028 array padding. */ |
308 | 1029 |
1030 static unsigned get_vec_alignment_for_type (tree); | |
1031 static hash_map<tree, unsigned> *type_align_map; | |
1032 | |
1033 /* Return alignment of array's vector type corresponding to scalar type. | |
1034 0 if no vector type exists. */ | |
1035 static unsigned | |
1036 get_vec_alignment_for_array_type (tree type) | |
1037 { | |
1038 gcc_assert (TREE_CODE (type) == ARRAY_TYPE); | |
1039 | |
1040 tree vectype = get_vectype_for_scalar_type (strip_array_types (type)); | |
1041 if (!vectype | |
1042 || !TYPE_SIZE (type) | |
1043 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST | |
1044 || tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (vectype))) | |
1045 return 0; | |
1046 | |
1047 return TYPE_ALIGN (vectype); | |
1048 } | |
1049 | |
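A worked instance of this computation (target-dependent; the figures assume an x86-64 target with SSE):

```cpp
/* For   int a[256];
   strip_array_types (TREE_TYPE (a)) is "int",
   get_vectype_for_scalar_type ("int") yields V4SI,
   TYPE_SIZE (int[256]) = 8192 bits >= TYPE_SIZE (V4SI) = 128 bits,
   so the function returns TYPE_ALIGN (V4SI) = 128 bits.

   For   int a[2];
   the array (64 bits) is smaller than the vector type, the
   tree_int_cst_lt check fires, and 0 is returned: raising the
   alignment of such a small array cannot enable vector accesses.  */
```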
1050 /* Return alignment of field having maximum alignment of vector type | |
1051 corresponding to its scalar type. For now, we only consider fields whose |
1052 offset is a multiple of its vector alignment. |
1053 0 if no suitable field is found. */ | |
1054 static unsigned | |
1055 get_vec_alignment_for_record_type (tree type) | |
1056 { | |
1057 gcc_assert (TREE_CODE (type) == RECORD_TYPE); | |
1058 | |
1059 unsigned max_align = 0, alignment; | |
1060 HOST_WIDE_INT offset; | |
1061 tree offset_tree; | |
1062 | |
1063 if (TYPE_PACKED (type)) | |
1064 return 0; | |
1065 | |
1066 unsigned *slot = type_align_map->get (type); | |
1067 if (slot) | |
1068 return *slot; | |
1069 | |
1070 for (tree field = first_field (type); | |
1071 field != NULL_TREE; | |
1072 field = DECL_CHAIN (field)) | |
1073 { | |
1074 /* Skip if not FIELD_DECL or if alignment is set by user. */ | |
1075 if (TREE_CODE (field) != FIELD_DECL | |
1076 || DECL_USER_ALIGN (field) | |
1077 || DECL_ARTIFICIAL (field)) | |
1078 continue; | |
1079 | |
1080 /* We don't need to process the type further if offset is variable, | |
1081 since the offsets of remaining members will also be variable. */ | |
1082 if (TREE_CODE (DECL_FIELD_OFFSET (field)) != INTEGER_CST | |
1083 || TREE_CODE (DECL_FIELD_BIT_OFFSET (field)) != INTEGER_CST) | |
1084 break; | |
1085 | |
1086 /* Similarly stop processing the type if offset_tree | |
1087 does not fit in unsigned HOST_WIDE_INT. */ | |
1088 offset_tree = bit_position (field); | |
1089 if (!tree_fits_uhwi_p (offset_tree)) | |
1090 break; | |
1091 | |
1092 offset = tree_to_uhwi (offset_tree); | |
1093 alignment = get_vec_alignment_for_type (TREE_TYPE (field)); | |
1094 | |
1095 /* Get maximum alignment of vectorized field/array among those members | |
1096 whose offset is multiple of the vector alignment. */ | |
1097 if (alignment | |
1098 && (offset % alignment == 0) | |
1099 && (alignment > max_align)) | |
1100 max_align = alignment; | |
1101 } | |
1102 | |
1103 type_align_map->put (type, max_align); | |
1104 return max_align; | |
1105 } | |
1106 | |
1107 /* Return alignment of vector type corresponding to decl's scalar type | |
1108 or 0 if it doesn't exist or the vector alignment is less than |
1109 decl's alignment. */ | |
1110 static unsigned | |
1111 get_vec_alignment_for_type (tree type) | |
1112 { | |
1113 if (type == NULL_TREE) | |
1114 return 0; | |
1115 | |
1116 gcc_assert (TYPE_P (type)); | |
1117 | |
1118 static unsigned alignment = 0; | |
1119 switch (TREE_CODE (type)) | |
1120 { | |
1121 case ARRAY_TYPE: | |
1122 alignment = get_vec_alignment_for_array_type (type); | |
1123 break; | |
1124 case RECORD_TYPE: | |
1125 alignment = get_vec_alignment_for_record_type (type); | |
1126 break; | |
1127 default: | |
1128 alignment = 0; | |
1129 break; | |
1130 } | |
1131 | |
1132 return (alignment > TYPE_ALIGN (type)) ? alignment : 0; | |
1133 } | |
1134 | |
1135 /* Entry point to increase_alignment pass. */ | |
309 static unsigned int | 1136 static unsigned int |
310 increase_alignment (void) | 1137 increase_alignment (void) |
311 { | 1138 { |
312 struct varpool_node *vnode; | 1139 varpool_node *vnode; |
1140 | |
1141 vect_location = UNKNOWN_LOCATION; | |
1142 type_align_map = new hash_map<tree, unsigned>; | |
313 | 1143 |
314 /* Increase the alignment of all global arrays for vectorization. */ | 1144 /* Increase the alignment of all global arrays for vectorization. */ |
315 for (vnode = varpool_nodes_queue; | 1145 FOR_EACH_DEFINED_VARIABLE (vnode) |
316 vnode; | 1146 { |
317 vnode = vnode->next_needed) | 1147 tree decl = vnode->decl; |
318 { | |
319 tree vectype, decl = vnode->decl; | |
320 tree t; | |
321 unsigned int alignment; | 1148 unsigned int alignment; |
322 | 1149 |
323 t = TREE_TYPE(decl); | 1150 if ((decl_in_symtab_p (decl) |
324 if (TREE_CODE (t) != ARRAY_TYPE) | 1151 && !symtab_node::get (decl)->can_increase_alignment_p ()) |
325 continue; | 1152 || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl)) |
326 vectype = get_vectype_for_scalar_type (strip_array_types (t)); | 1153 continue; |
327 if (!vectype) | 1154 |
328 continue; | 1155 alignment = get_vec_alignment_for_type (TREE_TYPE (decl)); |
329 alignment = TYPE_ALIGN (vectype); | 1156 if (alignment && vect_can_force_dr_alignment_p (decl, alignment)) |
330 if (DECL_ALIGN (decl) >= alignment) | |
331 continue; | |
332 | |
333 if (vect_can_force_dr_alignment_p (decl, alignment)) | |
334 { | 1157 { |
335 DECL_ALIGN (decl) = TYPE_ALIGN (vectype); | 1158 vnode->increase_alignment (alignment); |
336 DECL_USER_ALIGN (decl) = 1; | 1159 dump_printf (MSG_NOTE, "Increasing alignment of decl: "); |
337 if (dump_file) | 1160 dump_generic_expr (MSG_NOTE, TDF_SLIM, decl); |
338 { | 1161 dump_printf (MSG_NOTE, "\n"); |
339 fprintf (dump_file, "Increasing alignment of decl: "); | |
340 print_generic_expr (dump_file, decl, TDF_SLIM); | |
341 fprintf (dump_file, "\n"); | |
342 } | |
343 } | 1162 } |
344 } | 1163 } |
1164 | |
1165 delete type_align_map; | |
345 return 0; | 1166 return 0; |
346 } | 1167 } |
347 | 1168 |
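A hypothetical end-to-end exercise of the pass (file name and flags illustrative; `-fsection-anchors` is only supported on some targets, e.g. AArch64 or PowerPC, and the gate also requires loop vectorization to be enabled):

```cpp
/* test.cc: build with
     g++ -O3 -ftree-loop-vectorize -fsection-anchors -c test.cc
   The pass may raise each array's alignment to that of the target's
   vector type (e.g. 16 bytes for V4SF), as if it were declared with
   __attribute__ ((aligned (16))), so the vectorized loop below can
   use aligned accesses without peeling.  */
static float a[1024], b[1024], c[1024];

void
add_all (void)
{
  for (int i = 0; i < 1024; i++)
    c[i] = a[i] + b[i];
}
```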
348 | 1169 |
349 static bool | 1170 namespace { |
350 gate_increase_alignment (void) | 1171 |
351 { | 1172 const pass_data pass_data_ipa_increase_alignment = |
352 return flag_section_anchors && flag_tree_vectorize; | 1173 { |
353 } | 1174 SIMPLE_IPA_PASS, /* type */ |
354 | 1175 "increase_alignment", /* name */ |
355 | 1176 OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */ |
356 struct simple_ipa_opt_pass pass_ipa_increase_alignment = | 1177 TV_IPA_OPT, /* tv_id */ |
357 { | 1178 0, /* properties_required */ |
358 { | 1179 0, /* properties_provided */ |
359 SIMPLE_IPA_PASS, | 1180 0, /* properties_destroyed */ |
360 "increase_alignment", /* name */ | 1181 0, /* todo_flags_start */ |
361 gate_increase_alignment, /* gate */ | 1182 0, /* todo_flags_finish */ |
362 increase_alignment, /* execute */ | |
363 NULL, /* sub */ | |
364 NULL, /* next */ | |
365 0, /* static_pass_number */ | |
366 TV_IPA_OPT, /* tv_id */ | |
367 0, /* properties_required */ | |
368 0, /* properties_provided */ | |
369 0, /* properties_destroyed */ | |
370 0, /* todo_flags_start */ | |
371 0 /* todo_flags_finish */ | |
372 } | |
373 }; | 1183 }; |
1184 | |
1185 class pass_ipa_increase_alignment : public simple_ipa_opt_pass | |
1186 { | |
1187 public: | |
1188 pass_ipa_increase_alignment (gcc::context *ctxt) | |
1189 : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt) | |
1190 {} | |
1191 | |
1192 /* opt_pass methods: */ | |
1193 virtual bool gate (function *) | |
1194 { | |
1195 return flag_section_anchors && flag_tree_loop_vectorize; | |
1196 } | |
1197 | |
1198 virtual unsigned int execute (function *) { return increase_alignment (); } | |
1199 | |
1200 }; // class pass_ipa_increase_alignment | |
1201 | |
1202 } // anon namespace | |
1203 | |
1204 simple_ipa_opt_pass * | |
1205 make_pass_ipa_increase_alignment (gcc::context *ctxt) | |
1206 { | |
1207 return new pass_ipa_increase_alignment (ctxt); | |
1208 } |