comparison gcc/tree-vectorizer.h @ 131:84e7813d76e9
gcc-8.2
author | mir3636
date | Thu, 25 Oct 2018 07:37:49 +0900
parents | 04ced10e8804
children | 1830386684a0
111:04ced10e8804 | 131:84e7813d76e9
1 /* Vectorizer | 1 /* Vectorizer |
2 Copyright (C) 2003-2017 Free Software Foundation, Inc. | 2 Copyright (C) 2003-2018 Free Software Foundation, Inc. |
3 Contributed by Dorit Naishlos <dorit@il.ibm.com> | 3 Contributed by Dorit Naishlos <dorit@il.ibm.com> |
4 | 4 |
5 This file is part of GCC. | 5 This file is part of GCC. |
6 | 6 |
7 GCC is free software; you can redistribute it and/or modify it under | 7 GCC is free software; you can redistribute it and/or modify it under |
18 along with GCC; see the file COPYING3. If not see | 18 along with GCC; see the file COPYING3. If not see |
19 <http://www.gnu.org/licenses/>. */ | 19 <http://www.gnu.org/licenses/>. */ |
20 | 20 |
21 #ifndef GCC_TREE_VECTORIZER_H | 21 #ifndef GCC_TREE_VECTORIZER_H |
22 #define GCC_TREE_VECTORIZER_H | 22 #define GCC_TREE_VECTORIZER_H |
23 | |
24 typedef struct _stmt_vec_info *stmt_vec_info; | |
23 | 25 |
24 #include "tree-data-ref.h" | 26 #include "tree-data-ref.h" |
25 #include "tree-hash-traits.h" | 27 #include "tree-hash-traits.h" |
26 #include "target.h" | 28 #include "target.h" |
27 | 29 |
65 /* Define type of reduction. */ | 67 /* Define type of reduction. */ |
66 enum vect_reduction_type { | 68 enum vect_reduction_type { |
67 TREE_CODE_REDUCTION, | 69 TREE_CODE_REDUCTION, |
68 COND_REDUCTION, | 70 COND_REDUCTION, |
69 INTEGER_INDUC_COND_REDUCTION, | 71 INTEGER_INDUC_COND_REDUCTION, |
70 CONST_COND_REDUCTION | 72 CONST_COND_REDUCTION, |
73 | |
74 /* Retain a scalar phi and use a FOLD_EXTRACT_LAST within the loop | |
75 to implement: | |
76 | |
77 for (int i = 0; i < VF; ++i) | |
78 res = cond[i] ? val[i] : res; */ | |
79 EXTRACT_LAST_REDUCTION, | |
80 | |
81 /* Use a folding reduction within the loop to implement: | |
82 | |
83 for (int i = 0; i < VF; ++i) | |
84 res = res OP val[i]; | |
85 | |
86 (with no reassociation). */ |
87 FOLD_LEFT_REDUCTION | |
71 }; | 88 }; |
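
The two reduction kinds added above correspond to scalar idioms the vectorizer recognizes; a minimal sketch in plain C (illustrative loops only, not code from this changeset):

    /* Candidate for EXTRACT_LAST_REDUCTION: the loop keeps the last value
       selected by a condition, matching the per-lane semantics shown in
       the enum comment.  */
    int last_match (int n, const int *cond, const int *val)
    {
      int res = 0;
      for (int i = 0; i < n; ++i)
        res = cond[i] ? val[i] : res;
      return res;
    }

    /* Candidate for FOLD_LEFT_REDUCTION: an in-order float sum, where
       strict FP semantics (no -ffast-math) forbid reassociating the
       additions.  */
    float in_order_sum (int n, const float *a)
    {
      float res = 0.0f;
      for (int i = 0; i < n; ++i)
        res += a[i];
      return res;
    }
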
72 | 89 |
73 #define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def) \ | 90 #define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def) \ |
74 || ((D) == vect_double_reduction_def) \ | 91 || ((D) == vect_double_reduction_def) \ |
75 || ((D) == vect_nested_cycle)) | 92 || ((D) == vect_nested_cycle)) |
77 /* Structure to encapsulate information about a group of like | 94 /* Structure to encapsulate information about a group of like |
78 instructions to be presented to the target cost model. */ | 95 instructions to be presented to the target cost model. */ |
79 struct stmt_info_for_cost { | 96 struct stmt_info_for_cost { |
80 int count; | 97 int count; |
81 enum vect_cost_for_stmt kind; | 98 enum vect_cost_for_stmt kind; |
82 gimple *stmt; | 99 enum vect_cost_model_location where; |
100 stmt_vec_info stmt_info; | |
83 int misalign; | 101 int misalign; |
84 }; | 102 }; |
85 | 103 |
86 typedef vec<stmt_info_for_cost> stmt_vector_for_cost; | 104 typedef vec<stmt_info_for_cost> stmt_vector_for_cost; |
87 | 105 |
99 stmts to be packed in a SIMD stmt. */ | 117 stmts to be packed in a SIMD stmt. */ |
100 struct _slp_tree { | 118 struct _slp_tree { |
101 /* Nodes that contain def-stmts of this node's statements' operands. */ | 119 |
102 vec<slp_tree> children; | 120 vec<slp_tree> children; |
103 /* A group of scalar stmts to be vectorized together. */ | 121 /* A group of scalar stmts to be vectorized together. */ |
104 vec<gimple *> stmts; | 122 vec<stmt_vec_info> stmts; |
105 /* Load permutation relative to the stores, NULL if there is no | 123 /* Load permutation relative to the stores, NULL if there is no |
106 permutation. */ | 124 permutation. */ |
107 vec<unsigned> load_permutation; | 125 vec<unsigned> load_permutation; |
108 /* Vectorized stmt/s. */ | 126 /* Vectorized stmt/s. */ |
109 vec<gimple *> vec_stmts; | 127 vec<stmt_vec_info> vec_stmts; |
110 /* Number of vector stmts that are created to replace the group of scalar | 128 /* Number of vector stmts that are created to replace the group of scalar |
111 stmts. It is calculated during the transformation phase as the number of | 129 stmts. It is calculated during the transformation phase as the number of |
112 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF | 130 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF |
113 divided by vector size. */ | 131 divided by vector size. */ |
114 unsigned int vec_stmts_size; | 132 unsigned int vec_stmts_size; |
127 | 145 |
128 /* Size of groups of scalar stmts that will be replaced by SIMD stmt/s. */ | 146 /* Size of groups of scalar stmts that will be replaced by SIMD stmt/s. */ |
129 unsigned int group_size; | 147 unsigned int group_size; |
130 | 148 |
131 /* The unrolling factor required to vectorize this SLP instance. */ | 149 |
132 unsigned int unrolling_factor; | 150 poly_uint64 unrolling_factor; |
133 | 151 |
134 /* The group of nodes that contain loads of this SLP instance. */ | 152 /* The group of nodes that contain loads of this SLP instance. */ |
135 vec<slp_tree> loads; | 153 vec<slp_tree> loads; |
136 | 154 |
137 /* The SLP node containing the reduction PHIs. */ | 155 /* The SLP node containing the reduction PHIs. */ |
157 | 175 |
158 /* Describes two objects whose addresses must be unequal for the vectorized | 176 /* Describes two objects whose addresses must be unequal for the vectorized |
159 loop to be valid. */ | 177 loop to be valid. */ |
160 typedef std::pair<tree, tree> vec_object_pair; | 178 typedef std::pair<tree, tree> vec_object_pair; |
161 | 179 |
180 /* Records that vectorization is only possible if abs (EXPR) >= MIN_VALUE. | |
181 UNSIGNED_P is true if we can assume that abs (EXPR) == EXPR. */ | |
182 struct vec_lower_bound { | |
183 vec_lower_bound () {} | |
184 vec_lower_bound (tree e, bool u, poly_uint64 m) | |
185 : expr (e), unsigned_p (u), min_value (m) {} | |
186 | |
187 tree expr; | |
188 bool unsigned_p; | |
189 poly_uint64 min_value; | |
190 }; | |
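
A sketch of how one of these bounds might be recorded, assuming N is the tree for the stride in a statement like "x[i * n] += 1" (see the check_nonzero/lower_bounds fields further down; the minimum value 4 is a made-up example):

    /* Require abs (n) >= 4 at runtime before the vector loop is entered;
       unsigned_p is false because the sign of n is unknown.  */
    vec_lower_bound bound (n, /*unsigned_p=*/false, /*min_value=*/4);
    LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).safe_push (bound);
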
191 | |
192 /* Vectorizer state shared between different analyses (e.g. for |
193 different vector sizes) of the same CFG region. */ |
194 struct vec_info_shared { | |
195 vec_info_shared(); | |
196 ~vec_info_shared(); | |
197 | |
198 void save_datarefs(); | |
199 void check_datarefs(); | |
200 | |
201 /* All data references. Freed by free_data_refs, so not an auto_vec. */ | |
202 vec<data_reference_p> datarefs; | |
203 vec<data_reference> datarefs_copy; | |
204 | |
205 /* The loop nest in which the data dependences are computed. */ | |
206 auto_vec<loop_p> loop_nest; | |
207 | |
208 /* All data dependences. Freed by free_dependence_relations, so not | |
209 an auto_vec. */ | |
210 vec<ddr_p> ddrs; | |
211 }; | |
212 | |
162 /* Vectorizer state common between loop and basic-block vectorization. */ | 213 /* Vectorizer state common between loop and basic-block vectorization. */ |
163 struct vec_info { | 214 struct vec_info { |
164 enum vec_kind { bb, loop }; | 215 enum vec_kind { bb, loop }; |
165 | 216 |
166 vec_info (vec_kind, void *); | 217 vec_info (vec_kind, void *, vec_info_shared *); |
167 ~vec_info (); | 218 ~vec_info (); |
219 | |
220 stmt_vec_info add_stmt (gimple *); | |
221 stmt_vec_info lookup_stmt (gimple *); | |
222 stmt_vec_info lookup_def (tree); | |
223 stmt_vec_info lookup_single_use (tree); | |
224 struct dr_vec_info *lookup_dr (data_reference *); | |
225 void move_dr (stmt_vec_info, stmt_vec_info); | |
226 void remove_stmt (stmt_vec_info); | |
227 void replace_stmt (gimple_stmt_iterator *, stmt_vec_info, gimple *); | |
168 | 228 |
169 /* The type of vectorization. */ | 229 /* The type of vectorization. */ |
170 vec_kind kind; | 230 vec_kind kind; |
171 | 231 |
232 /* Shared vectorizer state. */ | |
233 vec_info_shared *shared; | |
234 | |
235 /* The mapping of GIMPLE UID to stmt_vec_info. */ | |
236 vec<stmt_vec_info> stmt_vec_infos; | |
237 | |
172 /* All SLP instances. */ | 238 /* All SLP instances. */ |
173 auto_vec<slp_instance> slp_instances; | 239 auto_vec<slp_instance> slp_instances; |
174 | |
175 /* All data references. Freed by free_data_refs, so not an auto_vec. */ | |
176 vec<data_reference_p> datarefs; | |
177 | 240 |
178 /* Maps base addresses to an innermost_loop_behavior that gives the maximum | 241 /* Maps base addresses to an innermost_loop_behavior that gives the maximum |
179 known alignment for that base. */ | 242 known alignment for that base. */ |
180 vec_base_alignments base_alignments; | 243 vec_base_alignments base_alignments; |
181 | 244 |
182 /* All data dependences. Freed by free_dependence_relations, so not | |
183 an auto_vec. */ | |
184 vec<ddr_p> ddrs; | |
185 | |
186 /* All interleaving chains of stores, represented by the first | 245 /* All interleaving chains of stores, represented by the first |
187 stmt in the chain. */ | 246 stmt in the chain. */ |
188 auto_vec<gimple *> grouped_stores; | 247 auto_vec<stmt_vec_info> grouped_stores; |
189 | 248 |
190 /* Cost data used by the target cost model. */ | 249 /* Cost data used by the target cost model. */ |
191 void *target_cost_data; | 250 void *target_cost_data; |
251 | |
252 private: | |
253 stmt_vec_info new_stmt_vec_info (gimple *stmt); | |
254 void set_vinfo_for_stmt (gimple *, stmt_vec_info); | |
255 void free_stmt_vec_infos (); | |
256 void free_stmt_vec_info (stmt_vec_info); | |
192 }; | 257 }; |
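
The new member functions replace the old global stmt_vec_info table with per-vec_info bookkeeping; a minimal usage sketch (vinfo, stmt and ssa_name are assumed to be in scope):

    stmt_vec_info info = vinfo->lookup_stmt (stmt); /* NULL if not registered */
    if (!info)
      info = vinfo->add_stmt (stmt);                /* allocate and register */
    /* Find the stmt_vec_info of the statement defining an SSA name.  */
    stmt_vec_info def_info = vinfo->lookup_def (ssa_name);
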
193 | 258 |
194 struct _loop_vec_info; | 259 struct _loop_vec_info; |
195 struct _bb_vec_info; | 260 struct _bb_vec_info; |
196 | 261 |
209 { | 274 { |
210 return i->kind == vec_info::bb; | 275 return i->kind == vec_info::bb; |
211 } | 276 } |
212 | 277 |
213 | 278 |
279 /* In general, we can divide the vector statements in a vectorized loop | |
280 into related groups ("rgroups") and say that for each rgroup there is | |
281 some nS such that the rgroup operates on nS values from one scalar | |
282 iteration followed by nS values from the next. That is, if VF is the | |
283 vectorization factor of the loop, the rgroup operates on a sequence: | |
284 | |
285 (1,1) (1,2) ... (1,nS) (2,1) ... (2,nS) ... (VF,1) ... (VF,nS) | |
286 | |
287 where (i,j) represents a scalar value with index j in a scalar | |
288 iteration with index i. | |
289 | |
290 [ We use the term "rgroup" to emphasise that this grouping isn't | |
291 necessarily the same as the grouping of statements used elsewhere. | |
292 For example, if we implement a group of scalar loads using gather | |
293 loads, we'll use a separate gather load for each scalar load, and | |
294 thus each gather load will belong to its own rgroup. ] | |
295 | |
296 In general this sequence will occupy nV vectors concatenated | |
297 together. If these vectors have nL lanes each, the total number | |
298 of scalar values N is given by: | |
299 | |
300 N = nS * VF = nV * nL | |
301 | |
302 None of nS, VF, nV and nL are required to be a power of 2. nS and nV | |
303 are compile-time constants but VF and nL can be variable (if the target | |
304 supports variable-length vectors). | |
305 | |
306 In classical vectorization, each iteration of the vector loop would | |
307 handle exactly VF iterations of the original scalar loop. However, | |
308 in a fully-masked loop, a particular iteration of the vector loop | |
309 might handle fewer than VF iterations of the scalar loop. The vector | |
310 lanes that correspond to iterations of the scalar loop are said to be | |
311 "active" and the other lanes are said to be "inactive". | |
312 | |
313 In a fully-masked loop, many rgroups need to be masked to ensure that | |
314 they have no effect for the inactive lanes. Each such rgroup needs a | |
315 sequence of booleans in the same order as above, but with each (i,j) | |
316 replaced by a boolean that indicates whether iteration i is active. | |
317 This sequence occupies nV vector masks that again have nL lanes each. | |
318 Thus the mask sequence as a whole consists of VF independent booleans | |
319 that are each repeated nS times. | |
320 | |
321 We make the simplifying assumption that if a sequence of nV masks is | |
322 suitable for one (nS,nL) pair, we can reuse it for (nS/2,nL/2) by | |
323 VIEW_CONVERTing it. This holds for all current targets that support | |
324 fully-masked loops. For example, suppose the scalar loop is: | |
325 | |
326 float *f; | |
327 double *d; | |
328 for (int i = 0; i < n; ++i) | |
329 { | |
330 f[i * 2 + 0] += 1.0f; | |
331 f[i * 2 + 1] += 2.0f; | |
332 d[i] += 3.0; | |
333 } | |
334 | |
335 and suppose that vectors have 256 bits. The vectorized f accesses | |
336 will belong to one rgroup and the vectorized d access to another: | |
337 | |
338 f rgroup: nS = 2, nV = 1, nL = 8 | |
339 d rgroup: nS = 1, nV = 1, nL = 4 | |
340 VF = 4 | |
341 | |
342 [ In this simple example the rgroups do correspond to the normal | |
343 SLP grouping scheme. ] | |
344 | |
345 If only the first three lanes are active, the masks we need are: | |
346 | |
347 f rgroup: 1 1 | 1 1 | 1 1 | 0 0 | |
348 d rgroup: 1 | 1 | 1 | 0 | |
349 | |
350 Here we can use a mask calculated for f's rgroup for d's, but not | |
351 vice versa. | |
352 | |
353 Thus for each value of nV, it is enough to provide nV masks, with the | |
354 mask being calculated based on the highest nL (or, equivalently, based | |
355 on the highest nS) required by any rgroup with that nV. We therefore | |
356 represent the entire collection of masks as a two-level table, with the | |
357 first level being indexed by nV - 1 (since nV == 0 doesn't exist) and | |
358 the second being indexed by the mask index 0 <= i < nV. */ | |
359 | |
360 /* The masks needed by rgroups with nV vectors, according to the | |
361 description above. */ | |
362 struct rgroup_masks { | |
363 /* The largest nS for all rgroups that use these masks. */ | |
364 unsigned int max_nscalars_per_iter; | |
365 | |
366 /* The type of mask to use, based on the highest nS recorded above. */ | |
367 tree mask_type; | |
368 | |
369 /* A vector of nV masks, in iteration order. */ | |
370 vec<tree> masks; | |
371 }; | |
372 | |
373 typedef auto_vec<rgroup_masks> vec_loop_masks; | |
374 | |
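Given the two-level layout just described, fetching a mask is plain indexing; a minimal sketch (population of the table and the VIEW_CONVERT handling done by the real accessor, vect_get_loop_mask, are omitted):

    /* Return mask I for an rgroup that needs NVECTORS vectors: the first
       level is indexed by nV - 1, the second by 0 <= i < nV.  */
    static tree
    get_loop_mask_sketch (vec_loop_masks *masks, unsigned int nvectors,
                          unsigned int i)
    {
      rgroup_masks *rgm = &(*masks)[nvectors - 1];
      return rgm->masks[i];
    }
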
214 /*-----------------------------------------------------------------*/ | 375 /*-----------------------------------------------------------------*/ |
215 /* Info on vectorized loops. */ | 376 /* Info on vectorized loops. */ |
216 /*-----------------------------------------------------------------*/ | 377 /*-----------------------------------------------------------------*/ |
217 typedef struct _loop_vec_info : public vec_info { | 378 typedef struct _loop_vec_info : public vec_info { |
218 _loop_vec_info (struct loop *); | 379 _loop_vec_info (struct loop *, vec_info_shared *); |
219 ~_loop_vec_info (); | 380 ~_loop_vec_info (); |
220 | 381 |
221 /* The loop to which this info struct refers. */ | 382 |
222 struct loop *loop; | 383 struct loop *loop; |
223 | 384 |
236 /* Threshold of number of iterations below which vectorization will not be | 397 |
237 performed. It is calculated from MIN_PROFITABLE_ITERS and | 398 performed. It is calculated from MIN_PROFITABLE_ITERS and |
238 PARAM_MIN_VECT_LOOP_BOUND. */ | 399 PARAM_MIN_VECT_LOOP_BOUND. */ |
239 unsigned int th; | 400 unsigned int th; |
240 | 401 |
402 /* When applying loop versioning, the vector form should only be used | |
403 if the number of scalar iterations is >= this value, on top of all | |
404 the other requirements. Ignored when loop versioning is not being | |
405 used. */ | |
406 poly_uint64 versioning_threshold; | |
407 | |
241 /* Unrolling factor */ | 408 /* Unrolling factor */ |
242 int vectorization_factor; | 409 poly_uint64 vectorization_factor; |
243 | 410 |
244 /* Maximum runtime vectorization factor, or MAX_VECTORIZATION_FACTOR | 411 /* Maximum runtime vectorization factor, or MAX_VECTORIZATION_FACTOR |
245 if there is no particular limit. */ | 412 if there is no particular limit. */ |
246 unsigned HOST_WIDE_INT max_vectorization_factor; | 413 unsigned HOST_WIDE_INT max_vectorization_factor; |
247 | 414 |
415 /* The masks that a fully-masked loop should use to avoid operating | |
416 on inactive scalars. */ | |
417 vec_loop_masks masks; | |
418 | |
419 /* If we are using a loop mask to align memory addresses, this variable | |
420 contains the number of vector elements that we should skip in the | |
421 first iteration of the vector loop (i.e. the number of leading | |
422 elements that should be false in the first mask). */ | |
423 tree mask_skip_niters; | |
424 | |
425 /* Type of the variables to use in the WHILE_ULT call for fully-masked | |
426 loops. */ | |
427 tree mask_compare_type; | |
428 | |
248 /* Unknown DRs according to which loop was peeled. */ | 429 /* Unknown DRs according to which loop was peeled. */ |
249 struct data_reference *unaligned_dr; | 430 struct dr_vec_info *unaligned_dr; |
250 | 431 |
251 /* peeling_for_alignment indicates whether peeling for alignment will take | 432 /* peeling_for_alignment indicates whether peeling for alignment will take |
252 place, and what the peeling factor should be: | 433 place, and what the peeling factor should be: |
253 peeling_for_alignment = X means: | 434 peeling_for_alignment = X means: |
254 If X=0: Peeling for alignment will not be applied. | 435 If X=0: Peeling for alignment will not be applied. |
259 int peeling_for_alignment; | 440 int peeling_for_alignment; |
260 | 441 |
261 /* The mask used to check the alignment of pointers or arrays. */ | 442 /* The mask used to check the alignment of pointers or arrays. */ |
262 int ptr_mask; | 443 int ptr_mask; |
263 | 444 |
264 /* The loop nest in which the data dependences are computed. */ | |
265 auto_vec<loop_p> loop_nest; | |
266 | |
267 /* Data Dependence Relations defining address ranges that are candidates | 445 /* Data Dependence Relations defining address ranges that are candidates |
268 for a run-time aliasing check. */ | 446 for a run-time aliasing check. */ |
269 auto_vec<ddr_p> may_alias_ddrs; | 447 auto_vec<ddr_p> may_alias_ddrs; |
270 | 448 |
271 /* Data Dependence Relations defining address ranges together with segment | 449 /* Data Dependence Relations defining address ranges together with segment |
273 auto_vec<dr_with_seg_len_pair_t> comp_alias_ddrs; | 451 auto_vec<dr_with_seg_len_pair_t> comp_alias_ddrs; |
274 | 452 |
275 /* Check that the addresses of each pair of objects is unequal. */ | 453 /* Check that the addresses of each pair of objects is unequal. */ |
276 auto_vec<vec_object_pair> check_unequal_addrs; | 454 auto_vec<vec_object_pair> check_unequal_addrs; |
277 | 455 |
456 /* List of values that are required to be nonzero. This is used to check | |
457 whether things like "x[i * n] += 1;" are safe and eventually gets added | |
458 to the checks for lower bounds below. */ | |
459 auto_vec<tree> check_nonzero; | |
460 | |
461 /* List of values that need to be checked for a minimum value. */ | |
462 auto_vec<vec_lower_bound> lower_bounds; | |
463 | |
278 /* Statements in the loop that have data references that are candidates for a | 464 /* Statements in the loop that have data references that are candidates for a |
279 runtime (loop versioning) misalignment check. */ | 465 runtime (loop versioning) misalignment check. */ |
280 auto_vec<gimple *> may_misalign_stmts; | 466 auto_vec<stmt_vec_info> may_misalign_stmts; |
281 | 467 |
282 /* Reduction cycles detected in the loop. Used in loop-aware SLP. */ | 468 /* Reduction cycles detected in the loop. Used in loop-aware SLP. */ |
283 auto_vec<gimple *> reductions; | 469 auto_vec<stmt_vec_info> reductions; |
284 | 470 |
285 /* All reduction chains in the loop, represented by the first | 471 /* All reduction chains in the loop, represented by the first |
286 stmt in the chain. */ | 472 stmt in the chain. */ |
287 auto_vec<gimple *> reduction_chains; | 473 auto_vec<stmt_vec_info> reduction_chains; |
288 | 474 |
289 /* Cost vector for a single scalar iteration. */ | 475 /* Cost vector for a single scalar iteration. */ |
290 auto_vec<stmt_info_for_cost> scalar_cost_vec; | 476 auto_vec<stmt_info_for_cost> scalar_cost_vec; |
291 | 477 |
478 /* Map of IV base/step expressions to inserted name in the preheader. */ | |
479 hash_map<tree_operand_hash, tree> *ivexpr_map; | |
480 | |
292 /* The unrolling factor needed to SLP the loop. In case that pure SLP is | 481 |
293 applied to the loop, i.e., no unrolling is needed, this is 1. */ | 482 applied to the loop, i.e., no unrolling is needed, this is 1. */ |
294 unsigned slp_unrolling_factor; | 483 poly_uint64 slp_unrolling_factor; |
295 | 484 |
296 /* Cost of a single scalar iteration. */ | 485 /* Cost of a single scalar iteration. */ |
297 int single_scalar_iteration_cost; | 486 int single_scalar_iteration_cost; |
298 | 487 |
299 /* Is the loop vectorizable? */ | 488 /* Is the loop vectorizable? */ |
300 bool vectorizable; | 489 bool vectorizable; |
490 | |
491 /* Records whether we still have the option of using a fully-masked loop. */ | |
492 bool can_fully_mask_p; | |
493 | |
494 /* True if we have decided to use a fully-masked loop. */ |
495 bool fully_masked_p; | |
301 | 496 |
302 /* When we have grouped data accesses with gaps, we may introduce invalid | 497 /* When we have grouped data accesses with gaps, we may introduce invalid |
303 memory accesses. We peel the last iteration of the loop to prevent | 498 memory accesses. We peel the last iteration of the loop to prevent |
304 this. */ | 499 this. */ |
305 bool peeling_for_gaps; | 500 bool peeling_for_gaps; |
355 prologue peeling retain total unchanged scalar loop iterations for | 550 prologue peeling retain total unchanged scalar loop iterations for |
356 cost model. */ | 551 cost model. */ |
357 #define LOOP_VINFO_NITERS_UNCHANGED(L) (L)->num_iters_unchanged | 552 #define LOOP_VINFO_NITERS_UNCHANGED(L) (L)->num_iters_unchanged |
358 #define LOOP_VINFO_NITERS_ASSUMPTIONS(L) (L)->num_iters_assumptions | 553 #define LOOP_VINFO_NITERS_ASSUMPTIONS(L) (L)->num_iters_assumptions |
359 #define LOOP_VINFO_COST_MODEL_THRESHOLD(L) (L)->th | 554 #define LOOP_VINFO_COST_MODEL_THRESHOLD(L) (L)->th |
555 #define LOOP_VINFO_VERSIONING_THRESHOLD(L) (L)->versioning_threshold | |
360 #define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable | 556 #define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable |
557 #define LOOP_VINFO_CAN_FULLY_MASK_P(L) (L)->can_fully_mask_p | |
558 #define LOOP_VINFO_FULLY_MASKED_P(L) (L)->fully_masked_p | |
361 #define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor | 559 #define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor |
362 #define LOOP_VINFO_MAX_VECT_FACTOR(L) (L)->max_vectorization_factor | 560 #define LOOP_VINFO_MAX_VECT_FACTOR(L) (L)->max_vectorization_factor |
561 #define LOOP_VINFO_MASKS(L) (L)->masks | |
562 #define LOOP_VINFO_MASK_SKIP_NITERS(L) (L)->mask_skip_niters | |
563 #define LOOP_VINFO_MASK_COMPARE_TYPE(L) (L)->mask_compare_type | |
363 #define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask | 564 #define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask |
364 #define LOOP_VINFO_LOOP_NEST(L) (L)->loop_nest | 565 #define LOOP_VINFO_LOOP_NEST(L) (L)->shared->loop_nest |
365 #define LOOP_VINFO_DATAREFS(L) (L)->datarefs | 566 #define LOOP_VINFO_DATAREFS(L) (L)->shared->datarefs |
366 #define LOOP_VINFO_DDRS(L) (L)->ddrs | 567 #define LOOP_VINFO_DDRS(L) (L)->shared->ddrs |
367 #define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters)) | 568 #define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters)) |
368 #define LOOP_VINFO_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment | 569 #define LOOP_VINFO_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment |
369 #define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr | 570 #define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr |
370 #define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts | 571 #define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts |
371 #define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs | 572 #define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs |
372 #define LOOP_VINFO_COMP_ALIAS_DDRS(L) (L)->comp_alias_ddrs | 573 #define LOOP_VINFO_COMP_ALIAS_DDRS(L) (L)->comp_alias_ddrs |
373 #define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L) (L)->check_unequal_addrs | 574 #define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L) (L)->check_unequal_addrs |
575 #define LOOP_VINFO_CHECK_NONZERO(L) (L)->check_nonzero | |
576 #define LOOP_VINFO_LOWER_BOUNDS(L) (L)->lower_bounds | |
374 #define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores | 577 #define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores |
375 #define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances | 578 #define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances |
376 #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor | 579 #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor |
377 #define LOOP_VINFO_REDUCTIONS(L) (L)->reductions | 580 #define LOOP_VINFO_REDUCTIONS(L) (L)->reductions |
378 #define LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains | 581 #define LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains |
389 | 592 |
390 #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ | 593 #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ |
391 ((L)->may_misalign_stmts.length () > 0) | 594 ((L)->may_misalign_stmts.length () > 0) |
392 #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \ | 595 #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \ |
393 ((L)->comp_alias_ddrs.length () > 0 \ | 596 ((L)->comp_alias_ddrs.length () > 0 \ |
394 || (L)->check_unequal_addrs.length () > 0) | 597 || (L)->check_unequal_addrs.length () > 0 \ |
598 || (L)->lower_bounds.length () > 0) | |
395 #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \ | 599 #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \ |
396 (LOOP_VINFO_NITERS_ASSUMPTIONS (L)) | 600 (LOOP_VINFO_NITERS_ASSUMPTIONS (L)) |
397 #define LOOP_REQUIRES_VERSIONING(L) \ | 601 #define LOOP_REQUIRES_VERSIONING(L) \ |
398 (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L) \ | 602 (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L) \ |
399 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L) \ | 603 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L) \ |
406 (LOOP_VINFO_ORIG_LOOP_INFO (L) != NULL) | 610 (LOOP_VINFO_ORIG_LOOP_INFO (L) != NULL) |
407 | 611 |
408 #define LOOP_VINFO_ORIG_MAX_VECT_FACTOR(L) \ | 612 #define LOOP_VINFO_ORIG_MAX_VECT_FACTOR(L) \ |
409 (LOOP_VINFO_MAX_VECT_FACTOR (LOOP_VINFO_ORIG_LOOP_INFO (L))) | 613 (LOOP_VINFO_MAX_VECT_FACTOR (LOOP_VINFO_ORIG_LOOP_INFO (L))) |
410 | 614 |
615 /* Wrapper for loop_vec_info, for tracking success/failure, where a non-NULL | |
616 value signifies success, and a NULL value signifies failure, supporting | |
617 propagating an opt_problem * describing the failure back up the call | |
618 stack. */ | |
619 typedef opt_pointer_wrapper <loop_vec_info> opt_loop_vec_info; | |
620 | |
411 static inline loop_vec_info | 621 static inline loop_vec_info |
412 loop_vec_info_for_loop (struct loop *loop) | 622 loop_vec_info_for_loop (struct loop *loop) |
413 { | 623 { |
414 return (loop_vec_info) loop->aux; | 624 return (loop_vec_info) loop->aux; |
415 } | 625 } |
416 | 626 |
417 static inline bool | |
418 nested_in_vect_loop_p (struct loop *loop, gimple *stmt) | |
419 { | |
420 return (loop->inner | |
421 && (loop->inner == (gimple_bb (stmt))->loop_father)); | |
422 } | |
423 | |
424 typedef struct _bb_vec_info : public vec_info | 627 typedef struct _bb_vec_info : public vec_info |
425 { | 628 { |
426 _bb_vec_info (gimple_stmt_iterator, gimple_stmt_iterator); | 629 _bb_vec_info (gimple_stmt_iterator, gimple_stmt_iterator, vec_info_shared *); |
427 ~_bb_vec_info (); | 630 ~_bb_vec_info (); |
428 | 631 |
429 basic_block bb; | 632 basic_block bb; |
430 gimple_stmt_iterator region_begin; | 633 gimple_stmt_iterator region_begin; |
431 gimple_stmt_iterator region_end; | 634 gimple_stmt_iterator region_end; |
432 } *bb_vec_info; | 635 } *bb_vec_info; |
433 | 636 |
434 #define BB_VINFO_BB(B) (B)->bb | 637 #define BB_VINFO_BB(B) (B)->bb |
435 #define BB_VINFO_GROUPED_STORES(B) (B)->grouped_stores | 638 #define BB_VINFO_GROUPED_STORES(B) (B)->grouped_stores |
436 #define BB_VINFO_SLP_INSTANCES(B) (B)->slp_instances | 639 #define BB_VINFO_SLP_INSTANCES(B) (B)->slp_instances |
437 #define BB_VINFO_DATAREFS(B) (B)->datarefs | 640 #define BB_VINFO_DATAREFS(B) (B)->shared->datarefs |
438 #define BB_VINFO_DDRS(B) (B)->ddrs | 641 #define BB_VINFO_DDRS(B) (B)->shared->ddrs |
439 #define BB_VINFO_TARGET_COST_DATA(B) (B)->target_cost_data | 642 #define BB_VINFO_TARGET_COST_DATA(B) (B)->target_cost_data |
440 | 643 |
441 static inline bb_vec_info | 644 static inline bb_vec_info |
442 vec_info_for_bb (basic_block bb) | 645 vec_info_for_bb (basic_block bb) |
443 { | 646 { |
511 loop_vect = 0, | 714 loop_vect = 0, |
512 pure_slp, | 715 pure_slp, |
513 hybrid | 716 hybrid |
514 }; | 717 }; |
515 | 718 |
719 /* Says whether a statement is a load, a store of a vectorized statement | |
720 result, or a store of an invariant value. */ | |
721 enum vec_load_store_type { | |
722 VLS_LOAD, | |
723 VLS_STORE, | |
724 VLS_STORE_INVARIANT | |
725 }; | |
726 | |
516 /* Describes how we're going to vectorize an individual load or store, | 727 /* Describes how we're going to vectorize an individual load or store, |
517 or a group of loads or stores. */ | 728 or a group of loads or stores. */ |
518 enum vect_memory_access_type { | 729 enum vect_memory_access_type { |
519 /* An access to an invariant address. This is used only for loads. */ | 730 /* An access to an invariant address. This is used only for loads. */ |
520 VMAT_INVARIANT, | 731 VMAT_INVARIANT, |
551 | 762 |
552 /* The access uses gather loads or scatter stores. */ | 763 /* The access uses gather loads or scatter stores. */ |
553 VMAT_GATHER_SCATTER | 764 VMAT_GATHER_SCATTER |
554 }; | 765 }; |
555 | 766 |
767 struct dr_vec_info { | |
768 /* The data reference itself. */ | |
769 data_reference *dr; | |
770 /* The statement that contains the data reference. */ | |
771 stmt_vec_info stmt; | |
772 /* The misalignment in bytes of the reference, or -1 if not known. */ | |
773 int misalignment; | |
774 /* The byte alignment that we'd ideally like the reference to have, | |
775 and the value that misalignment is measured against. */ | |
776 int target_alignment; | |
777 /* If true the alignment of base_decl needs to be increased. */ | |
778 bool base_misaligned; | |
779 tree base_decl; | |
780 }; | |
781 | |
556 typedef struct data_reference *dr_p; | 782 typedef struct data_reference *dr_p; |
557 | 783 |
558 typedef struct _stmt_vec_info { | 784 struct _stmt_vec_info { |
559 | 785 |
560 enum stmt_vec_info_type type; | 786 enum stmt_vec_info_type type; |
561 | 787 |
562 /* Indicates whether this stmt is part of a computation whose result is | 788 |
563 used outside the loop. */ | 789 used outside the loop. */ |
564 bool live; | 790 bool live; |
565 | 791 |
566 /* Stmt is part of some pattern (computation idiom) */ | 792 /* Stmt is part of some pattern (computation idiom) */ |
567 bool in_pattern_p; | 793 bool in_pattern_p; |
568 | 794 |
795 /* True if the statement was created during pattern recognition as | |
796 part of the replacement for RELATED_STMT. This implies that the | |
797 statement isn't part of any basic block, although for convenience | |
798 its gimple_bb is the same as for RELATED_STMT. */ | |
799 bool pattern_stmt_p; | |
800 | |
569 /* Is this statement vectorizable or should it be skipped in (partial) | 801 /* Is this statement vectorizable or should it be skipped in (partial) |
570 vectorization. */ | 802 vectorization. */ |
571 bool vectorizable; | 803 bool vectorizable; |
572 | 804 |
573 /* The stmt to which this info struct refers to. */ | 805 /* The stmt to which this info struct refers to. */ |
578 | 810 |
579 /* The vector type to be used for the LHS of this statement. */ | 811 /* The vector type to be used for the LHS of this statement. */ |
580 tree vectype; | 812 tree vectype; |
581 | 813 |
582 /* The vectorized version of the stmt. */ | 814 /* The vectorized version of the stmt. */ |
583 gimple *vectorized_stmt; | 815 stmt_vec_info vectorized_stmt; |
584 | 816 |
585 | 817 |
586 /* The following is relevant only for stmts that contain a non-scalar | 818 /* The following is relevant only for stmts that contain a non-scalar |
587 data-ref (array/pointer/struct access). A GIMPLE stmt is expected to have | 819 data-ref (array/pointer/struct access). A GIMPLE stmt is expected to have |
588 at most one such data-ref. */ | 820 at most one such data-ref. */ |
589 | 821 |
590 /* Information about the data-ref (access function, etc), | 822 dr_vec_info dr_aux; |
591 relative to the inner-most containing loop. */ | |
592 struct data_reference *data_ref_info; | |
593 | 823 |
594 /* Information about the data-ref relative to this loop | 824 /* Information about the data-ref relative to this loop |
595 nest (the loop that is being considered for vectorization). */ | 825 nest (the loop that is being considered for vectorization). */ |
596 innermost_loop_behavior dr_wrt_vec_loop; | 826 innermost_loop_behavior dr_wrt_vec_loop; |
597 | 827 |
611 true): S is the "pattern stmt" that represents (and replaces) the | 841 true): S is the "pattern stmt" that represents (and replaces) the |
612 sequence of stmts that constitutes the pattern. Similarly, the | 842 sequence of stmts that constitutes the pattern. Similarly, the |
613 related_stmt of the "pattern stmt" points back to this stmt (which is | 843 related_stmt of the "pattern stmt" points back to this stmt (which is |
614 the last stmt in the original sequence of stmts that constitutes the | 844 the last stmt in the original sequence of stmts that constitutes the |
615 pattern). */ | 845 pattern). */ |
616 gimple *related_stmt; | 846 stmt_vec_info related_stmt; |
617 | 847 |
618 /* Used to keep a sequence of def stmts of a pattern stmt if such exists. */ | 848 /* Used to keep a sequence of def stmts of a pattern stmt if such exists. |
849 The sequence is attached to the original statement rather than the | |
850 pattern statement. */ | |
619 gimple_seq pattern_def_seq; | 851 gimple_seq pattern_def_seq; |
620 | 852 |
621 /* List of datarefs that are known to have the same alignment as the dataref | 853 /* List of datarefs that are known to have the same alignment as the dataref |
622 of this stmt. */ | 854 of this stmt. */ |
623 vec<dr_p> same_align_refs; | 855 vec<dr_p> same_align_refs; |
633 /* Whether the stmt is SLPed, loop-based vectorized, or both. */ | 865 /* Whether the stmt is SLPed, loop-based vectorized, or both. */ |
634 enum slp_vect_type slp_type; | 866 enum slp_vect_type slp_type; |
635 | 867 |
636 /* Interleaving and reduction chains info. */ | 868 /* Interleaving and reduction chains info. */ |
637 /* First element in the group. */ | 869 /* First element in the group. */ |
638 gimple *first_element; | 870 stmt_vec_info first_element; |
639 /* Pointer to the next element in the group. */ | 871 /* Pointer to the next element in the group. */ |
640 gimple *next_element; | 872 stmt_vec_info next_element; |
641 /* For data-refs, in case that two or more stmts share data-ref, this is the | 873 /* For data-refs, in case that two or more stmts share data-ref, this is the |
642 pointer to the previously detected stmt with the same dr. */ | 874 pointer to the previously detected stmt with the same dr. */ |
643 gimple *same_dr_stmt; | 875 stmt_vec_info same_dr_stmt; |
644 /* The size of the group. */ | 876 /* The size of the group. */ |
645 unsigned int size; | 877 unsigned int size; |
646 /* For stores, number of stores from this group seen. We vectorize the last | 878 /* For stores, number of stores from this group seen. We vectorize the last |
647 one. */ | 879 one. */ |
648 unsigned int store_count; | 880 unsigned int store_count; |
683 enum vect_reduction_type reduc_type; | 915 enum vect_reduction_type reduc_type; |
684 | 916 |
685 /* On a reduction PHI the def returned by vect_force_simple_reduction. | 917 /* On a reduction PHI the def returned by vect_force_simple_reduction. |
686 On the def returned by vect_force_simple_reduction the | 918 On the def returned by vect_force_simple_reduction the |
687 corresponding PHI. */ | 919 corresponding PHI. */ |
688 gimple *reduc_def; | 920 stmt_vec_info reduc_def; |
689 | 921 |
690 /* The number of scalar stmt references from active SLP instances. */ | 922 /* The number of scalar stmt references from active SLP instances. */ |
691 unsigned int num_slp_uses; | 923 unsigned int num_slp_uses; |
692 } *stmt_vec_info; | 924 |
925 /* If nonzero, the lhs of the statement could be truncated to this | |
926 many bits without affecting any users of the result. */ | |
927 unsigned int min_output_precision; | |
928 | |
929 /* If nonzero, all non-boolean input operands have the same precision, | |
930 and they could each be truncated to this many bits without changing | |
931 the result. */ | |
932 unsigned int min_input_precision; | |
933 | |
934 /* If OPERATION_BITS is nonzero, the statement could be performed on | |
935 an integer with the sign and number of bits given by OPERATION_SIGN | |
936 and OPERATION_BITS without changing the result. */ | |
937 unsigned int operation_precision; | |
938 signop operation_sign; | |
939 }; | |
693 | 940 |
694 /* Information about a gather/scatter call. */ | 941 /* Information about a gather/scatter call. */ |
695 struct gather_scatter_info { | 942 struct gather_scatter_info { |
696 /* The FUNCTION_DECL for the built-in gather/scatter function. */ | 943 /* The internal function to use for the gather/scatter operation, |
944 or IFN_LAST if a built-in function should be used instead. */ | |
945 internal_fn ifn; | |
946 | |
947 /* The FUNCTION_DECL for the built-in gather/scatter function, | |
948 or null if an internal function should be used instead. */ | |
697 tree decl; | 949 tree decl; |
698 | 950 |
699 /* The loop-invariant base value. */ | 951 /* The loop-invariant base value. */ |
700 tree base; | 952 tree base; |
701 | 953 |
709 /* The definition type for the vectorized offset. */ | 961 /* The definition type for the vectorized offset. */ |
710 enum vect_def_type offset_dt; | 962 enum vect_def_type offset_dt; |
711 | 963 |
712 /* The type of the vectorized offset. */ | 964 /* The type of the vectorized offset. */ |
713 tree offset_vectype; | 965 tree offset_vectype; |
966 | |
967 /* The type of the scalar elements after loading or before storing. */ | |
968 tree element_type; | |
969 | |
970 /* The type of the scalar elements being loaded or stored. */ | |
971 tree memory_type; | |
714 }; | 972 }; |
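
The new ifn field lets a gather/scatter be emitted either through an internal function or through a target built-in; a hedged sketch of the selection (gs_info is a populated gather_scatter_info, the emission itself is elided):

    if (gs_info.ifn != IFN_LAST)
      {
        /* Emit the operation as an internal-function call, for targets
           that describe gathers directly (e.g. IFN_GATHER_LOAD).  */
      }
    else
      {
        /* Fall back to the target built-in recorded in gs_info.decl.  */
      }
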
715 | 973 |
716 /* Access Functions. */ | 974 /* Access Functions. */ |
717 #define STMT_VINFO_TYPE(S) (S)->type | 975 #define STMT_VINFO_TYPE(S) (S)->type |
718 #define STMT_VINFO_STMT(S) (S)->stmt | 976 #define STMT_VINFO_STMT(S) (S)->stmt |
733 #define STMT_VINFO_RELEVANT(S) (S)->relevant | 991 #define STMT_VINFO_RELEVANT(S) (S)->relevant |
734 #define STMT_VINFO_LIVE_P(S) (S)->live | 992 #define STMT_VINFO_LIVE_P(S) (S)->live |
735 #define STMT_VINFO_VECTYPE(S) (S)->vectype | 993 #define STMT_VINFO_VECTYPE(S) (S)->vectype |
736 #define STMT_VINFO_VEC_STMT(S) (S)->vectorized_stmt | 994 #define STMT_VINFO_VEC_STMT(S) (S)->vectorized_stmt |
737 #define STMT_VINFO_VECTORIZABLE(S) (S)->vectorizable | 995 #define STMT_VINFO_VECTORIZABLE(S) (S)->vectorizable |
738 #define STMT_VINFO_DATA_REF(S) (S)->data_ref_info | 996 #define STMT_VINFO_DATA_REF(S) ((S)->dr_aux.dr + 0) |
739 #define STMT_VINFO_GATHER_SCATTER_P(S) (S)->gather_scatter_p | 997 #define STMT_VINFO_GATHER_SCATTER_P(S) (S)->gather_scatter_p |
740 #define STMT_VINFO_STRIDED_P(S) (S)->strided_p | 998 #define STMT_VINFO_STRIDED_P(S) (S)->strided_p |
741 #define STMT_VINFO_MEMORY_ACCESS_TYPE(S) (S)->memory_access_type | 999 #define STMT_VINFO_MEMORY_ACCESS_TYPE(S) (S)->memory_access_type |
742 #define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p | 1000 #define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p |
743 #define STMT_VINFO_VEC_REDUCTION_TYPE(S) (S)->v_reduc_type | 1001 #define STMT_VINFO_VEC_REDUCTION_TYPE(S) (S)->v_reduc_type |
754 #define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \ | 1012 #define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \ |
755 (S)->dr_wrt_vec_loop.offset_alignment | 1013 (S)->dr_wrt_vec_loop.offset_alignment |
756 #define STMT_VINFO_DR_STEP_ALIGNMENT(S) \ | 1014 #define STMT_VINFO_DR_STEP_ALIGNMENT(S) \ |
757 (S)->dr_wrt_vec_loop.step_alignment | 1015 (S)->dr_wrt_vec_loop.step_alignment |
758 | 1016 |
1017 #define STMT_VINFO_DR_INFO(S) \ | |
1018 (gcc_checking_assert ((S)->dr_aux.stmt == (S)), &(S)->dr_aux) | |
1019 | |
759 #define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p | 1020 #define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p |
760 #define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt | 1021 #define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt |
761 #define STMT_VINFO_PATTERN_DEF_SEQ(S) (S)->pattern_def_seq | 1022 #define STMT_VINFO_PATTERN_DEF_SEQ(S) (S)->pattern_def_seq |
762 #define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs | 1023 #define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs |
763 #define STMT_VINFO_SIMD_CLONE_INFO(S) (S)->simd_clone_info | 1024 #define STMT_VINFO_SIMD_CLONE_INFO(S) (S)->simd_clone_info |
764 #define STMT_VINFO_DEF_TYPE(S) (S)->def_type | 1025 #define STMT_VINFO_DEF_TYPE(S) (S)->def_type |
765 #define STMT_VINFO_GROUP_FIRST_ELEMENT(S) (S)->first_element | 1026 #define STMT_VINFO_GROUPED_ACCESS(S) \ |
766 #define STMT_VINFO_GROUP_NEXT_ELEMENT(S) (S)->next_element | 1027 ((S)->dr_aux.dr && DR_GROUP_FIRST_ELEMENT(S)) |
767 #define STMT_VINFO_GROUP_SIZE(S) (S)->size | |
768 #define STMT_VINFO_GROUP_STORE_COUNT(S) (S)->store_count | |
769 #define STMT_VINFO_GROUP_GAP(S) (S)->gap | |
770 #define STMT_VINFO_GROUP_SAME_DR_STMT(S) (S)->same_dr_stmt | |
771 #define STMT_VINFO_GROUPED_ACCESS(S) ((S)->first_element != NULL && (S)->data_ref_info) | |
772 #define STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED(S) (S)->loop_phi_evolution_base_unchanged | 1028 #define STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED(S) (S)->loop_phi_evolution_base_unchanged |
773 #define STMT_VINFO_LOOP_PHI_EVOLUTION_PART(S) (S)->loop_phi_evolution_part | 1029 #define STMT_VINFO_LOOP_PHI_EVOLUTION_PART(S) (S)->loop_phi_evolution_part |
774 #define STMT_VINFO_MIN_NEG_DIST(S) (S)->min_neg_dist | 1030 #define STMT_VINFO_MIN_NEG_DIST(S) (S)->min_neg_dist |
775 #define STMT_VINFO_NUM_SLP_USES(S) (S)->num_slp_uses | 1031 #define STMT_VINFO_NUM_SLP_USES(S) (S)->num_slp_uses |
776 #define STMT_VINFO_REDUC_TYPE(S) (S)->reduc_type | 1032 #define STMT_VINFO_REDUC_TYPE(S) (S)->reduc_type |
777 #define STMT_VINFO_REDUC_DEF(S) (S)->reduc_def | 1033 #define STMT_VINFO_REDUC_DEF(S) (S)->reduc_def |
778 | 1034 |
779 #define GROUP_FIRST_ELEMENT(S) (S)->first_element | 1035 #define DR_GROUP_FIRST_ELEMENT(S) \ |
780 #define GROUP_NEXT_ELEMENT(S) (S)->next_element | 1036 (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element) |
781 #define GROUP_SIZE(S) (S)->size | 1037 #define DR_GROUP_NEXT_ELEMENT(S) \ |
782 #define GROUP_STORE_COUNT(S) (S)->store_count | 1038 (gcc_checking_assert ((S)->dr_aux.dr), (S)->next_element) |
783 #define GROUP_GAP(S) (S)->gap | 1039 #define DR_GROUP_SIZE(S) \ |
784 #define GROUP_SAME_DR_STMT(S) (S)->same_dr_stmt | 1040 (gcc_checking_assert ((S)->dr_aux.dr), (S)->size) |
1041 #define DR_GROUP_STORE_COUNT(S) \ | |
1042 (gcc_checking_assert ((S)->dr_aux.dr), (S)->store_count) | |
1043 #define DR_GROUP_GAP(S) \ | |
1044 (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap) | |
1045 #define DR_GROUP_SAME_DR_STMT(S) \ | |
1046 (gcc_checking_assert ((S)->dr_aux.dr), (S)->same_dr_stmt) | |
1047 | |
1048 #define REDUC_GROUP_FIRST_ELEMENT(S) \ | |
1049 (gcc_checking_assert (!(S)->dr_aux.dr), (S)->first_element) | |
1050 #define REDUC_GROUP_NEXT_ELEMENT(S) \ | |
1051 (gcc_checking_assert (!(S)->dr_aux.dr), (S)->next_element) | |
1052 #define REDUC_GROUP_SIZE(S) \ | |
1053 (gcc_checking_assert (!(S)->dr_aux.dr), (S)->size) | |
785 | 1054 |
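The checking asserts split the old GROUP_* accessors by group kind: DR_GROUP_* now requires the statement to have a data reference, REDUC_GROUP_* requires it not to. Walking an interleaving chain then looks like this sketch (process_member is a placeholder):

    for (stmt_vec_info member = DR_GROUP_FIRST_ELEMENT (stmt_info);
         member;
         member = DR_GROUP_NEXT_ELEMENT (member))
      process_member (member);  /* placeholder for per-member work */
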
786 #define STMT_VINFO_RELEVANT_P(S) ((S)->relevant != vect_unused_in_scope) | 1055 #define STMT_VINFO_RELEVANT_P(S) ((S)->relevant != vect_unused_in_scope) |
787 | 1056 |
788 #define HYBRID_SLP_STMT(S) ((S)->slp_type == hybrid) | 1057 #define HYBRID_SLP_STMT(S) ((S)->slp_type == hybrid) |
789 #define PURE_SLP_STMT(S) ((S)->slp_type == pure_slp) | 1058 #define PURE_SLP_STMT(S) ((S)->slp_type == pure_slp) |
790 #define STMT_SLP_TYPE(S) (S)->slp_type | 1059 #define STMT_SLP_TYPE(S) (S)->slp_type |
791 | 1060 |
792 struct dataref_aux { | |
793 /* The misalignment in bytes of the reference, or -1 if not known. */ | |
794 int misalignment; | |
795 /* The byte alignment that we'd ideally like the reference to have, | |
796 and the value that misalignment is measured against. */ | |
797 int target_alignment; | |
798 /* If true the alignment of base_decl needs to be increased. */ | |
799 bool base_misaligned; | |
800 tree base_decl; | |
801 }; | |
802 | |
803 #define DR_VECT_AUX(dr) ((dataref_aux *)(dr)->aux) | |
804 | |
805 #define VECT_MAX_COST 1000 | 1061 #define VECT_MAX_COST 1000 |
806 | 1062 |
807 /* The maximum number of intermediate steps required in multi-step type | 1063 /* The maximum number of intermediate steps required in multi-step type |
808 conversion. */ | 1064 conversion. */ |
809 #define MAX_INTERM_CVT_STEPS 3 | 1065 #define MAX_INTERM_CVT_STEPS 3 |
810 | 1066 |
811 /* The maximum vectorization factor supported by any target (V64QI). */ | 1067 #define MAX_VECTORIZATION_FACTOR INT_MAX |
812 #define MAX_VECTORIZATION_FACTOR 64 | |
813 | 1068 |
814 /* Nonzero if TYPE represents a (scalar) boolean type or type | 1069 /* Nonzero if TYPE represents a (scalar) boolean type or type |
815 in the middle-end compatible with it (unsigned precision 1 integral | 1070 in the middle-end compatible with it (unsigned precision 1 integral |
816 types). Used to determine which types should be vectorized as | 1071 types). Used to determine which types should be vectorized as |
817 VECTOR_BOOLEAN_TYPE_P. */ | 1072 VECTOR_BOOLEAN_TYPE_P. */ |
821 || ((TREE_CODE (TYPE) == INTEGER_TYPE \ | 1076 || ((TREE_CODE (TYPE) == INTEGER_TYPE \ |
822 || TREE_CODE (TYPE) == ENUMERAL_TYPE) \ | 1077 || TREE_CODE (TYPE) == ENUMERAL_TYPE) \ |
823 && TYPE_PRECISION (TYPE) == 1 \ | 1078 && TYPE_PRECISION (TYPE) == 1 \ |
824 && TYPE_UNSIGNED (TYPE))) | 1079 && TYPE_UNSIGNED (TYPE))) |
825 | 1080 |
826 extern vec<stmt_vec_info> stmt_vec_info_vec; | 1081 static inline bool |
827 | 1082 nested_in_vect_loop_p (struct loop *loop, stmt_vec_info stmt_info) |
828 void init_stmt_vec_info_vec (void); | 1083 { |
829 void free_stmt_vec_info_vec (void); | 1084 return (loop->inner |
830 | 1085 && (loop->inner == (gimple_bb (stmt_info->stmt))->loop_father)); |
831 /* Return a stmt_vec_info corresponding to STMT. */ | 1086 } |
1087 | |
1088 /* Return the earlier statement between STMT1_INFO and STMT2_INFO. */ | |
832 | 1089 |
833 static inline stmt_vec_info | 1090 static inline stmt_vec_info |
834 vinfo_for_stmt (gimple *stmt) | 1091 get_earlier_stmt (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info) |
835 { | 1092 { |
836 int uid = gimple_uid (stmt); | 1093 gcc_checking_assert ((STMT_VINFO_IN_PATTERN_P (stmt1_info) |
837 if (uid <= 0) | 1094 || !STMT_VINFO_RELATED_STMT (stmt1_info)) |
838 return NULL; | 1095 && (STMT_VINFO_IN_PATTERN_P (stmt2_info) |
839 | 1096 || !STMT_VINFO_RELATED_STMT (stmt2_info))); |
840 return stmt_vec_info_vec[uid - 1]; | 1097 |
841 } | 1098 if (gimple_uid (stmt1_info->stmt) < gimple_uid (stmt2_info->stmt)) |
842 | 1099 return stmt1_info; |
843 /* Set vectorizer information INFO for STMT. */ | |
844 | |
845 static inline void | |
846 set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info) | |
847 { | |
848 unsigned int uid = gimple_uid (stmt); | |
849 if (uid == 0) | |
850 { | |
851 gcc_checking_assert (info); | |
852 uid = stmt_vec_info_vec.length () + 1; | |
853 gimple_set_uid (stmt, uid); | |
854 stmt_vec_info_vec.safe_push (info); | |
855 } | |
856 else | 1100 else |
857 { | 1101 return stmt2_info; |
858 gcc_checking_assert (info == NULL); | 1102 } |
859 stmt_vec_info_vec[uid - 1] = info; | 1103 |
860 } | 1104 /* Return the later statement between STMT1_INFO and STMT2_INFO. */ |
861 } | 1105 |
862 | 1106 static inline stmt_vec_info |
863 /* Return the earlier statement between STMT1 and STMT2. */ | 1107 get_later_stmt (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info) |
864 | 1108 { |
865 static inline gimple * | 1109 gcc_checking_assert ((STMT_VINFO_IN_PATTERN_P (stmt1_info) |
866 get_earlier_stmt (gimple *stmt1, gimple *stmt2) | 1110 || !STMT_VINFO_RELATED_STMT (stmt1_info)) |
867 { | 1111 && (STMT_VINFO_IN_PATTERN_P (stmt2_info) |
868 unsigned int uid1, uid2; | 1112 || !STMT_VINFO_RELATED_STMT (stmt2_info))); |
869 | 1113 |
870 if (stmt1 == NULL) | 1114 if (gimple_uid (stmt1_info->stmt) > gimple_uid (stmt2_info->stmt)) |
871 return stmt2; | 1115 return stmt1_info; |
872 | |
873 if (stmt2 == NULL) | |
874 return stmt1; | |
875 | |
876 uid1 = gimple_uid (stmt1); | |
877 uid2 = gimple_uid (stmt2); | |
878 | |
879 if (uid1 == 0 || uid2 == 0) | |
880 return NULL; | |
881 | |
882 gcc_checking_assert (uid1 <= stmt_vec_info_vec.length () | |
883 && uid2 <= stmt_vec_info_vec.length ()); | |
884 | |
885 if (uid1 < uid2) | |
886 return stmt1; | |
887 else | 1116 else |
888 return stmt2; | 1117 return stmt2_info; |
889 } | |
890 | |
891 /* Return the later statement between STMT1 and STMT2. */ | |
892 | |
893 static inline gimple * | |
894 get_later_stmt (gimple *stmt1, gimple *stmt2) | |
895 { | |
896 unsigned int uid1, uid2; | |
897 | |
898 if (stmt1 == NULL) | |
899 return stmt2; | |
900 | |
901 if (stmt2 == NULL) | |
902 return stmt1; | |
903 | |
904 uid1 = gimple_uid (stmt1); | |
905 uid2 = gimple_uid (stmt2); | |
906 | |
907 if (uid1 == 0 || uid2 == 0) | |
908 return NULL; | |
909 | |
910 gcc_assert (uid1 <= stmt_vec_info_vec.length ()); | |
911 gcc_assert (uid2 <= stmt_vec_info_vec.length ()); | |
912 | |
913 if (uid1 > uid2) | |
914 return stmt1; | |
915 else | |
916 return stmt2; | |
917 } | 1118 } |
918 | 1119 |
919 /* Return TRUE if a statement represented by STMT_INFO is a part of a | 1120 /* Return TRUE if a statement represented by STMT_INFO is a part of a |
920 pattern. */ | 1121 pattern. */ |
921 | 1122 |
922 static inline bool | 1123 static inline bool |
923 is_pattern_stmt_p (stmt_vec_info stmt_info) | 1124 is_pattern_stmt_p (stmt_vec_info stmt_info) |
924 { | 1125 { |
925 gimple *related_stmt; | 1126 return stmt_info->pattern_stmt_p; |
926 stmt_vec_info related_stmt_info; | 1127 } |
927 | 1128 |
928 related_stmt = STMT_VINFO_RELATED_STMT (stmt_info); | 1129 /* If STMT_INFO is a pattern statement, return the statement that it |
929 if (related_stmt | 1130 replaces, otherwise return STMT_INFO itself. */ |
930 && (related_stmt_info = vinfo_for_stmt (related_stmt)) | 1131 |
931 && STMT_VINFO_IN_PATTERN_P (related_stmt_info)) | 1132 inline stmt_vec_info |
932 return true; | 1133 vect_orig_stmt (stmt_vec_info stmt_info) |
933 | 1134 { |
934 return false; | 1135 if (is_pattern_stmt_p (stmt_info)) |
1136 return STMT_VINFO_RELATED_STMT (stmt_info); | |
1137 return stmt_info; | |
1138 } | |
1139 | |
1140 /* If STMT_INFO has been replaced by a pattern statement, return the | |
1141 replacement statement, otherwise return STMT_INFO itself. */ | |
1142 | |
1143 inline stmt_vec_info | |
1144 vect_stmt_to_vectorize (stmt_vec_info stmt_info) | |
1145 { | |
1146 if (STMT_VINFO_IN_PATTERN_P (stmt_info)) | |
1147 return STMT_VINFO_RELATED_STMT (stmt_info); | |
1148 return stmt_info; | |
935 } | 1149 } |
936 | 1150 |
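The two helpers are inverses across the pattern relationship described for RELATED_STMT above; a small sketch (stmt_info is a placeholder for any registered statement):

    stmt_vec_info vect = vect_stmt_to_vectorize (stmt_info); /* pattern stmt, if any */
    stmt_vec_info orig = vect_orig_stmt (vect);              /* back to the original */
    /* Starting from the original statement of a pattern this round-trips,
       so orig == stmt_info; starting from the pattern statement itself,
       vect_orig_stmt instead returns the scalar statement it replaced.  */
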
937 /* Return true if BB is a loop header. */ | 1151 /* Return true if BB is a loop header. */ |
938 | 1152 |
939 static inline bool | 1153 static inline bool |
982 init_cost (struct loop *loop_info) | 1196 init_cost (struct loop *loop_info) |
983 { | 1197 { |
984 return targetm.vectorize.init_cost (loop_info); | 1198 return targetm.vectorize.init_cost (loop_info); |
985 } | 1199 } |
986 | 1200 |
1201 extern void dump_stmt_cost (FILE *, void *, int, enum vect_cost_for_stmt, | |
1202 stmt_vec_info, int, unsigned, | |
1203 enum vect_cost_model_location); | |
1204 | |
987 /* Alias targetm.vectorize.add_stmt_cost. */ | 1205 /* Alias targetm.vectorize.add_stmt_cost. */ |
988 | 1206 |
989 static inline unsigned | 1207 static inline unsigned |
990 add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, | 1208 add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, |
991 stmt_vec_info stmt_info, int misalign, | 1209 stmt_vec_info stmt_info, int misalign, |
992 enum vect_cost_model_location where) | 1210 enum vect_cost_model_location where) |
993 { | 1211 { |
994 return targetm.vectorize.add_stmt_cost (data, count, kind, | 1212 unsigned cost = targetm.vectorize.add_stmt_cost (data, count, kind, |
995 stmt_info, misalign, where); | 1213 stmt_info, misalign, where); |
1214 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1215 dump_stmt_cost (dump_file, data, count, kind, stmt_info, misalign, | |
1216 cost, where); | |
1217 return cost; | |
996 } | 1218 } |
997 | 1219 |
998 /* Alias targetm.vectorize.finish_cost. */ | 1220 /* Alias targetm.vectorize.finish_cost. */ |
999 | 1221 |
1000 static inline void | 1222 static inline void |
1010 destroy_cost_data (void *data) | 1232 destroy_cost_data (void *data) |
1011 { | 1233 { |
1012 targetm.vectorize.destroy_cost_data (data); | 1234 targetm.vectorize.destroy_cost_data (data); |
1013 } | 1235 } |
1014 | 1236 |
1237 inline void | |
1238 add_stmt_costs (void *data, stmt_vector_for_cost *cost_vec) | |
1239 { | |
1240 stmt_info_for_cost *cost; | |
1241 unsigned i; | |
1242 FOR_EACH_VEC_ELT (*cost_vec, i, cost) | |
1243 add_stmt_cost (data, cost->count, cost->kind, cost->stmt_info, | |
1244 cost->misalign, cost->where); | |
1245 } | |
1246 | |
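A sketch of the intended flow, assuming cost_vec was filled during analysis (e.g. by record_stmt_cost) and loop is the loop being costed:

    void *cost_data = init_cost (loop);     /* target cost-model instance */
    add_stmt_costs (cost_data, &cost_vec);  /* replay each recorded cost */
    unsigned prologue_cost, body_cost, epilogue_cost;
    finish_cost (cost_data, &prologue_cost, &body_cost, &epilogue_cost);
    destroy_cost_data (cost_data);
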
1015 /*-----------------------------------------------------------------*/ | 1247 /*-----------------------------------------------------------------*/ |
1016 /* Info on data references alignment. */ | 1248 /* Info on data references alignment. */ |
1017 /*-----------------------------------------------------------------*/ | 1249 /*-----------------------------------------------------------------*/ |
1250 #define DR_MISALIGNMENT_UNKNOWN (-1) | |
1251 #define DR_MISALIGNMENT_UNINITIALIZED (-2) | |
1252 | |
1018 inline void | 1253 inline void |
1019 set_dr_misalignment (struct data_reference *dr, int val) | 1254 set_dr_misalignment (dr_vec_info *dr_info, int val) |
1020 { | 1255 { |
1021 dataref_aux *data_aux = DR_VECT_AUX (dr); | 1256 dr_info->misalignment = val; |
1022 | |
1023 if (!data_aux) | |
1024 { | |
1025 data_aux = XCNEW (dataref_aux); | |
1026 dr->aux = data_aux; | |
1027 } | |
1028 | |
1029 data_aux->misalignment = val; | |
1030 } | 1257 } |
1031 | 1258 |
1032 inline int | 1259 inline int |
1033 dr_misalignment (struct data_reference *dr) | 1260 dr_misalignment (dr_vec_info *dr_info) |
1034 { | 1261 { |
1035 return DR_VECT_AUX (dr)->misalignment; | 1262 int misalign = dr_info->misalignment; |
1263 gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED); | |
1264 return misalign; | |
1036 } | 1265 } |
1037 | 1266 |
1038 /* Reflects actual alignment of first access in the vectorized loop, | 1267 /* Reflects actual alignment of first access in the vectorized loop, |
1039 taking into account peeling/versioning if applied. */ | 1268 taking into account peeling/versioning if applied. */ |
1040 #define DR_MISALIGNMENT(DR) dr_misalignment (DR) | 1269 #define DR_MISALIGNMENT(DR) dr_misalignment (DR) |
1041 #define SET_DR_MISALIGNMENT(DR, VAL) set_dr_misalignment (DR, VAL) | 1270 #define SET_DR_MISALIGNMENT(DR, VAL) set_dr_misalignment (DR, VAL) |
1042 #define DR_MISALIGNMENT_UNKNOWN (-1) | |
1043 | 1271 |
1044 /* Only defined once DR_MISALIGNMENT is defined. */ | 1272 /* Only defined once DR_MISALIGNMENT is defined. */ |
1045 #define DR_TARGET_ALIGNMENT(DR) DR_VECT_AUX (DR)->target_alignment | 1273 #define DR_TARGET_ALIGNMENT(DR) ((DR)->target_alignment) |
1046 | 1274 |
1047 /* Return true if data access DR is aligned to its target alignment | 1275 /* Return true if data access DR_INFO is aligned to its target alignment |
1048 (which may be less than a full vector). */ | 1276 (which may be less than a full vector). */ |
1049 | 1277 |
1050 static inline bool | 1278 static inline bool |
1051 aligned_access_p (struct data_reference *data_ref_info) | 1279 aligned_access_p (dr_vec_info *dr_info) |
1052 { | 1280 { |
1053 return (DR_MISALIGNMENT (data_ref_info) == 0); | 1281 return (DR_MISALIGNMENT (dr_info) == 0); |
1054 } | 1282 } |
1055 | 1283 |
1056 /* Return TRUE if the alignment of the data access is known, and FALSE | 1284 /* Return TRUE if the alignment of the data access is known, and FALSE |
1057 otherwise. */ | 1285 otherwise. */ |
1058 | 1286 |
1059 static inline bool | 1287 static inline bool |
1060 known_alignment_for_access_p (struct data_reference *data_ref_info) | 1288 known_alignment_for_access_p (dr_vec_info *dr_info) |
1061 { | 1289 { |
1062 return (DR_MISALIGNMENT (data_ref_info) != DR_MISALIGNMENT_UNKNOWN); | 1290 return (DR_MISALIGNMENT (dr_info) != DR_MISALIGNMENT_UNKNOWN); |
1063 } | 1291 } |
1064 | 1292 |
1065 /* Return the minimum alignment in bytes that the vectorized version | 1293 /* Return the minimum alignment in bytes that the vectorized version |
1066 of DR is guaranteed to have. */ | 1294 of DR_INFO is guaranteed to have. */ |
1067 | 1295 |
1068 static inline unsigned int | 1296 static inline unsigned int |
1069 vect_known_alignment_in_bytes (struct data_reference *dr) | 1297 vect_known_alignment_in_bytes (dr_vec_info *dr_info) |
1070 { | 1298 { |
1071 if (DR_MISALIGNMENT (dr) == DR_MISALIGNMENT_UNKNOWN) | 1299 if (DR_MISALIGNMENT (dr_info) == DR_MISALIGNMENT_UNKNOWN) |
1072 return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr))); | 1300 return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_info->dr))); |
1073 if (DR_MISALIGNMENT (dr) == 0) | 1301 if (DR_MISALIGNMENT (dr_info) == 0) |
1074 return DR_TARGET_ALIGNMENT (dr); | 1302 return DR_TARGET_ALIGNMENT (dr_info); |
1075 return DR_MISALIGNMENT (dr) & -DR_MISALIGNMENT (dr); | 1303 return DR_MISALIGNMENT (dr_info) & -DR_MISALIGNMENT (dr_info); |
1076 } | 1304 } |
1077 | 1305 |
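The final return relies on the lowest-set-bit identity m & -m: an access that is m bytes past an aligned boundary is still aligned to the largest power of two dividing m. A worked instance with assumed values:

    /* Target alignment 16, known misalignment 12:
       12 & -12 == 4, so each access is at least 4-byte aligned.  */
    unsigned int m = 12;
    unsigned int guaranteed = m & -m;  /* == 4 */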
1078 /* Return the behavior of DR with respect to the vectorization context | 1306 /* Return the behavior of DR_INFO with respect to the vectorization context |
1079 (which for outer loop vectorization might not be the behavior recorded | 1307 (which for outer loop vectorization might not be the behavior recorded |
1080 in DR itself). */ | 1308 in DR_INFO itself). */ |
1081 | 1309 |
1082 static inline innermost_loop_behavior * | 1310 static inline innermost_loop_behavior * |
1083 vect_dr_behavior (data_reference *dr) | 1311 vect_dr_behavior (dr_vec_info *dr_info) |
1084 { | 1312 { |
1085 gimple *stmt = DR_STMT (dr); | 1313 stmt_vec_info stmt_info = dr_info->stmt; |
1086 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
1087 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | 1314 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); |
1088 if (loop_vinfo == NULL | 1315 if (loop_vinfo == NULL |
1089 || !nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt)) | 1316 || !nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt_info)) |
1090 return &DR_INNERMOST (dr); | 1317 return &DR_INNERMOST (dr_info->dr); |
1091 else | 1318 else |
1092 return &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info); | 1319 return &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info); |
1093 } | 1320 } |
1094 | 1321 |
1095 /* Return true if the vect cost model is unlimited. */ | 1322 /* Return true if the vect cost model is unlimited. */ |
1100 && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT) | 1327 && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT) |
1101 return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED; | 1328 return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED; |
1102 return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED); | 1329 return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED); |
1103 } | 1330 } |
1104 | 1331 |
1332 /* Return true if the loop described by LOOP_VINFO is fully-masked and | |
1333 if the first iteration should use a partial mask in order to achieve | |
1334 alignment. */ | |
1335 | |
1336 static inline bool | |
1337 vect_use_loop_mask_for_alignment_p (loop_vec_info loop_vinfo) | |
1338 { | |
1339 return (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) | |
1340 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)); | |
1341 } | |
1342 | |
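When this predicate holds, alignment is reached by masking out leading lanes of the first vector iteration instead of emitting a scalar prologue. An illustrative comment-only sketch with assumed lane counts:

    /* E.g. VF == 4 with the first element two lanes short of an aligned
       boundary: iteration 1 runs under mask {0, 0, 1, 1}, and every
       later iteration starts on an aligned address with a full mask.  */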
1343 /* Return the number of vectors of type VECTYPE that are needed to get | |
1344 NUNITS elements. NUNITS should be based on the vectorization factor, | |
1345 so it is always a known multiple of the number of elements in VECTYPE. */ | |
1346 | |
1347 static inline unsigned int | |
1348 vect_get_num_vectors (poly_uint64 nunits, tree vectype) | |
1349 { | |
1350 return exact_div (nunits, TYPE_VECTOR_SUBPARTS (vectype)).to_constant (); | |
1351 } | |
1352 | |
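Because NUNITS is a known multiple of the subpart count, exact_div always succeeds and to_constant is safe. A worked instance, assuming vectype is a 4-element vector such as V4SI:

    /* VF == 8, TYPE_VECTOR_SUBPARTS (vectype) == 4:  */
    unsigned int n = vect_get_num_vectors (8, vectype);  /* n == 2 */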
1105 /* Return the number of copies needed for loop vectorization when | 1353 /* Return the number of copies needed for loop vectorization when |
1106 a statement operates on vectors of type VECTYPE. This is the | 1354 a statement operates on vectors of type VECTYPE. This is the |
1107 vectorization factor divided by the number of elements in | 1355 vectorization factor divided by the number of elements in |
1108 VECTYPE and is always known at compile time. */ | 1356 VECTYPE and is always known at compile time. */ |
1109 | 1357 |
1110 static inline unsigned int | 1358 static inline unsigned int |
1111 vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype) | 1359 vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype) |
1112 { | 1360 { |
1113 gcc_checking_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo) | 1361 return vect_get_num_vectors (LOOP_VINFO_VECT_FACTOR (loop_vinfo), vectype); |
1114 % TYPE_VECTOR_SUBPARTS (vectype) == 0); | 1362 } |
1115 return (LOOP_VINFO_VECT_FACTOR (loop_vinfo) | 1363 |
1116 / TYPE_VECTOR_SUBPARTS (vectype)); | 1364 /* Update maximum unit count *MAX_NUNITS so that it accounts for |
1117 } | 1365 the number of units in vector type VECTYPE. *MAX_NUNITS can be 1 |
1118 | 1366 if we haven't yet recorded any vector types. */ |
1119 /* Return the size of the value accessed by unvectorized data reference DR. | 1367 |
1120 This is only valid once STMT_VINFO_VECTYPE has been calculated for the | 1368 static inline void |
1121 associated gimple statement, since that guarantees that DR accesses | 1369 vect_update_max_nunits (poly_uint64 *max_nunits, tree vectype) |
1122 either a scalar or a scalar equivalent. ("Scalar equivalent" here | 1370 { |
1123 includes things like V1SI, which can be vectorized in the same way | 1371 /* All unit counts have the form current_vector_size * X for some |
1372 rational X, so two unit sizes must have a common multiple. | |
1373 Everything is a multiple of the initial value of 1. */ | |
1374 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); | |
1375 *max_nunits = force_common_multiple (*max_nunits, nunits); | |
1376 } | |
1377 | |
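Starting from 1 and folding in each vector type keeps *MAX_NUNITS at the smallest unit count that every recorded vectype divides. A hedged fixed-length illustration (v4si_type and v8hi_type are placeholder tree nodes):

    poly_uint64 max_nunits = 1;
    vect_update_max_nunits (&max_nunits, v4si_type);  /* max_nunits == 4 */
    vect_update_max_nunits (&max_nunits, v8hi_type);  /* max_nunits == 8 */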
1378 /* Return the vectorization factor that should be used for costing | |
1379 purposes while vectorizing the loop described by LOOP_VINFO. | |
1380 Pick a reasonable estimate if the vectorization factor isn't | |
1381 known at compile time. */ | |
1382 | |
1383 static inline unsigned int | |
1384 vect_vf_for_cost (loop_vec_info loop_vinfo) | |
1385 { | |
1386 return estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo)); | |
1387 } | |
1388 | |
1389 /* Estimate the number of elements in VEC_TYPE for costing purposes. | |
1390 Pick a reasonable estimate if the exact number isn't known at | |
1391 compile time. */ | |
1392 | |
1393 static inline unsigned int | |
1394 vect_nunits_for_cost (tree vec_type) | |
1395 { | |
1396 return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vec_type)); | |
1397 } | |
1398 | |
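Both helpers exist because with variable-length vectors the VF and subpart counts are poly_uint64 values (e.g. 4 + 4x on SVE), while costing needs one scalar; estimated_poly_value supplies a plausible estimate. A sketch, with loop_vinfo and vectype assumed in scope:

    unsigned int vf_est = vect_vf_for_cost (loop_vinfo);
    unsigned int nunits_est = vect_nunits_for_cost (vectype);
    /* For fixed-length vectors both estimates are exact; for
       VF == 4 + 4x the result depends on the target's estimate of x.  */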
1399 /* Return the maximum possible vectorization factor for LOOP_VINFO. */ | |
1400 | |
1401 static inline unsigned HOST_WIDE_INT | |
1402 vect_max_vf (loop_vec_info loop_vinfo) | |
1403 { | |
1404 unsigned HOST_WIDE_INT vf; | |
1405 if (LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf)) | |
1406 return vf; | |
1407 return MAX_VECTORIZATION_FACTOR; | |
1408 } | |
1409 | |
1410 /* Return the size of the value accessed by unvectorized data reference | |
1411 DR_INFO. This is only valid once STMT_VINFO_VECTYPE has been calculated | |
1412 for the associated gimple statement, since that guarantees that DR_INFO | |
1413 accesses either a scalar or a scalar equivalent. ("Scalar equivalent" | |
1414 here includes things like V1SI, which can be vectorized in the same way | |
1124 as a plain SI.) */ | 1415 as a plain SI.) */ |
1125 | 1416 |
1126 inline unsigned int | 1417 inline unsigned int |
1127 vect_get_scalar_dr_size (struct data_reference *dr) | 1418 vect_get_scalar_dr_size (dr_vec_info *dr_info) |
1128 { | 1419 { |
1129 return tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)))); | 1420 return tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_info->dr)))); |
1130 } | 1421 } |
1131 | 1422 |
1132 /* Source location */ | 1423 /* Source location + hotness information. */ |
1133 extern source_location vect_location; | 1424 extern dump_user_location_t vect_location; |
1425 | |
1426 /* A macro for calling: | |
1427 dump_begin_scope (MSG, vect_location); | |
1428 via an RAII object, thus printing "=== MSG ===\n" to the dumpfile etc, | |
1429 and then calling | |
1430 dump_end_scope (); | |
1431 once the object goes out of scope, thus capturing the nesting of | |
1432 the scopes. | |
1433 | |
1434 These scopes affect dump messages within them: dump messages at the | |
1435 top level implicitly default to MSG_PRIORITY_USER_FACING, whereas those | |
1436 in a nested scope implicitly default to MSG_PRIORITY_INTERNALS. */ | |
1437 | |
1438 #define DUMP_VECT_SCOPE(MSG) \ | |
1439 AUTO_DUMP_SCOPE (MSG, vect_location) | |
1134 | 1440 |
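Typical use brackets a whole analysis phase; the RAII object closes the scope on every return path. A representative sketch (the function name is hypothetical):

    static bool
    vect_analyze_something (loop_vec_info loop_vinfo)
    {
      DUMP_VECT_SCOPE ("vect_analyze_something");
      /* dump_printf calls here default to MSG_PRIORITY_INTERNALS.  */
      return true;
    }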
1135 /*-----------------------------------------------------------------*/ | 1441 /*-----------------------------------------------------------------*/ |
1136 /* Function prototypes. */ | 1442 /* Function prototypes. */ |
1137 /*-----------------------------------------------------------------*/ | 1443 /*-----------------------------------------------------------------*/ |
1138 | 1444 |
1139 /* Simple loop peeling and versioning utilities for vectorizer's purposes - | 1445 /* Simple loop peeling and versioning utilities for vectorizer's purposes - |
1140 in tree-vect-loop-manip.c. */ | 1446 in tree-vect-loop-manip.c. */ |
1141 extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree); | 1447 extern void vect_set_loop_condition (struct loop *, loop_vec_info, |
1448 tree, tree, tree, bool); | |
1142 extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge); | 1449 extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge); |
1143 struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, | 1450 struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, |
1144 struct loop *, edge); | 1451 struct loop *, edge); |
1145 extern void vect_loop_versioning (loop_vec_info, unsigned int, bool); | 1452 extern void vect_loop_versioning (loop_vec_info, unsigned int, bool, |
1453 poly_uint64); | |
1146 extern struct loop *vect_do_peeling (loop_vec_info, tree, tree, | 1454 extern struct loop *vect_do_peeling (loop_vec_info, tree, tree, |
1147 tree *, int, bool, bool); | 1455 tree *, tree *, tree *, int, bool, bool); |
1148 extern source_location find_loop_location (struct loop *); | 1456 extern void vect_prepare_for_masked_peels (loop_vec_info); |
1457 extern dump_user_location_t find_loop_location (struct loop *); | |
1149 extern bool vect_can_advance_ivs_p (loop_vec_info); | 1458 extern bool vect_can_advance_ivs_p (loop_vec_info); |
1150 | 1459 |
1151 /* In tree-vect-stmts.c. */ | 1460 /* In tree-vect-stmts.c. */ |
1152 extern unsigned int current_vector_size; | 1461 extern poly_uint64 current_vector_size; |
1153 extern tree get_vectype_for_scalar_type (tree); | 1462 extern tree get_vectype_for_scalar_type (tree); |
1463 extern tree get_vectype_for_scalar_type_and_size (tree, poly_uint64); | |
1154 extern tree get_mask_type_for_scalar_type (tree); | 1464 extern tree get_mask_type_for_scalar_type (tree); |
1155 extern tree get_same_sized_vectype (tree, tree); | 1465 extern tree get_same_sized_vectype (tree, tree); |
1156 extern bool vect_is_simple_use (tree, vec_info *, gimple **, | 1466 extern bool vect_get_loop_mask_type (loop_vec_info); |
1157 enum vect_def_type *); | 1467 extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *, |
1158 extern bool vect_is_simple_use (tree, vec_info *, gimple **, | 1468 stmt_vec_info * = NULL, gimple ** = NULL); |
1159 enum vect_def_type *, tree *); | 1469 extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *, |
1160 extern bool supportable_widening_operation (enum tree_code, gimple *, tree, | 1470 tree *, stmt_vec_info * = NULL, |
1161 tree, enum tree_code *, | 1471 gimple ** = NULL); |
1472 extern bool supportable_widening_operation (enum tree_code, stmt_vec_info, | |
1473 tree, tree, enum tree_code *, | |
1162 enum tree_code *, int *, | 1474 enum tree_code *, int *, |
1163 vec<tree> *); | 1475 vec<tree> *); |
1164 extern bool supportable_narrowing_operation (enum tree_code, tree, tree, | 1476 extern bool supportable_narrowing_operation (enum tree_code, tree, tree, |
1165 enum tree_code *, | 1477 enum tree_code *, |
1166 int *, vec<tree> *); | 1478 int *, vec<tree> *); |
1167 extern stmt_vec_info new_stmt_vec_info (gimple *stmt, vec_info *); | |
1168 extern void free_stmt_vec_info (gimple *stmt); | |
1169 extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *, | |
1170 int, stmt_vector_for_cost *, | |
1171 stmt_vector_for_cost *); | |
1172 extern void vect_model_store_cost (stmt_vec_info, int, vect_memory_access_type, | |
1173 enum vect_def_type, slp_tree, | |
1174 stmt_vector_for_cost *, | |
1175 stmt_vector_for_cost *); | |
1176 extern void vect_model_load_cost (stmt_vec_info, int, vect_memory_access_type, | |
1177 slp_tree, stmt_vector_for_cost *, | |
1178 stmt_vector_for_cost *); | |
1179 extern unsigned record_stmt_cost (stmt_vector_for_cost *, int, | 1479 extern unsigned record_stmt_cost (stmt_vector_for_cost *, int, |
1180 enum vect_cost_for_stmt, stmt_vec_info, | 1480 enum vect_cost_for_stmt, stmt_vec_info, |
1181 int, enum vect_cost_model_location); | 1481 int, enum vect_cost_model_location); |
1182 extern void vect_finish_stmt_generation (gimple *, gimple *, | 1482 extern stmt_vec_info vect_finish_replace_stmt (stmt_vec_info, gimple *); |
1183 gimple_stmt_iterator *); | 1483 extern stmt_vec_info vect_finish_stmt_generation (stmt_vec_info, gimple *, |
1184 extern bool vect_mark_stmts_to_be_vectorized (loop_vec_info); | 1484 gimple_stmt_iterator *); |
1185 extern tree vect_get_vec_def_for_operand_1 (gimple *, enum vect_def_type); | 1485 extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info); |
1186 extern tree vect_get_vec_def_for_operand (tree, gimple *, tree = NULL); | 1486 extern tree vect_get_store_rhs (stmt_vec_info); |
1187 extern void vect_get_vec_defs (tree, tree, gimple *, vec<tree> *, | 1487 extern tree vect_get_vec_def_for_operand_1 (stmt_vec_info, enum vect_def_type); |
1488 extern tree vect_get_vec_def_for_operand (tree, stmt_vec_info, tree = NULL); | |
1489 extern void vect_get_vec_defs (tree, tree, stmt_vec_info, vec<tree> *, | |
1188 vec<tree> *, slp_tree); | 1490 vec<tree> *, slp_tree); |
1189 extern void vect_get_vec_defs_for_stmt_copy (enum vect_def_type *, | 1491 extern void vect_get_vec_defs_for_stmt_copy (vec_info *, |
1190 vec<tree> *, vec<tree> *); | 1492 vec<tree> *, vec<tree> *); |
1191 extern tree vect_init_vector (gimple *, tree, tree, | 1493 extern tree vect_init_vector (stmt_vec_info, tree, tree, |
1192 gimple_stmt_iterator *); | 1494 gimple_stmt_iterator *); |
1193 extern tree vect_get_vec_def_for_stmt_copy (enum vect_def_type, tree); | 1495 extern tree vect_get_vec_def_for_stmt_copy (vec_info *, tree); |
1194 extern bool vect_transform_stmt (gimple *, gimple_stmt_iterator *, | 1496 extern bool vect_transform_stmt (stmt_vec_info, gimple_stmt_iterator *, |
1195 bool *, slp_tree, slp_instance); | 1497 slp_tree, slp_instance); |
1196 extern void vect_remove_stores (gimple *); | 1498 extern void vect_remove_stores (stmt_vec_info); |
1197 extern bool vect_analyze_stmt (gimple *, bool *, slp_tree, slp_instance); | 1499 extern opt_result vect_analyze_stmt (stmt_vec_info, bool *, slp_tree, |
1198 extern bool vectorizable_condition (gimple *, gimple_stmt_iterator *, | 1500 slp_instance, stmt_vector_for_cost *); |
1199 gimple **, tree, int, slp_tree); | 1501 extern bool vectorizable_condition (stmt_vec_info, gimple_stmt_iterator *, |
1200 extern void vect_get_load_cost (struct data_reference *, int, bool, | 1502 stmt_vec_info *, tree, int, slp_tree, |
1503 stmt_vector_for_cost *); | |
1504 extern void vect_get_load_cost (stmt_vec_info, int, bool, | |
1201 unsigned int *, unsigned int *, | 1505 unsigned int *, unsigned int *, |
1202 stmt_vector_for_cost *, | 1506 stmt_vector_for_cost *, |
1203 stmt_vector_for_cost *, bool); | 1507 stmt_vector_for_cost *, bool); |
1204 extern void vect_get_store_cost (struct data_reference *, int, | 1508 extern void vect_get_store_cost (stmt_vec_info, int, |
1205 unsigned int *, stmt_vector_for_cost *); | 1509 unsigned int *, stmt_vector_for_cost *); |
1206 extern bool vect_supportable_shift (enum tree_code, tree); | 1510 extern bool vect_supportable_shift (enum tree_code, tree); |
1207 extern tree vect_gen_perm_mask_any (tree, vec_perm_indices); | 1511 extern tree vect_gen_perm_mask_any (tree, const vec_perm_indices &); |
1208 extern tree vect_gen_perm_mask_checked (tree, vec_perm_indices); | 1512 extern tree vect_gen_perm_mask_checked (tree, const vec_perm_indices &); |
1209 extern void optimize_mask_stores (struct loop*); | 1513 extern void optimize_mask_stores (struct loop*); |
1514 extern gcall *vect_gen_while (tree, tree, tree); | |
1515 extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree); | |
1516 extern opt_result vect_get_vector_types_for_stmt (stmt_vec_info, tree *, | |
1517 tree *); | |
1518 extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info); | |
1210 | 1519 |
1211 /* In tree-vect-data-refs.c. */ | 1520 /* In tree-vect-data-refs.c. */ |
1212 extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); | 1521 extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); |
1213 extern enum dr_alignment_support vect_supportable_dr_alignment | 1522 extern enum dr_alignment_support vect_supportable_dr_alignment |
1214 (struct data_reference *, bool); | 1523 (dr_vec_info *, bool); |
1215 extern tree vect_get_smallest_scalar_type (gimple *, HOST_WIDE_INT *, | 1524 extern tree vect_get_smallest_scalar_type (stmt_vec_info, HOST_WIDE_INT *, |
1216 HOST_WIDE_INT *); | 1525 HOST_WIDE_INT *); |
1217 extern bool vect_analyze_data_ref_dependences (loop_vec_info, int *); | 1526 extern opt_result vect_analyze_data_ref_dependences (loop_vec_info, unsigned int *); |
1218 extern bool vect_slp_analyze_instance_dependence (slp_instance); | 1527 extern bool vect_slp_analyze_instance_dependence (slp_instance); |
1219 extern bool vect_enhance_data_refs_alignment (loop_vec_info); | 1528 extern opt_result vect_enhance_data_refs_alignment (loop_vec_info); |
1220 extern bool vect_analyze_data_refs_alignment (loop_vec_info); | 1529 extern opt_result vect_analyze_data_refs_alignment (loop_vec_info); |
1221 extern bool vect_verify_datarefs_alignment (loop_vec_info); | 1530 extern opt_result vect_verify_datarefs_alignment (loop_vec_info); |
1222 extern bool vect_slp_analyze_and_verify_instance_alignment (slp_instance); | 1531 extern bool vect_slp_analyze_and_verify_instance_alignment (slp_instance); |
1223 extern bool vect_analyze_data_ref_accesses (vec_info *); | 1532 extern opt_result vect_analyze_data_ref_accesses (vec_info *); |
1224 extern bool vect_prune_runtime_alias_test_list (loop_vec_info); | 1533 extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info); |
1225 extern bool vect_check_gather_scatter (gimple *, loop_vec_info, | 1534 extern bool vect_gather_scatter_fn_p (bool, bool, tree, tree, unsigned int, |
1535 signop, int, internal_fn *, tree *); | |
1536 extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info, | |
1226 gather_scatter_info *); | 1537 gather_scatter_info *); |
1227 extern bool vect_analyze_data_refs (vec_info *, int *); | 1538 extern opt_result vect_find_stmt_data_reference (loop_p, gimple *, |
1539 vec<data_reference_p> *); | |
1540 extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *); | |
1228 extern void vect_record_base_alignments (vec_info *); | 1541 extern void vect_record_base_alignments (vec_info *); |
1229 extern tree vect_create_data_ref_ptr (gimple *, tree, struct loop *, tree, | 1542 extern tree vect_create_data_ref_ptr (stmt_vec_info, tree, struct loop *, tree, |
1230 tree *, gimple_stmt_iterator *, | 1543 tree *, gimple_stmt_iterator *, |
1231 gimple **, bool, bool *, | 1544 gimple **, bool, |
1232 tree = NULL_TREE); | 1545 tree = NULL_TREE, tree = NULL_TREE); |
1233 extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *, gimple *, | 1546 extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *, |
1234 tree); | 1547 stmt_vec_info, tree); |
1548 extern void vect_copy_ref_info (tree, tree); | |
1235 extern tree vect_create_destination_var (tree, tree); | 1549 extern tree vect_create_destination_var (tree, tree); |
1236 extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT); | 1550 extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT); |
1237 extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT); | 1551 extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool); |
1238 extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT); | 1552 extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT); |
1239 extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT); | 1553 extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool); |
1240 extern void vect_permute_store_chain (vec<tree> ,unsigned int, gimple *, | 1554 extern void vect_permute_store_chain (vec<tree> ,unsigned int, stmt_vec_info, |
1241 gimple_stmt_iterator *, vec<tree> *); | 1555 gimple_stmt_iterator *, vec<tree> *); |
1242 extern tree vect_setup_realignment (gimple *, gimple_stmt_iterator *, tree *, | 1556 extern tree vect_setup_realignment (stmt_vec_info, gimple_stmt_iterator *, |
1243 enum dr_alignment_support, tree, | 1557 tree *, enum dr_alignment_support, tree, |
1244 struct loop **); | 1558 struct loop **); |
1245 extern void vect_transform_grouped_load (gimple *, vec<tree> , int, | 1559 extern void vect_transform_grouped_load (stmt_vec_info, vec<tree> , int, |
1246 gimple_stmt_iterator *); | 1560 gimple_stmt_iterator *); |
1247 extern void vect_record_grouped_load_vectors (gimple *, vec<tree> ); | 1561 extern void vect_record_grouped_load_vectors (stmt_vec_info, vec<tree>); |
1248 extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); | 1562 extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); |
1249 extern tree vect_get_new_ssa_name (tree, enum vect_var_kind, | 1563 extern tree vect_get_new_ssa_name (tree, enum vect_var_kind, |
1250 const char * = NULL); | 1564 const char * = NULL); |
1251 extern tree vect_create_addr_base_for_vector_ref (gimple *, gimple_seq *, | 1565 extern tree vect_create_addr_base_for_vector_ref (stmt_vec_info, gimple_seq *, |
1252 tree, tree = NULL_TREE); | 1566 tree, tree = NULL_TREE); |
1253 | 1567 |
1254 /* In tree-vect-loop.c. */ | 1568 /* In tree-vect-loop.c. */ |
1255 /* FORNOW: Used in tree-parloops.c. */ | 1569 /* FORNOW: Used in tree-parloops.c. */ |
1256 extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *, | 1570 extern stmt_vec_info vect_force_simple_reduction (loop_vec_info, stmt_vec_info, |
1257 bool *, bool); | 1571 bool *, bool); |
1572 /* Used in gimple-loop-interchange.c. */ | |
1573 extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree, | |
1574 enum tree_code); | |
1258 /* Drive for loop analysis stage. */ | 1575 /* Drive for loop analysis stage. */ |
1259 extern loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info); | 1576 extern opt_loop_vec_info vect_analyze_loop (struct loop *, |
1577 loop_vec_info, | |
1578 vec_info_shared *); | |
1260 extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL); | 1579 extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL); |
1261 extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, bool); | 1580 extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, |
1581 tree *, bool); | |
1582 extern tree vect_halve_mask_nunits (tree); | |
1583 extern tree vect_double_mask_nunits (tree); | |
1584 extern void vect_record_loop_mask (loop_vec_info, vec_loop_masks *, | |
1585 unsigned int, tree); | |
1586 extern tree vect_get_loop_mask (gimple_stmt_iterator *, vec_loop_masks *, | |
1587 unsigned int, tree, unsigned int); | |
1588 | |
1262 /* Drive for loop transformation stage. */ | 1589 /* Drive for loop transformation stage. */ |
1263 extern struct loop *vect_transform_loop (loop_vec_info); | 1590 extern struct loop *vect_transform_loop (loop_vec_info); |
1264 extern loop_vec_info vect_analyze_loop_form (struct loop *); | 1591 extern opt_loop_vec_info vect_analyze_loop_form (struct loop *, |
1265 extern bool vectorizable_live_operation (gimple *, gimple_stmt_iterator *, | 1592 vec_info_shared *); |
1266 slp_tree, int, gimple **); | 1593 extern bool vectorizable_live_operation (stmt_vec_info, gimple_stmt_iterator *, |
1267 extern bool vectorizable_reduction (gimple *, gimple_stmt_iterator *, | 1594 slp_tree, int, stmt_vec_info *, |
1268 gimple **, slp_tree, slp_instance); | 1595 stmt_vector_for_cost *); |
1269 extern bool vectorizable_induction (gimple *, gimple_stmt_iterator *, | 1596 extern bool vectorizable_reduction (stmt_vec_info, gimple_stmt_iterator *, |
1270 gimple **, slp_tree); | 1597 stmt_vec_info *, slp_tree, slp_instance, |
1271 extern tree get_initial_def_for_reduction (gimple *, tree, tree *); | 1598 stmt_vector_for_cost *); |
1599 extern bool vectorizable_induction (stmt_vec_info, gimple_stmt_iterator *, | |
1600 stmt_vec_info *, slp_tree, | |
1601 stmt_vector_for_cost *); | |
1602 extern tree get_initial_def_for_reduction (stmt_vec_info, tree, tree *); | |
1272 extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code); | 1603 extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code); |
1273 extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, | 1604 extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, |
1274 stmt_vector_for_cost *, | 1605 stmt_vector_for_cost *, |
1275 stmt_vector_for_cost *, | 1606 stmt_vector_for_cost *, |
1276 stmt_vector_for_cost *); | 1607 stmt_vector_for_cost *); |
1608 extern tree cse_and_gimplify_to_preheader (loop_vec_info, tree); | |
1277 | 1609 |
1278 /* In tree-vect-slp.c. */ | 1610 /* In tree-vect-slp.c. */ |
1279 extern void vect_free_slp_instance (slp_instance); | 1611 extern void vect_free_slp_instance (slp_instance, bool); |
1280 extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> , | 1612 extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> , |
1281 gimple_stmt_iterator *, int, | 1613 gimple_stmt_iterator *, poly_uint64, |
1282 slp_instance, bool, unsigned *); | 1614 slp_instance, bool, unsigned *); |
1283 extern bool vect_slp_analyze_operations (vec_info *); | 1615 extern bool vect_slp_analyze_operations (vec_info *); |
1284 extern bool vect_schedule_slp (vec_info *); | 1616 extern void vect_schedule_slp (vec_info *); |
1285 extern bool vect_analyze_slp (vec_info *, unsigned); | 1617 extern opt_result vect_analyze_slp (vec_info *, unsigned); |
1286 extern bool vect_make_slp_decision (loop_vec_info); | 1618 extern bool vect_make_slp_decision (loop_vec_info); |
1287 extern void vect_detect_hybrid_slp (loop_vec_info); | 1619 extern void vect_detect_hybrid_slp (loop_vec_info); |
1288 extern void vect_get_slp_defs (vec<tree> , slp_tree, vec<vec<tree> > *); | 1620 extern void vect_get_slp_defs (vec<tree> , slp_tree, vec<vec<tree> > *); |
1289 extern bool vect_slp_bb (basic_block); | 1621 extern bool vect_slp_bb (basic_block); |
1290 extern gimple *vect_find_last_scalar_stmt_in_slp (slp_tree); | 1622 extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree); |
1291 extern bool is_simple_and_all_uses_invariant (gimple *, loop_vec_info); | 1623 extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info); |
1624 extern bool can_duplicate_and_interleave_p (unsigned int, machine_mode, | |
1625 unsigned int * = NULL, | |
1626 tree * = NULL, tree * = NULL); | |
1627 extern void duplicate_and_interleave (gimple_seq *, tree, vec<tree>, | |
1628 unsigned int, vec<tree> &); | |
1629 extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info); | |
1292 | 1630 |
1293 /* In tree-vect-patterns.c. */ | 1631 /* In tree-vect-patterns.c. */ |
1294 /* Pattern recognition functions. | 1632 /* Pattern recognition functions. |
1295 Additional pattern recognition functions can (and will) be added | 1633 Additional pattern recognition functions can (and will) be added |
1296 in the future. */ | 1634 in the future. */ |
1297 typedef gimple *(* vect_recog_func_ptr) (vec<gimple *> *, tree *, tree *); | |
1298 #define NUM_PATTERNS 14 | |
1299 void vect_pattern_recog (vec_info *); | 1635 void vect_pattern_recog (vec_info *); |
1300 | 1636 |
1301 /* In tree-vectorizer.c. */ | 1637 /* In tree-vectorizer.c. */ |
1302 unsigned vectorize_loops (void); | 1638 unsigned vectorize_loops (void); |
1303 bool vect_stmt_in_region_p (vec_info *, gimple *); | |
1304 void vect_free_loop_info_assumptions (struct loop *); | 1639 void vect_free_loop_info_assumptions (struct loop *); |
1305 | 1640 |
1306 #endif /* GCC_TREE_VECTORIZER_H */ | 1641 #endif /* GCC_TREE_VECTORIZER_H */ |