comparison gcc/tree-vectorizer.h @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
111:04ced10e8804 131:84e7813d76e9
1 /* Vectorizer 1 /* Vectorizer
2 Copyright (C) 2003-2017 Free Software Foundation, Inc. 2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com> 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 4
5 This file is part of GCC. 5 This file is part of GCC.
6 6
7 GCC is free software; you can redistribute it and/or modify it under 7 GCC is free software; you can redistribute it and/or modify it under
18 along with GCC; see the file COPYING3. If not see 18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */ 19 <http://www.gnu.org/licenses/>. */
20 20
21 #ifndef GCC_TREE_VECTORIZER_H 21 #ifndef GCC_TREE_VECTORIZER_H
22 #define GCC_TREE_VECTORIZER_H 22 #define GCC_TREE_VECTORIZER_H
23
24 typedef struct _stmt_vec_info *stmt_vec_info;
23 25
24 #include "tree-data-ref.h" 26 #include "tree-data-ref.h"
25 #include "tree-hash-traits.h" 27 #include "tree-hash-traits.h"
26 #include "target.h" 28 #include "target.h"
27 29
65 /* Define type of reduction. */ 67 /* Define type of reduction. */
66 enum vect_reduction_type { 68 enum vect_reduction_type {
67 TREE_CODE_REDUCTION, 69 TREE_CODE_REDUCTION,
68 COND_REDUCTION, 70 COND_REDUCTION,
69 INTEGER_INDUC_COND_REDUCTION, 71 INTEGER_INDUC_COND_REDUCTION,
70 CONST_COND_REDUCTION 72 CONST_COND_REDUCTION,
73
74 /* Retain a scalar phi and use a FOLD_EXTRACT_LAST within the loop
75 to implement:
76
77 for (int i = 0; i < VF; ++i)
78 res = cond[i] ? val[i] : res; */
79 EXTRACT_LAST_REDUCTION,
80
81 /* Use a folding reduction within the loop to implement:
82
83 for (int i = 0; i < VF; ++i)
84 res = res OP val[i];
85
 86 (with no reassociation). */
87 FOLD_LEFT_REDUCTION
71 }; 88 };
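
To make the new FOLD_LEFT_REDUCTION entry concrete, here is a minimal sketch (float *val and int n are assumed): it keeps the strict scalar evaluation order, which matters for non-associative arithmetic such as floating point, whereas a TREE_CODE_REDUCTION is free to form per-lane partial sums and combine them at the end.

    /* Fold-left: additions happen in scalar order, with no
       reassociation, so rounding matches the scalar loop.  */
    float res = 0.0f;
    for (int i = 0; i < n; ++i)
      res = res + val[i];   /* (((0 + v0) + v1) + v2) + ...  */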
72 89
73 #define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def) \ 90 #define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def) \
74 || ((D) == vect_double_reduction_def) \ 91 || ((D) == vect_double_reduction_def) \
75 || ((D) == vect_nested_cycle)) 92 || ((D) == vect_nested_cycle))
77 /* Structure to encapsulate information about a group of like 94 /* Structure to encapsulate information about a group of like
78 instructions to be presented to the target cost model. */ 95 instructions to be presented to the target cost model. */
79 struct stmt_info_for_cost { 96 struct stmt_info_for_cost {
80 int count; 97 int count;
81 enum vect_cost_for_stmt kind; 98 enum vect_cost_for_stmt kind;
82 gimple *stmt; 99 enum vect_cost_model_location where;
100 stmt_vec_info stmt_info;
83 int misalign; 101 int misalign;
84 }; 102 };
85 103
86 typedef vec<stmt_info_for_cost> stmt_vector_for_cost; 104 typedef vec<stmt_info_for_cost> stmt_vector_for_cost;
87 105
99 stmts to be packed in a SIMD stmt. */ 117 stmts to be packed in a SIMD stmt. */
100 struct _slp_tree { 118 struct _slp_tree {
 101 /* Nodes that contain def-stmts of this node's statement operands. */ 119 /* Nodes that contain def-stmts of this node's statement operands. */
102 vec<slp_tree> children; 120 vec<slp_tree> children;
103 /* A group of scalar stmts to be vectorized together. */ 121 /* A group of scalar stmts to be vectorized together. */
104 vec<gimple *> stmts; 122 vec<stmt_vec_info> stmts;
105 /* Load permutation relative to the stores, NULL if there is no 123 /* Load permutation relative to the stores, NULL if there is no
106 permutation. */ 124 permutation. */
107 vec<unsigned> load_permutation; 125 vec<unsigned> load_permutation;
108 /* Vectorized stmt/s. */ 126 /* Vectorized stmt/s. */
109 vec<gimple *> vec_stmts; 127 vec<stmt_vec_info> vec_stmts;
110 /* Number of vector stmts that are created to replace the group of scalar 128 /* Number of vector stmts that are created to replace the group of scalar
111 stmts. It is calculated during the transformation phase as the number of 129 stmts. It is calculated during the transformation phase as the number of
112 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF 130 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF
113 divided by vector size. */ 131 divided by vector size. */
114 unsigned int vec_stmts_size; 132 unsigned int vec_stmts_size;
127 145
128 /* Size of groups of scalar stmts that will be replaced by SIMD stmt/s. */ 146 /* Size of groups of scalar stmts that will be replaced by SIMD stmt/s. */
129 unsigned int group_size; 147 unsigned int group_size;
130 148
 131 /* The unrolling factor required to vectorize this SLP instance. */ 149 /* The unrolling factor required to vectorize this SLP instance. */
132 unsigned int unrolling_factor; 150 poly_uint64 unrolling_factor;
133 151
134 /* The group of nodes that contain loads of this SLP instance. */ 152 /* The group of nodes that contain loads of this SLP instance. */
135 vec<slp_tree> loads; 153 vec<slp_tree> loads;
136 154
137 /* The SLP node containing the reduction PHIs. */ 155 /* The SLP node containing the reduction PHIs. */
157 175
158 /* Describes two objects whose addresses must be unequal for the vectorized 176 /* Describes two objects whose addresses must be unequal for the vectorized
159 loop to be valid. */ 177 loop to be valid. */
160 typedef std::pair<tree, tree> vec_object_pair; 178 typedef std::pair<tree, tree> vec_object_pair;
161 179
180 /* Records that vectorization is only possible if abs (EXPR) >= MIN_VALUE.
181 UNSIGNED_P is true if we can assume that abs (EXPR) == EXPR. */
182 struct vec_lower_bound {
183 vec_lower_bound () {}
184 vec_lower_bound (tree e, bool u, poly_uint64 m)
185 : expr (e), unsigned_p (u), min_value (m) {}
186
187 tree expr;
188 bool unsigned_p;
189 poly_uint64 min_value;
190 };
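
As a purely hypothetical illustration of how such a bound might be recorded during analysis (n_expr and the constant 4 are invented; LOOP_VINFO_LOWER_BOUNDS is defined later in this header):

    /* Require abs (n_expr) >= 4 before entering the vector loop;
       n_expr is known to be unsigned, so abs (n_expr) == n_expr.  */
    vec_lower_bound bound (n_expr, /*unsigned_p=*/true, 4);
    LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).safe_push (bound);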
191
192 /* Vectorizer state shared between different analyses like vector sizes
193 of the same CFG region. */
194 struct vec_info_shared {
195 vec_info_shared();
196 ~vec_info_shared();
197
198 void save_datarefs();
199 void check_datarefs();
200
201 /* All data references. Freed by free_data_refs, so not an auto_vec. */
202 vec<data_reference_p> datarefs;
203 vec<data_reference> datarefs_copy;
204
205 /* The loop nest in which the data dependences are computed. */
206 auto_vec<loop_p> loop_nest;
207
208 /* All data dependences. Freed by free_dependence_relations, so not
209 an auto_vec. */
210 vec<ddr_p> ddrs;
211 };
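
A sketch of the save/verify protocol these methods suggest when the same region is analysed repeatedly, e.g. with different vector sizes (the exact call sites are an assumption):

    vec_info_shared shared;
    /* The first analysis computes shared.datarefs.  */
    shared.save_datarefs ();   /* snapshot into datarefs_copy */
    /* A later analysis of the same region must leave the data
       references untouched.  */
    shared.check_datarefs ();  /* compare against the snapshot */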
212
162 /* Vectorizer state common between loop and basic-block vectorization. */ 213 /* Vectorizer state common between loop and basic-block vectorization. */
163 struct vec_info { 214 struct vec_info {
164 enum vec_kind { bb, loop }; 215 enum vec_kind { bb, loop };
165 216
166 vec_info (vec_kind, void *); 217 vec_info (vec_kind, void *, vec_info_shared *);
167 ~vec_info (); 218 ~vec_info ();
219
220 stmt_vec_info add_stmt (gimple *);
221 stmt_vec_info lookup_stmt (gimple *);
222 stmt_vec_info lookup_def (tree);
223 stmt_vec_info lookup_single_use (tree);
224 struct dr_vec_info *lookup_dr (data_reference *);
225 void move_dr (stmt_vec_info, stmt_vec_info);
226 void remove_stmt (stmt_vec_info);
227 void replace_stmt (gimple_stmt_iterator *, stmt_vec_info, gimple *);
168 228
169 /* The type of vectorization. */ 229 /* The type of vectorization. */
170 vec_kind kind; 230 vec_kind kind;
171 231
232 /* Shared vectorizer state. */
233 vec_info_shared *shared;
234
235 /* The mapping of GIMPLE UID to stmt_vec_info. */
236 vec<stmt_vec_info> stmt_vec_infos;
237
172 /* All SLP instances. */ 238 /* All SLP instances. */
173 auto_vec<slp_instance> slp_instances; 239 auto_vec<slp_instance> slp_instances;
174
175 /* All data references. Freed by free_data_refs, so not an auto_vec. */
176 vec<data_reference_p> datarefs;
177 240
178 /* Maps base addresses to an innermost_loop_behavior that gives the maximum 241 /* Maps base addresses to an innermost_loop_behavior that gives the maximum
179 known alignment for that base. */ 242 known alignment for that base. */
180 vec_base_alignments base_alignments; 243 vec_base_alignments base_alignments;
181 244
182 /* All data dependences. Freed by free_dependence_relations, so not
183 an auto_vec. */
184 vec<ddr_p> ddrs;
185
186 /* All interleaving chains of stores, represented by the first 245 /* All interleaving chains of stores, represented by the first
187 stmt in the chain. */ 246 stmt in the chain. */
188 auto_vec<gimple *> grouped_stores; 247 auto_vec<stmt_vec_info> grouped_stores;
189 248
190 /* Cost data used by the target cost model. */ 249 /* Cost data used by the target cost model. */
191 void *target_cost_data; 250 void *target_cost_data;
251
252 private:
253 stmt_vec_info new_stmt_vec_info (gimple *stmt);
254 void set_vinfo_for_stmt (gimple *, stmt_vec_info);
255 void free_stmt_vec_infos ();
256 void free_stmt_vec_info (stmt_vec_info);
192 }; 257 };
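
For example, the new lookup interface is expected to round-trip (assuming lookup_stmt returns whatever add_stmt recorded for the statement):

    stmt_vec_info info = vinfo->add_stmt (stmt);
    gcc_checking_assert (vinfo->lookup_stmt (stmt) == info);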
193 258
194 struct _loop_vec_info; 259 struct _loop_vec_info;
195 struct _bb_vec_info; 260 struct _bb_vec_info;
196 261
209 { 274 {
210 return i->kind == vec_info::bb; 275 return i->kind == vec_info::bb;
211 } 276 }
212 277
213 278
279 /* In general, we can divide the vector statements in a vectorized loop
280 into related groups ("rgroups") and say that for each rgroup there is
281 some nS such that the rgroup operates on nS values from one scalar
282 iteration followed by nS values from the next. That is, if VF is the
283 vectorization factor of the loop, the rgroup operates on a sequence:
284
285 (1,1) (1,2) ... (1,nS) (2,1) ... (2,nS) ... (VF,1) ... (VF,nS)
286
287 where (i,j) represents a scalar value with index j in a scalar
288 iteration with index i.
289
290 [ We use the term "rgroup" to emphasise that this grouping isn't
291 necessarily the same as the grouping of statements used elsewhere.
292 For example, if we implement a group of scalar loads using gather
293 loads, we'll use a separate gather load for each scalar load, and
294 thus each gather load will belong to its own rgroup. ]
295
296 In general this sequence will occupy nV vectors concatenated
297 together. If these vectors have nL lanes each, the total number
298 of scalar values N is given by:
299
300 N = nS * VF = nV * nL
301
302 None of nS, VF, nV and nL are required to be a power of 2. nS and nV
303 are compile-time constants but VF and nL can be variable (if the target
304 supports variable-length vectors).
305
306 In classical vectorization, each iteration of the vector loop would
307 handle exactly VF iterations of the original scalar loop. However,
308 in a fully-masked loop, a particular iteration of the vector loop
309 might handle fewer than VF iterations of the scalar loop. The vector
310 lanes that correspond to iterations of the scalar loop are said to be
311 "active" and the other lanes are said to be "inactive".
312
313 In a fully-masked loop, many rgroups need to be masked to ensure that
314 they have no effect for the inactive lanes. Each such rgroup needs a
315 sequence of booleans in the same order as above, but with each (i,j)
316 replaced by a boolean that indicates whether iteration i is active.
317 This sequence occupies nV vector masks that again have nL lanes each.
318 Thus the mask sequence as a whole consists of VF independent booleans
319 that are each repeated nS times.
320
321 We make the simplifying assumption that if a sequence of nV masks is
322 suitable for one (nS,nL) pair, we can reuse it for (nS/2,nL/2) by
323 VIEW_CONVERTing it. This holds for all current targets that support
324 fully-masked loops. For example, suppose the scalar loop is:
325
326 float *f;
327 double *d;
328 for (int i = 0; i < n; ++i)
329 {
330 f[i * 2 + 0] += 1.0f;
331 f[i * 2 + 1] += 2.0f;
332 d[i] += 3.0;
333 }
334
335 and suppose that vectors have 256 bits. The vectorized f accesses
336 will belong to one rgroup and the vectorized d access to another:
337
338 f rgroup: nS = 2, nV = 1, nL = 8
339 d rgroup: nS = 1, nV = 1, nL = 4
340 VF = 4
341
342 [ In this simple example the rgroups do correspond to the normal
343 SLP grouping scheme. ]
344
345 If only the first three lanes are active, the masks we need are:
346
347 f rgroup: 1 1 | 1 1 | 1 1 | 0 0
348 d rgroup: 1 | 1 | 1 | 0
349
350 Here we can use a mask calculated for f's rgroup for d's, but not
351 vice versa.
352
353 Thus for each value of nV, it is enough to provide nV masks, with the
354 mask being calculated based on the highest nL (or, equivalently, based
355 on the highest nS) required by any rgroup with that nV. We therefore
356 represent the entire collection of masks as a two-level table, with the
357 first level being indexed by nV - 1 (since nV == 0 doesn't exist) and
358 the second being indexed by the mask index 0 <= i < nV. */
359
360 /* The masks needed by rgroups with nV vectors, according to the
361 description above. */
362 struct rgroup_masks {
363 /* The largest nS for all rgroups that use these masks. */
364 unsigned int max_nscalars_per_iter;
365
366 /* The type of mask to use, based on the highest nS recorded above. */
367 tree mask_type;
368
369 /* A vector of nV masks, in iteration order. */
370 vec<tree> masks;
371 };
372
373 typedef auto_vec<rgroup_masks> vec_loop_masks;
374
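A sketch of indexing the two-level table described above (the helper name is invented; the real accessor carries more logic, e.g. for reusing a wider rgroup's masks):

    static tree
    get_mask_sketch (vec_loop_masks *masks, unsigned int nvectors,
                     unsigned int i)
    {
      /* First level: rgroups that need NVECTORS masks live at
         index nV - 1.  Second level: mask I, 0 <= I < nV.  */
      rgroup_masks *rgm = &(*masks)[nvectors - 1];
      return rgm->masks[i];
    }
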
214 /*-----------------------------------------------------------------*/ 375 /*-----------------------------------------------------------------*/
215 /* Info on vectorized loops. */ 376 /* Info on vectorized loops. */
216 /*-----------------------------------------------------------------*/ 377 /*-----------------------------------------------------------------*/
217 typedef struct _loop_vec_info : public vec_info { 378 typedef struct _loop_vec_info : public vec_info {
218 _loop_vec_info (struct loop *); 379 _loop_vec_info (struct loop *, vec_info_shared *);
219 ~_loop_vec_info (); 380 ~_loop_vec_info ();
220 381
 221 /* The loop to which this info struct refers. */ 382 /* The loop to which this info struct refers. */
222 struct loop *loop; 383 struct loop *loop;
223 384
 236 /* Threshold of number of iterations below which vectorization will not be 397 /* Threshold of number of iterations below which vectorization will not be
237 performed. It is calculated from MIN_PROFITABLE_ITERS and 398 performed. It is calculated from MIN_PROFITABLE_ITERS and
238 PARAM_MIN_VECT_LOOP_BOUND. */ 399 PARAM_MIN_VECT_LOOP_BOUND. */
239 unsigned int th; 400 unsigned int th;
240 401
402 /* When applying loop versioning, the vector form should only be used
403 if the number of scalar iterations is >= this value, on top of all
404 the other requirements. Ignored when loop versioning is not being
405 used. */
406 poly_uint64 versioning_threshold;
407
241 /* Unrolling factor */ 408 /* Unrolling factor */
242 int vectorization_factor; 409 poly_uint64 vectorization_factor;
243 410
244 /* Maximum runtime vectorization factor, or MAX_VECTORIZATION_FACTOR 411 /* Maximum runtime vectorization factor, or MAX_VECTORIZATION_FACTOR
245 if there is no particular limit. */ 412 if there is no particular limit. */
246 unsigned HOST_WIDE_INT max_vectorization_factor; 413 unsigned HOST_WIDE_INT max_vectorization_factor;
247 414
415 /* The masks that a fully-masked loop should use to avoid operating
416 on inactive scalars. */
417 vec_loop_masks masks;
418
419 /* If we are using a loop mask to align memory addresses, this variable
420 contains the number of vector elements that we should skip in the
421 first iteration of the vector loop (i.e. the number of leading
422 elements that should be false in the first mask). */
423 tree mask_skip_niters;
424
425 /* Type of the variables to use in the WHILE_ULT call for fully-masked
426 loops. */
427 tree mask_compare_type;
428
 248 /* The unaligned DR according to which the loop was peeled. */ 429 /* The unaligned DR according to which the loop was peeled. */
249 struct data_reference *unaligned_dr; 430 struct dr_vec_info *unaligned_dr;
250 431
251 /* peeling_for_alignment indicates whether peeling for alignment will take 432 /* peeling_for_alignment indicates whether peeling for alignment will take
252 place, and what the peeling factor should be: 433 place, and what the peeling factor should be:
253 peeling_for_alignment = X means: 434 peeling_for_alignment = X means:
254 If X=0: Peeling for alignment will not be applied. 435 If X=0: Peeling for alignment will not be applied.
259 int peeling_for_alignment; 440 int peeling_for_alignment;
260 441
261 /* The mask used to check the alignment of pointers or arrays. */ 442 /* The mask used to check the alignment of pointers or arrays. */
262 int ptr_mask; 443 int ptr_mask;
263 444
264 /* The loop nest in which the data dependences are computed. */
265 auto_vec<loop_p> loop_nest;
266
267 /* Data Dependence Relations defining address ranges that are candidates 445 /* Data Dependence Relations defining address ranges that are candidates
268 for a run-time aliasing check. */ 446 for a run-time aliasing check. */
269 auto_vec<ddr_p> may_alias_ddrs; 447 auto_vec<ddr_p> may_alias_ddrs;
270 448
271 /* Data Dependence Relations defining address ranges together with segment 449 /* Data Dependence Relations defining address ranges together with segment
273 auto_vec<dr_with_seg_len_pair_t> comp_alias_ddrs; 451 auto_vec<dr_with_seg_len_pair_t> comp_alias_ddrs;
274 452
 275 /* Check that the addresses of each pair of objects are unequal. */ 453 /* Check that the addresses of each pair of objects are unequal. */
276 auto_vec<vec_object_pair> check_unequal_addrs; 454 auto_vec<vec_object_pair> check_unequal_addrs;
277 455
456 /* List of values that are required to be nonzero. This is used to check
457 whether things like "x[i * n] += 1;" are safe and eventually gets added
458 to the checks for lower bounds below. */
459 auto_vec<tree> check_nonzero;
460
461 /* List of values that need to be checked for a minimum value. */
462 auto_vec<vec_lower_bound> lower_bounds;
463
278 /* Statements in the loop that have data references that are candidates for a 464 /* Statements in the loop that have data references that are candidates for a
279 runtime (loop versioning) misalignment check. */ 465 runtime (loop versioning) misalignment check. */
280 auto_vec<gimple *> may_misalign_stmts; 466 auto_vec<stmt_vec_info> may_misalign_stmts;
281 467
282 /* Reduction cycles detected in the loop. Used in loop-aware SLP. */ 468 /* Reduction cycles detected in the loop. Used in loop-aware SLP. */
283 auto_vec<gimple *> reductions; 469 auto_vec<stmt_vec_info> reductions;
284 470
285 /* All reduction chains in the loop, represented by the first 471 /* All reduction chains in the loop, represented by the first
286 stmt in the chain. */ 472 stmt in the chain. */
287 auto_vec<gimple *> reduction_chains; 473 auto_vec<stmt_vec_info> reduction_chains;
288 474
289 /* Cost vector for a single scalar iteration. */ 475 /* Cost vector for a single scalar iteration. */
290 auto_vec<stmt_info_for_cost> scalar_cost_vec; 476 auto_vec<stmt_info_for_cost> scalar_cost_vec;
291 477
478 /* Map of IV base/step expressions to inserted name in the preheader. */
479 hash_map<tree_operand_hash, tree> *ivexpr_map;
480
 292 /* The unrolling factor needed to SLP the loop. If pure SLP is 481 /* The unrolling factor needed to SLP the loop. If pure SLP is
293 applied to the loop, i.e., no unrolling is needed, this is 1. */ 482 applied to the loop, i.e., no unrolling is needed, this is 1. */
294 unsigned slp_unrolling_factor; 483 poly_uint64 slp_unrolling_factor;
295 484
296 /* Cost of a single scalar iteration. */ 485 /* Cost of a single scalar iteration. */
297 int single_scalar_iteration_cost; 486 int single_scalar_iteration_cost;
298 487
299 /* Is the loop vectorizable? */ 488 /* Is the loop vectorizable? */
300 bool vectorizable; 489 bool vectorizable;
490
491 /* Records whether we still have the option of using a fully-masked loop. */
492 bool can_fully_mask_p;
493
 494 /* True if we have decided to use a fully-masked loop. */
495 bool fully_masked_p;
301 496
302 /* When we have grouped data accesses with gaps, we may introduce invalid 497 /* When we have grouped data accesses with gaps, we may introduce invalid
303 memory accesses. We peel the last iteration of the loop to prevent 498 memory accesses. We peel the last iteration of the loop to prevent
304 this. */ 499 this. */
305 bool peeling_for_gaps; 500 bool peeling_for_gaps;
355 prologue peeling retain total unchanged scalar loop iterations for 550 prologue peeling retain total unchanged scalar loop iterations for
356 cost model. */ 551 cost model. */
357 #define LOOP_VINFO_NITERS_UNCHANGED(L) (L)->num_iters_unchanged 552 #define LOOP_VINFO_NITERS_UNCHANGED(L) (L)->num_iters_unchanged
358 #define LOOP_VINFO_NITERS_ASSUMPTIONS(L) (L)->num_iters_assumptions 553 #define LOOP_VINFO_NITERS_ASSUMPTIONS(L) (L)->num_iters_assumptions
359 #define LOOP_VINFO_COST_MODEL_THRESHOLD(L) (L)->th 554 #define LOOP_VINFO_COST_MODEL_THRESHOLD(L) (L)->th
555 #define LOOP_VINFO_VERSIONING_THRESHOLD(L) (L)->versioning_threshold
360 #define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable 556 #define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable
557 #define LOOP_VINFO_CAN_FULLY_MASK_P(L) (L)->can_fully_mask_p
558 #define LOOP_VINFO_FULLY_MASKED_P(L) (L)->fully_masked_p
361 #define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor 559 #define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor
362 #define LOOP_VINFO_MAX_VECT_FACTOR(L) (L)->max_vectorization_factor 560 #define LOOP_VINFO_MAX_VECT_FACTOR(L) (L)->max_vectorization_factor
561 #define LOOP_VINFO_MASKS(L) (L)->masks
562 #define LOOP_VINFO_MASK_SKIP_NITERS(L) (L)->mask_skip_niters
563 #define LOOP_VINFO_MASK_COMPARE_TYPE(L) (L)->mask_compare_type
363 #define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask 564 #define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask
364 #define LOOP_VINFO_LOOP_NEST(L) (L)->loop_nest 565 #define LOOP_VINFO_LOOP_NEST(L) (L)->shared->loop_nest
365 #define LOOP_VINFO_DATAREFS(L) (L)->datarefs 566 #define LOOP_VINFO_DATAREFS(L) (L)->shared->datarefs
366 #define LOOP_VINFO_DDRS(L) (L)->ddrs 567 #define LOOP_VINFO_DDRS(L) (L)->shared->ddrs
367 #define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters)) 568 #define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters))
368 #define LOOP_VINFO_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment 569 #define LOOP_VINFO_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment
369 #define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr 570 #define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr
370 #define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts 571 #define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts
371 #define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs 572 #define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs
372 #define LOOP_VINFO_COMP_ALIAS_DDRS(L) (L)->comp_alias_ddrs 573 #define LOOP_VINFO_COMP_ALIAS_DDRS(L) (L)->comp_alias_ddrs
373 #define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L) (L)->check_unequal_addrs 574 #define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L) (L)->check_unequal_addrs
575 #define LOOP_VINFO_CHECK_NONZERO(L) (L)->check_nonzero
576 #define LOOP_VINFO_LOWER_BOUNDS(L) (L)->lower_bounds
374 #define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores 577 #define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores
375 #define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances 578 #define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances
376 #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor 579 #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor
377 #define LOOP_VINFO_REDUCTIONS(L) (L)->reductions 580 #define LOOP_VINFO_REDUCTIONS(L) (L)->reductions
378 #define LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains 581 #define LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains
389 592
390 #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ 593 #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \
391 ((L)->may_misalign_stmts.length () > 0) 594 ((L)->may_misalign_stmts.length () > 0)
392 #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \ 595 #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \
393 ((L)->comp_alias_ddrs.length () > 0 \ 596 ((L)->comp_alias_ddrs.length () > 0 \
394 || (L)->check_unequal_addrs.length () > 0) 597 || (L)->check_unequal_addrs.length () > 0 \
598 || (L)->lower_bounds.length () > 0)
395 #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \ 599 #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \
396 (LOOP_VINFO_NITERS_ASSUMPTIONS (L)) 600 (LOOP_VINFO_NITERS_ASSUMPTIONS (L))
397 #define LOOP_REQUIRES_VERSIONING(L) \ 601 #define LOOP_REQUIRES_VERSIONING(L) \
398 (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L) \ 602 (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L) \
399 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L) \ 603 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L) \
406 (LOOP_VINFO_ORIG_LOOP_INFO (L) != NULL) 610 (LOOP_VINFO_ORIG_LOOP_INFO (L) != NULL)
407 611
408 #define LOOP_VINFO_ORIG_MAX_VECT_FACTOR(L) \ 612 #define LOOP_VINFO_ORIG_MAX_VECT_FACTOR(L) \
409 (LOOP_VINFO_MAX_VECT_FACTOR (LOOP_VINFO_ORIG_LOOP_INFO (L))) 613 (LOOP_VINFO_MAX_VECT_FACTOR (LOOP_VINFO_ORIG_LOOP_INFO (L)))
410 614
615 /* Wrapper for loop_vec_info, for tracking success/failure, where a non-NULL
616 value signifies success, and a NULL value signifies failure, supporting
617 propagating an opt_problem * describing the failure back up the call
618 stack. */
619 typedef opt_pointer_wrapper <loop_vec_info> opt_loop_vec_info;
620
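The intended calling convention, sketched (the vect_analyze_loop signature shown here is an assumption):

    /* A NULL wrapper signifies failure and carries an opt_problem;
       on success it converts to the underlying loop_vec_info.  */
    opt_loop_vec_info loop_vinfo
      = vect_analyze_loop (loop, orig_loop_vinfo, &shared);
    if (loop_vinfo)
      loop->aux = loop_vinfo;
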
411 static inline loop_vec_info 621 static inline loop_vec_info
412 loop_vec_info_for_loop (struct loop *loop) 622 loop_vec_info_for_loop (struct loop *loop)
413 { 623 {
414 return (loop_vec_info) loop->aux; 624 return (loop_vec_info) loop->aux;
415 } 625 }
416 626
417 static inline bool
418 nested_in_vect_loop_p (struct loop *loop, gimple *stmt)
419 {
420 return (loop->inner
421 && (loop->inner == (gimple_bb (stmt))->loop_father));
422 }
423
424 typedef struct _bb_vec_info : public vec_info 627 typedef struct _bb_vec_info : public vec_info
425 { 628 {
426 _bb_vec_info (gimple_stmt_iterator, gimple_stmt_iterator); 629 _bb_vec_info (gimple_stmt_iterator, gimple_stmt_iterator, vec_info_shared *);
427 ~_bb_vec_info (); 630 ~_bb_vec_info ();
428 631
429 basic_block bb; 632 basic_block bb;
430 gimple_stmt_iterator region_begin; 633 gimple_stmt_iterator region_begin;
431 gimple_stmt_iterator region_end; 634 gimple_stmt_iterator region_end;
432 } *bb_vec_info; 635 } *bb_vec_info;
433 636
434 #define BB_VINFO_BB(B) (B)->bb 637 #define BB_VINFO_BB(B) (B)->bb
435 #define BB_VINFO_GROUPED_STORES(B) (B)->grouped_stores 638 #define BB_VINFO_GROUPED_STORES(B) (B)->grouped_stores
436 #define BB_VINFO_SLP_INSTANCES(B) (B)->slp_instances 639 #define BB_VINFO_SLP_INSTANCES(B) (B)->slp_instances
437 #define BB_VINFO_DATAREFS(B) (B)->datarefs 640 #define BB_VINFO_DATAREFS(B) (B)->shared->datarefs
438 #define BB_VINFO_DDRS(B) (B)->ddrs 641 #define BB_VINFO_DDRS(B) (B)->shared->ddrs
439 #define BB_VINFO_TARGET_COST_DATA(B) (B)->target_cost_data 642 #define BB_VINFO_TARGET_COST_DATA(B) (B)->target_cost_data
440 643
441 static inline bb_vec_info 644 static inline bb_vec_info
442 vec_info_for_bb (basic_block bb) 645 vec_info_for_bb (basic_block bb)
443 { 646 {
511 loop_vect = 0, 714 loop_vect = 0,
512 pure_slp, 715 pure_slp,
513 hybrid 716 hybrid
514 }; 717 };
515 718
719 /* Says whether a statement is a load, a store of a vectorized statement
720 result, or a store of an invariant value. */
721 enum vec_load_store_type {
722 VLS_LOAD,
723 VLS_STORE,
724 VLS_STORE_INVARIANT
725 };
726
516 /* Describes how we're going to vectorize an individual load or store, 727 /* Describes how we're going to vectorize an individual load or store,
517 or a group of loads or stores. */ 728 or a group of loads or stores. */
518 enum vect_memory_access_type { 729 enum vect_memory_access_type {
519 /* An access to an invariant address. This is used only for loads. */ 730 /* An access to an invariant address. This is used only for loads. */
520 VMAT_INVARIANT, 731 VMAT_INVARIANT,
551 762
552 /* The access uses gather loads or scatter stores. */ 763 /* The access uses gather loads or scatter stores. */
553 VMAT_GATHER_SCATTER 764 VMAT_GATHER_SCATTER
554 }; 765 };
555 766
767 struct dr_vec_info {
768 /* The data reference itself. */
769 data_reference *dr;
770 /* The statement that contains the data reference. */
771 stmt_vec_info stmt;
772 /* The misalignment in bytes of the reference, or -1 if not known. */
773 int misalignment;
774 /* The byte alignment that we'd ideally like the reference to have,
775 and the value that misalignment is measured against. */
776 int target_alignment;
777 /* If true the alignment of base_decl needs to be increased. */
778 bool base_misaligned;
779 tree base_decl;
780 };
781
556 typedef struct data_reference *dr_p; 782 typedef struct data_reference *dr_p;
557 783
558 typedef struct _stmt_vec_info { 784 struct _stmt_vec_info {
559 785
560 enum stmt_vec_info_type type; 786 enum stmt_vec_info_type type;
561 787
 562 /* Indicates whether this stmt is part of a computation whose result is 788 /* Indicates whether this stmt is part of a computation whose result is
563 used outside the loop. */ 789 used outside the loop. */
564 bool live; 790 bool live;
565 791
 566 /* Stmt is part of some pattern (computation idiom). */ 792 /* Stmt is part of some pattern (computation idiom). */
567 bool in_pattern_p; 793 bool in_pattern_p;
568 794
795 /* True if the statement was created during pattern recognition as
796 part of the replacement for RELATED_STMT. This implies that the
797 statement isn't part of any basic block, although for convenience
798 its gimple_bb is the same as for RELATED_STMT. */
799 bool pattern_stmt_p;
800
569 /* Is this statement vectorizable or should it be skipped in (partial) 801 /* Is this statement vectorizable or should it be skipped in (partial)
570 vectorization. */ 802 vectorization. */
571 bool vectorizable; 803 bool vectorizable;
572 804
 573 /* The stmt to which this info struct refers. */ 805 /* The stmt to which this info struct refers. */
578 810
579 /* The vector type to be used for the LHS of this statement. */ 811 /* The vector type to be used for the LHS of this statement. */
580 tree vectype; 812 tree vectype;
581 813
582 /* The vectorized version of the stmt. */ 814 /* The vectorized version of the stmt. */
583 gimple *vectorized_stmt; 815 stmt_vec_info vectorized_stmt;
584 816
585 817
586 /* The following is relevant only for stmts that contain a non-scalar 818 /* The following is relevant only for stmts that contain a non-scalar
587 data-ref (array/pointer/struct access). A GIMPLE stmt is expected to have 819 data-ref (array/pointer/struct access). A GIMPLE stmt is expected to have
588 at most one such data-ref. */ 820 at most one such data-ref. */
589 821
590 /* Information about the data-ref (access function, etc), 822 dr_vec_info dr_aux;
591 relative to the inner-most containing loop. */
592 struct data_reference *data_ref_info;
593 823
594 /* Information about the data-ref relative to this loop 824 /* Information about the data-ref relative to this loop
595 nest (the loop that is being considered for vectorization). */ 825 nest (the loop that is being considered for vectorization). */
596 innermost_loop_behavior dr_wrt_vec_loop; 826 innermost_loop_behavior dr_wrt_vec_loop;
597 827
611 true): S is the "pattern stmt" that represents (and replaces) the 841 true): S is the "pattern stmt" that represents (and replaces) the
612 sequence of stmts that constitutes the pattern. Similarly, the 842 sequence of stmts that constitutes the pattern. Similarly, the
613 related_stmt of the "pattern stmt" points back to this stmt (which is 843 related_stmt of the "pattern stmt" points back to this stmt (which is
614 the last stmt in the original sequence of stmts that constitutes the 844 the last stmt in the original sequence of stmts that constitutes the
615 pattern). */ 845 pattern). */
616 gimple *related_stmt; 846 stmt_vec_info related_stmt;
617 847
618 /* Used to keep a sequence of def stmts of a pattern stmt if such exists. */ 848 /* Used to keep a sequence of def stmts of a pattern stmt if such exists.
849 The sequence is attached to the original statement rather than the
850 pattern statement. */
619 gimple_seq pattern_def_seq; 851 gimple_seq pattern_def_seq;
620 852
621 /* List of datarefs that are known to have the same alignment as the dataref 853 /* List of datarefs that are known to have the same alignment as the dataref
622 of this stmt. */ 854 of this stmt. */
623 vec<dr_p> same_align_refs; 855 vec<dr_p> same_align_refs;
633 /* Whether the stmt is SLPed, loop-based vectorized, or both. */ 865 /* Whether the stmt is SLPed, loop-based vectorized, or both. */
634 enum slp_vect_type slp_type; 866 enum slp_vect_type slp_type;
635 867
636 /* Interleaving and reduction chains info. */ 868 /* Interleaving and reduction chains info. */
637 /* First element in the group. */ 869 /* First element in the group. */
638 gimple *first_element; 870 stmt_vec_info first_element;
639 /* Pointer to the next element in the group. */ 871 /* Pointer to the next element in the group. */
640 gimple *next_element; 872 stmt_vec_info next_element;
 641 /* For data-refs, in case two or more stmts share a data-ref, this is the 873 /* For data-refs, in case two or more stmts share a data-ref, this is the
642 pointer to the previously detected stmt with the same dr. */ 874 pointer to the previously detected stmt with the same dr. */
643 gimple *same_dr_stmt; 875 stmt_vec_info same_dr_stmt;
644 /* The size of the group. */ 876 /* The size of the group. */
645 unsigned int size; 877 unsigned int size;
646 /* For stores, number of stores from this group seen. We vectorize the last 878 /* For stores, number of stores from this group seen. We vectorize the last
647 one. */ 879 one. */
648 unsigned int store_count; 880 unsigned int store_count;
683 enum vect_reduction_type reduc_type; 915 enum vect_reduction_type reduc_type;
684 916
685 /* On a reduction PHI the def returned by vect_force_simple_reduction. 917 /* On a reduction PHI the def returned by vect_force_simple_reduction.
686 On the def returned by vect_force_simple_reduction the 918 On the def returned by vect_force_simple_reduction the
687 corresponding PHI. */ 919 corresponding PHI. */
688 gimple *reduc_def; 920 stmt_vec_info reduc_def;
689 921
690 /* The number of scalar stmt references from active SLP instances. */ 922 /* The number of scalar stmt references from active SLP instances. */
691 unsigned int num_slp_uses; 923 unsigned int num_slp_uses;
692 } *stmt_vec_info; 924
925 /* If nonzero, the lhs of the statement could be truncated to this
926 many bits without affecting any users of the result. */
927 unsigned int min_output_precision;
928
929 /* If nonzero, all non-boolean input operands have the same precision,
930 and they could each be truncated to this many bits without changing
931 the result. */
932 unsigned int min_input_precision;
933
 934 /* If OPERATION_PRECISION is nonzero, the statement could be performed on
 935 an integer with the sign and number of bits given by OPERATION_SIGN
 936 and OPERATION_PRECISION without changing the result. */
937 unsigned int operation_precision;
938 signop operation_sign;
939 };
693 940
694 /* Information about a gather/scatter call. */ 941 /* Information about a gather/scatter call. */
695 struct gather_scatter_info { 942 struct gather_scatter_info {
696 /* The FUNCTION_DECL for the built-in gather/scatter function. */ 943 /* The internal function to use for the gather/scatter operation,
944 or IFN_LAST if a built-in function should be used instead. */
945 internal_fn ifn;
946
947 /* The FUNCTION_DECL for the built-in gather/scatter function,
948 or null if an internal function should be used instead. */
697 tree decl; 949 tree decl;
698 950
699 /* The loop-invariant base value. */ 951 /* The loop-invariant base value. */
700 tree base; 952 tree base;
701 953
709 /* The definition type for the vectorized offset. */ 961 /* The definition type for the vectorized offset. */
710 enum vect_def_type offset_dt; 962 enum vect_def_type offset_dt;
711 963
712 /* The type of the vectorized offset. */ 964 /* The type of the vectorized offset. */
713 tree offset_vectype; 965 tree offset_vectype;
966
967 /* The type of the scalar elements after loading or before storing. */
968 tree element_type;
969
970 /* The type of the scalar elements being loaded or stored. */
971 tree memory_type;
714 }; 972 };
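
The two call styles the structure supports, sketched:

    /* IFN_LAST means no internal function applies, so the target
       builtin DECL is called instead.  */
    if (gs_info.ifn != IFN_LAST)
      { /* ... build a call to internal function gs_info.ifn ... */ }
    else
      { /* ... build a call to the builtin gs_info.decl ... */ }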
715 973
716 /* Access Functions. */ 974 /* Access Functions. */
717 #define STMT_VINFO_TYPE(S) (S)->type 975 #define STMT_VINFO_TYPE(S) (S)->type
718 #define STMT_VINFO_STMT(S) (S)->stmt 976 #define STMT_VINFO_STMT(S) (S)->stmt
733 #define STMT_VINFO_RELEVANT(S) (S)->relevant 991 #define STMT_VINFO_RELEVANT(S) (S)->relevant
734 #define STMT_VINFO_LIVE_P(S) (S)->live 992 #define STMT_VINFO_LIVE_P(S) (S)->live
735 #define STMT_VINFO_VECTYPE(S) (S)->vectype 993 #define STMT_VINFO_VECTYPE(S) (S)->vectype
736 #define STMT_VINFO_VEC_STMT(S) (S)->vectorized_stmt 994 #define STMT_VINFO_VEC_STMT(S) (S)->vectorized_stmt
737 #define STMT_VINFO_VECTORIZABLE(S) (S)->vectorizable 995 #define STMT_VINFO_VECTORIZABLE(S) (S)->vectorizable
738 #define STMT_VINFO_DATA_REF(S) (S)->data_ref_info 996 #define STMT_VINFO_DATA_REF(S) ((S)->dr_aux.dr + 0)
739 #define STMT_VINFO_GATHER_SCATTER_P(S) (S)->gather_scatter_p 997 #define STMT_VINFO_GATHER_SCATTER_P(S) (S)->gather_scatter_p
740 #define STMT_VINFO_STRIDED_P(S) (S)->strided_p 998 #define STMT_VINFO_STRIDED_P(S) (S)->strided_p
741 #define STMT_VINFO_MEMORY_ACCESS_TYPE(S) (S)->memory_access_type 999 #define STMT_VINFO_MEMORY_ACCESS_TYPE(S) (S)->memory_access_type
742 #define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p 1000 #define STMT_VINFO_SIMD_LANE_ACCESS_P(S) (S)->simd_lane_access_p
743 #define STMT_VINFO_VEC_REDUCTION_TYPE(S) (S)->v_reduc_type 1001 #define STMT_VINFO_VEC_REDUCTION_TYPE(S) (S)->v_reduc_type
754 #define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \ 1012 #define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \
755 (S)->dr_wrt_vec_loop.offset_alignment 1013 (S)->dr_wrt_vec_loop.offset_alignment
756 #define STMT_VINFO_DR_STEP_ALIGNMENT(S) \ 1014 #define STMT_VINFO_DR_STEP_ALIGNMENT(S) \
757 (S)->dr_wrt_vec_loop.step_alignment 1015 (S)->dr_wrt_vec_loop.step_alignment
758 1016
1017 #define STMT_VINFO_DR_INFO(S) \
1018 (gcc_checking_assert ((S)->dr_aux.stmt == (S)), &(S)->dr_aux)
1019
759 #define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p 1020 #define STMT_VINFO_IN_PATTERN_P(S) (S)->in_pattern_p
760 #define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt 1021 #define STMT_VINFO_RELATED_STMT(S) (S)->related_stmt
761 #define STMT_VINFO_PATTERN_DEF_SEQ(S) (S)->pattern_def_seq 1022 #define STMT_VINFO_PATTERN_DEF_SEQ(S) (S)->pattern_def_seq
762 #define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs 1023 #define STMT_VINFO_SAME_ALIGN_REFS(S) (S)->same_align_refs
763 #define STMT_VINFO_SIMD_CLONE_INFO(S) (S)->simd_clone_info 1024 #define STMT_VINFO_SIMD_CLONE_INFO(S) (S)->simd_clone_info
764 #define STMT_VINFO_DEF_TYPE(S) (S)->def_type 1025 #define STMT_VINFO_DEF_TYPE(S) (S)->def_type
765 #define STMT_VINFO_GROUP_FIRST_ELEMENT(S) (S)->first_element 1026 #define STMT_VINFO_GROUPED_ACCESS(S) \
766 #define STMT_VINFO_GROUP_NEXT_ELEMENT(S) (S)->next_element 1027 ((S)->dr_aux.dr && DR_GROUP_FIRST_ELEMENT(S))
767 #define STMT_VINFO_GROUP_SIZE(S) (S)->size
768 #define STMT_VINFO_GROUP_STORE_COUNT(S) (S)->store_count
769 #define STMT_VINFO_GROUP_GAP(S) (S)->gap
770 #define STMT_VINFO_GROUP_SAME_DR_STMT(S) (S)->same_dr_stmt
771 #define STMT_VINFO_GROUPED_ACCESS(S) ((S)->first_element != NULL && (S)->data_ref_info)
772 #define STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED(S) (S)->loop_phi_evolution_base_unchanged 1028 #define STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED(S) (S)->loop_phi_evolution_base_unchanged
773 #define STMT_VINFO_LOOP_PHI_EVOLUTION_PART(S) (S)->loop_phi_evolution_part 1029 #define STMT_VINFO_LOOP_PHI_EVOLUTION_PART(S) (S)->loop_phi_evolution_part
774 #define STMT_VINFO_MIN_NEG_DIST(S) (S)->min_neg_dist 1030 #define STMT_VINFO_MIN_NEG_DIST(S) (S)->min_neg_dist
775 #define STMT_VINFO_NUM_SLP_USES(S) (S)->num_slp_uses 1031 #define STMT_VINFO_NUM_SLP_USES(S) (S)->num_slp_uses
776 #define STMT_VINFO_REDUC_TYPE(S) (S)->reduc_type 1032 #define STMT_VINFO_REDUC_TYPE(S) (S)->reduc_type
777 #define STMT_VINFO_REDUC_DEF(S) (S)->reduc_def 1033 #define STMT_VINFO_REDUC_DEF(S) (S)->reduc_def
778 1034
779 #define GROUP_FIRST_ELEMENT(S) (S)->first_element 1035 #define DR_GROUP_FIRST_ELEMENT(S) \
780 #define GROUP_NEXT_ELEMENT(S) (S)->next_element 1036 (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element)
781 #define GROUP_SIZE(S) (S)->size 1037 #define DR_GROUP_NEXT_ELEMENT(S) \
782 #define GROUP_STORE_COUNT(S) (S)->store_count 1038 (gcc_checking_assert ((S)->dr_aux.dr), (S)->next_element)
783 #define GROUP_GAP(S) (S)->gap 1039 #define DR_GROUP_SIZE(S) \
784 #define GROUP_SAME_DR_STMT(S) (S)->same_dr_stmt 1040 (gcc_checking_assert ((S)->dr_aux.dr), (S)->size)
1041 #define DR_GROUP_STORE_COUNT(S) \
1042 (gcc_checking_assert ((S)->dr_aux.dr), (S)->store_count)
1043 #define DR_GROUP_GAP(S) \
1044 (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap)
1045 #define DR_GROUP_SAME_DR_STMT(S) \
1046 (gcc_checking_assert ((S)->dr_aux.dr), (S)->same_dr_stmt)
1047
1048 #define REDUC_GROUP_FIRST_ELEMENT(S) \
1049 (gcc_checking_assert (!(S)->dr_aux.dr), (S)->first_element)
1050 #define REDUC_GROUP_NEXT_ELEMENT(S) \
1051 (gcc_checking_assert (!(S)->dr_aux.dr), (S)->next_element)
1052 #define REDUC_GROUP_SIZE(S) \
1053 (gcc_checking_assert (!(S)->dr_aux.dr), (S)->size)
785 1054
786 #define STMT_VINFO_RELEVANT_P(S) ((S)->relevant != vect_unused_in_scope) 1055 #define STMT_VINFO_RELEVANT_P(S) ((S)->relevant != vect_unused_in_scope)
787 1056
788 #define HYBRID_SLP_STMT(S) ((S)->slp_type == hybrid) 1057 #define HYBRID_SLP_STMT(S) ((S)->slp_type == hybrid)
789 #define PURE_SLP_STMT(S) ((S)->slp_type == pure_slp) 1058 #define PURE_SLP_STMT(S) ((S)->slp_type == pure_slp)
790 #define STMT_SLP_TYPE(S) (S)->slp_type 1059 #define STMT_SLP_TYPE(S) (S)->slp_type
791 1060
792 struct dataref_aux {
793 /* The misalignment in bytes of the reference, or -1 if not known. */
794 int misalignment;
795 /* The byte alignment that we'd ideally like the reference to have,
796 and the value that misalignment is measured against. */
797 int target_alignment;
798 /* If true the alignment of base_decl needs to be increased. */
799 bool base_misaligned;
800 tree base_decl;
801 };
802
803 #define DR_VECT_AUX(dr) ((dataref_aux *)(dr)->aux)
804
805 #define VECT_MAX_COST 1000 1061 #define VECT_MAX_COST 1000
806 1062
807 /* The maximum number of intermediate steps required in multi-step type 1063 /* The maximum number of intermediate steps required in multi-step type
808 conversion. */ 1064 conversion. */
809 #define MAX_INTERM_CVT_STEPS 3 1065 #define MAX_INTERM_CVT_STEPS 3
810 1066
811 /* The maximum vectorization factor supported by any target (V64QI). */ 1067 #define MAX_VECTORIZATION_FACTOR INT_MAX
812 #define MAX_VECTORIZATION_FACTOR 64
813 1068
814 /* Nonzero if TYPE represents a (scalar) boolean type or type 1069 /* Nonzero if TYPE represents a (scalar) boolean type or type
815 in the middle-end compatible with it (unsigned precision 1 integral 1070 in the middle-end compatible with it (unsigned precision 1 integral
816 types). Used to determine which types should be vectorized as 1071 types). Used to determine which types should be vectorized as
817 VECTOR_BOOLEAN_TYPE_P. */ 1072 VECTOR_BOOLEAN_TYPE_P. */
821 || ((TREE_CODE (TYPE) == INTEGER_TYPE \ 1076 || ((TREE_CODE (TYPE) == INTEGER_TYPE \
822 || TREE_CODE (TYPE) == ENUMERAL_TYPE) \ 1077 || TREE_CODE (TYPE) == ENUMERAL_TYPE) \
823 && TYPE_PRECISION (TYPE) == 1 \ 1078 && TYPE_PRECISION (TYPE) == 1 \
824 && TYPE_UNSIGNED (TYPE))) 1079 && TYPE_UNSIGNED (TYPE)))
825 1080
826 extern vec<stmt_vec_info> stmt_vec_info_vec; 1081 static inline bool
827 1082 nested_in_vect_loop_p (struct loop *loop, stmt_vec_info stmt_info)
828 void init_stmt_vec_info_vec (void); 1083 {
829 void free_stmt_vec_info_vec (void); 1084 return (loop->inner
830 1085 && (loop->inner == (gimple_bb (stmt_info->stmt))->loop_father));
831 /* Return a stmt_vec_info corresponding to STMT. */ 1086 }
1087
1088 /* Return the earlier statement between STMT1_INFO and STMT2_INFO. */
832 1089
833 static inline stmt_vec_info 1090 static inline stmt_vec_info
834 vinfo_for_stmt (gimple *stmt) 1091 get_earlier_stmt (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info)
835 { 1092 {
836 int uid = gimple_uid (stmt); 1093 gcc_checking_assert ((STMT_VINFO_IN_PATTERN_P (stmt1_info)
837 if (uid <= 0) 1094 || !STMT_VINFO_RELATED_STMT (stmt1_info))
838 return NULL; 1095 && (STMT_VINFO_IN_PATTERN_P (stmt2_info)
839 1096 || !STMT_VINFO_RELATED_STMT (stmt2_info)));
840 return stmt_vec_info_vec[uid - 1]; 1097
841 } 1098 if (gimple_uid (stmt1_info->stmt) < gimple_uid (stmt2_info->stmt))
842 1099 return stmt1_info;
843 /* Set vectorizer information INFO for STMT. */
844
845 static inline void
846 set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info)
847 {
848 unsigned int uid = gimple_uid (stmt);
849 if (uid == 0)
850 {
851 gcc_checking_assert (info);
852 uid = stmt_vec_info_vec.length () + 1;
853 gimple_set_uid (stmt, uid);
854 stmt_vec_info_vec.safe_push (info);
855 }
856 else 1100 else
857 { 1101 return stmt2_info;
858 gcc_checking_assert (info == NULL); 1102 }
859 stmt_vec_info_vec[uid - 1] = info; 1103
860 } 1104 /* Return the later statement between STMT1_INFO and STMT2_INFO. */
861 } 1105
862 1106 static inline stmt_vec_info
863 /* Return the earlier statement between STMT1 and STMT2. */ 1107 get_later_stmt (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info)
864 1108 {
865 static inline gimple * 1109 gcc_checking_assert ((STMT_VINFO_IN_PATTERN_P (stmt1_info)
866 get_earlier_stmt (gimple *stmt1, gimple *stmt2) 1110 || !STMT_VINFO_RELATED_STMT (stmt1_info))
867 { 1111 && (STMT_VINFO_IN_PATTERN_P (stmt2_info)
868 unsigned int uid1, uid2; 1112 || !STMT_VINFO_RELATED_STMT (stmt2_info)));
869 1113
870 if (stmt1 == NULL) 1114 if (gimple_uid (stmt1_info->stmt) > gimple_uid (stmt2_info->stmt))
871 return stmt2; 1115 return stmt1_info;
872
873 if (stmt2 == NULL)
874 return stmt1;
875
876 uid1 = gimple_uid (stmt1);
877 uid2 = gimple_uid (stmt2);
878
879 if (uid1 == 0 || uid2 == 0)
880 return NULL;
881
882 gcc_checking_assert (uid1 <= stmt_vec_info_vec.length ()
883 && uid2 <= stmt_vec_info_vec.length ());
884
885 if (uid1 < uid2)
886 return stmt1;
887 else 1116 else
888 return stmt2; 1117 return stmt2_info;
889 }
890
891 /* Return the later statement between STMT1 and STMT2. */
892
893 static inline gimple *
894 get_later_stmt (gimple *stmt1, gimple *stmt2)
895 {
896 unsigned int uid1, uid2;
897
898 if (stmt1 == NULL)
899 return stmt2;
900
901 if (stmt2 == NULL)
902 return stmt1;
903
904 uid1 = gimple_uid (stmt1);
905 uid2 = gimple_uid (stmt2);
906
907 if (uid1 == 0 || uid2 == 0)
908 return NULL;
909
910 gcc_assert (uid1 <= stmt_vec_info_vec.length ());
911 gcc_assert (uid2 <= stmt_vec_info_vec.length ());
912
913 if (uid1 > uid2)
914 return stmt1;
915 else
916 return stmt2;
917 } 1118 }
918 1119
919 /* Return TRUE if a statement represented by STMT_INFO is a part of a 1120 /* Return TRUE if a statement represented by STMT_INFO is a part of a
920 pattern. */ 1121 pattern. */
921 1122
922 static inline bool 1123 static inline bool
923 is_pattern_stmt_p (stmt_vec_info stmt_info) 1124 is_pattern_stmt_p (stmt_vec_info stmt_info)
924 { 1125 {
925 gimple *related_stmt; 1126 return stmt_info->pattern_stmt_p;
926 stmt_vec_info related_stmt_info; 1127 }
927 1128
928 related_stmt = STMT_VINFO_RELATED_STMT (stmt_info); 1129 /* If STMT_INFO is a pattern statement, return the statement that it
929 if (related_stmt 1130 replaces, otherwise return STMT_INFO itself. */
930 && (related_stmt_info = vinfo_for_stmt (related_stmt)) 1131
931 && STMT_VINFO_IN_PATTERN_P (related_stmt_info)) 1132 inline stmt_vec_info
932 return true; 1133 vect_orig_stmt (stmt_vec_info stmt_info)
933 1134 {
934 return false; 1135 if (is_pattern_stmt_p (stmt_info))
1136 return STMT_VINFO_RELATED_STMT (stmt_info);
1137 return stmt_info;
1138 }
1139
1140 /* If STMT_INFO has been replaced by a pattern statement, return the
1141 replacement statement, otherwise return STMT_INFO itself. */
1142
1143 inline stmt_vec_info
1144 vect_stmt_to_vectorize (stmt_vec_info stmt_info)
1145 {
1146 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
1147 return STMT_VINFO_RELATED_STMT (stmt_info);
1148 return stmt_info;
935 } 1149 }
936 1150
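Taken together: for a pattern statement P that replaces an original statement O, vect_orig_stmt (P) == O and vect_stmt_to_vectorize (O) == P, so the composition below always yields the statement the vectorizer will actually operate on:

    stmt_vec_info work
      = vect_stmt_to_vectorize (vect_orig_stmt (stmt_info));
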
937 /* Return true if BB is a loop header. */ 1151 /* Return true if BB is a loop header. */
938 1152
939 static inline bool 1153 static inline bool
982 init_cost (struct loop *loop_info) 1196 init_cost (struct loop *loop_info)
983 { 1197 {
984 return targetm.vectorize.init_cost (loop_info); 1198 return targetm.vectorize.init_cost (loop_info);
985 } 1199 }
986 1200
1201 extern void dump_stmt_cost (FILE *, void *, int, enum vect_cost_for_stmt,
1202 stmt_vec_info, int, unsigned,
1203 enum vect_cost_model_location);
1204
987 /* Alias targetm.vectorize.add_stmt_cost. */ 1205 /* Alias targetm.vectorize.add_stmt_cost. */
988 1206
989 static inline unsigned 1207 static inline unsigned
990 add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, 1208 add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
991 stmt_vec_info stmt_info, int misalign, 1209 stmt_vec_info stmt_info, int misalign,
992 enum vect_cost_model_location where) 1210 enum vect_cost_model_location where)
993 { 1211 {
994 return targetm.vectorize.add_stmt_cost (data, count, kind, 1212 unsigned cost = targetm.vectorize.add_stmt_cost (data, count, kind,
995 stmt_info, misalign, where); 1213 stmt_info, misalign, where);
1214 if (dump_file && (dump_flags & TDF_DETAILS))
1215 dump_stmt_cost (dump_file, data, count, kind, stmt_info, misalign,
1216 cost, where);
1217 return cost;
996 } 1218 }
997 1219
998 /* Alias targetm.vectorize.finish_cost. */ 1220 /* Alias targetm.vectorize.finish_cost. */
999 1221
1000 static inline void 1222 static inline void
1010 destroy_cost_data (void *data) 1232 destroy_cost_data (void *data)
1011 { 1233 {
1012 targetm.vectorize.destroy_cost_data (data); 1234 targetm.vectorize.destroy_cost_data (data);
1013 } 1235 }
1014 1236
1237 inline void
1238 add_stmt_costs (void *data, stmt_vector_for_cost *cost_vec)
1239 {
1240 stmt_info_for_cost *cost;
1241 unsigned i;
1242 FOR_EACH_VEC_ELT (*cost_vec, i, cost)
1243 add_stmt_cost (data, cost->count, cost->kind, cost->stmt_info,
1244 cost->misalign, cost->where);
1245 }
1246
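Typical use is to replay costs recorded during analysis into the target model (loop_vinfo and cost_vec here are illustrative):

    /* Feed every cost in COST_VEC to the target cost model.  */
    add_stmt_costs (loop_vinfo->target_cost_data, &cost_vec);
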
1015 /*-----------------------------------------------------------------*/ 1247 /*-----------------------------------------------------------------*/
1016 /* Info on data references alignment. */ 1248 /* Info on data references alignment. */
1017 /*-----------------------------------------------------------------*/ 1249 /*-----------------------------------------------------------------*/
1250 #define DR_MISALIGNMENT_UNKNOWN (-1)
1251 #define DR_MISALIGNMENT_UNINITIALIZED (-2)
1252
1018 inline void 1253 inline void
1019 set_dr_misalignment (struct data_reference *dr, int val) 1254 set_dr_misalignment (dr_vec_info *dr_info, int val)
1020 { 1255 {
1021 dataref_aux *data_aux = DR_VECT_AUX (dr); 1256 dr_info->misalignment = val;
1022
1023 if (!data_aux)
1024 {
1025 data_aux = XCNEW (dataref_aux);
1026 dr->aux = data_aux;
1027 }
1028
1029 data_aux->misalignment = val;
1030 } 1257 }
1031 1258
1032 inline int 1259 inline int
1033 dr_misalignment (struct data_reference *dr) 1260 dr_misalignment (dr_vec_info *dr_info)
1034 { 1261 {
1035 return DR_VECT_AUX (dr)->misalignment; 1262 int misalign = dr_info->misalignment;
1263 gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED);
1264 return misalign;
1036 } 1265 }
1037 1266
1038 /* Reflects actual alignment of first access in the vectorized loop, 1267 /* Reflects actual alignment of first access in the vectorized loop,
1039 taking into account peeling/versioning if applied. */ 1268 taking into account peeling/versioning if applied. */
1040 #define DR_MISALIGNMENT(DR) dr_misalignment (DR) 1269 #define DR_MISALIGNMENT(DR) dr_misalignment (DR)
1041 #define SET_DR_MISALIGNMENT(DR, VAL) set_dr_misalignment (DR, VAL) 1270 #define SET_DR_MISALIGNMENT(DR, VAL) set_dr_misalignment (DR, VAL)
1042 #define DR_MISALIGNMENT_UNKNOWN (-1)
1043 1271
1044 /* Only defined once DR_MISALIGNMENT is defined. */ 1272 /* Only defined once DR_MISALIGNMENT is defined. */
1045 #define DR_TARGET_ALIGNMENT(DR) DR_VECT_AUX (DR)->target_alignment 1273 #define DR_TARGET_ALIGNMENT(DR) ((DR)->target_alignment)
1046 1274
1047 /* Return true if data access DR is aligned to its target alignment 1275 /* Return true if data access DR_INFO is aligned to its target alignment
1048 (which may be less than a full vector). */ 1276 (which may be less than a full vector). */
1049 1277
1050 static inline bool 1278 static inline bool
1051 aligned_access_p (struct data_reference *data_ref_info) 1279 aligned_access_p (dr_vec_info *dr_info)
1052 { 1280 {
1053 return (DR_MISALIGNMENT (data_ref_info) == 0); 1281 return (DR_MISALIGNMENT (dr_info) == 0);
1054 } 1282 }
1055 1283
1056 /* Return TRUE if the alignment of the data access is known, and FALSE 1284 /* Return TRUE if the alignment of the data access is known, and FALSE
1057 otherwise. */ 1285 otherwise. */
1058 1286
1059 static inline bool 1287 static inline bool
1060 known_alignment_for_access_p (struct data_reference *data_ref_info) 1288 known_alignment_for_access_p (dr_vec_info *dr_info)
1061 { 1289 {
1062 return (DR_MISALIGNMENT (data_ref_info) != DR_MISALIGNMENT_UNKNOWN); 1290 return (DR_MISALIGNMENT (dr_info) != DR_MISALIGNMENT_UNKNOWN);
1063 } 1291 }
1064 1292
1065 /* Return the minimum alignment in bytes that the vectorized version 1293 /* Return the minimum alignment in bytes that the vectorized version
1066 of DR is guaranteed to have. */ 1294 of DR_INFO is guaranteed to have. */
1067 1295
1068 static inline unsigned int 1296 static inline unsigned int
1069 vect_known_alignment_in_bytes (struct data_reference *dr) 1297 vect_known_alignment_in_bytes (dr_vec_info *dr_info)
1070 { 1298 {
1071 if (DR_MISALIGNMENT (dr) == DR_MISALIGNMENT_UNKNOWN) 1299 if (DR_MISALIGNMENT (dr_info) == DR_MISALIGNMENT_UNKNOWN)
1072 return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr))); 1300 return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_info->dr)));
1073 if (DR_MISALIGNMENT (dr) == 0) 1301 if (DR_MISALIGNMENT (dr_info) == 0)
1074 return DR_TARGET_ALIGNMENT (dr); 1302 return DR_TARGET_ALIGNMENT (dr_info);
1075 return DR_MISALIGNMENT (dr) & -DR_MISALIGNMENT (dr); 1303 return DR_MISALIGNMENT (dr_info) & -DR_MISALIGNMENT (dr_info);
1076 } 1304 }
1077 1305
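A worked instance of the final case above: misalignment & -misalignment isolates the lowest set bit, the largest power of two certain to divide every access address.

    /* E.g. DR_MISALIGNMENT == 12 against a 16-byte target alignment:
       addresses have the form 16k + 12, and 12 & -12 == 4, so only
       4-byte alignment is guaranteed.  */
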
1078 /* Return the behavior of DR with respect to the vectorization context 1306 /* Return the behavior of DR_INFO with respect to the vectorization context
1079 (which for outer loop vectorization might not be the behavior recorded 1307 (which for outer loop vectorization might not be the behavior recorded
1080 in DR itself). */ 1308 in DR_INFO itself). */
1081 1309
1082 static inline innermost_loop_behavior * 1310 static inline innermost_loop_behavior *
1083 vect_dr_behavior (data_reference *dr) 1311 vect_dr_behavior (dr_vec_info *dr_info)
1084 { 1312 {
1085 gimple *stmt = DR_STMT (dr); 1313 stmt_vec_info stmt_info = dr_info->stmt;
1086 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
1087 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 1314 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
1088 if (loop_vinfo == NULL 1315 if (loop_vinfo == NULL
1089 || !nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt)) 1316 || !nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt_info))
1090 return &DR_INNERMOST (dr); 1317 return &DR_INNERMOST (dr_info->dr);
1091 else 1318 else
1092 return &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info); 1319 return &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info);
1093 } 1320 }
1094 1321
1095 /* Return true if the vect cost model is unlimited. */ 1322 /* Return true if the vect cost model is unlimited. */
1100 && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT) 1327 && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT)
1101 return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED; 1328 return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED;
1102 return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED); 1329 return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED);
1103 } 1330 }
1104 1331
1332 /* Return true if the loop described by LOOP_VINFO is fully-masked and
1333 if the first iteration should use a partial mask in order to achieve
1334 alignment. */
1335
1336 static inline bool
1337 vect_use_loop_mask_for_alignment_p (loop_vec_info loop_vinfo)
1338 {
1339 return (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
1340 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
1341 }
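
[Editor's note: in a fully-masked loop, the alignment peel needs no scalar prologue: the first vector iteration can run under a partial mask so that a later iteration starts on an aligned boundary. One plausible shape of that first-iteration mask, purely as an illustration (NPEEL plays the role of the peel amount from LOOP_VINFO_PEELING_FOR_ALIGNMENT; this is not the GCC mask-generation code):

/* Illustration only: enable just the first NPEEL of VF lanes, so the
   elements a scalar prologue would have peeled are handled under mask.  */
static void
first_iteration_mask (unsigned npeel, unsigned vf, unsigned char *mask)
{
  for (unsigned i = 0; i < vf; ++i)
    mask[i] = (i < npeel);
}]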
1342
1343 /* Return the number of vectors of type VECTYPE that are needed to get
1344 NUNITS elements. NUNITS should be based on the vectorization factor,
1345 so it is always a known multiple of the number of elements in VECTYPE. */
1346
1347 static inline unsigned int
1348 vect_get_num_vectors (poly_uint64 nunits, tree vectype)
1349 {
1350 return exact_div (nunits, TYPE_VECTOR_SUBPARTS (vectype)).to_constant ();
1351 }
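
[Editor's note: exact_div asserts, in checking builds, that the division is exact. Worked example: with an accumulated NUNITS of 16 and a V4SI vector type (4 subparts), 16 / 4 = 4 vectors are needed. A plain-integer sketch of the contract:

#include <assert.h>

static unsigned int
get_num_vectors_sketch (unsigned long nunits, unsigned long subparts)
{
  assert (nunits % subparts == 0);   /* models exact_div's precondition */
  return nunits / subparts;          /* e.g. 16 units / 4 lanes = 4 vectors */
}]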
1352
1105 /* Return the number of copies needed for loop vectorization when 1353 /* Return the number of copies needed for loop vectorization when
1106 a statement operates on vectors of type VECTYPE. This is the 1354 a statement operates on vectors of type VECTYPE. This is the
1107 vectorization factor divided by the number of elements in 1355 vectorization factor divided by the number of elements in
1108 VECTYPE and is always known at compile time. */ 1356 VECTYPE and is always known at compile time. */
1109 1357
1110 static inline unsigned int 1358 static inline unsigned int
1111 vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype) 1359 vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype)
1112 { 1360 {
1113 gcc_checking_assert (LOOP_VINFO_VECT_FACTOR (loop_vinfo) 1361 return vect_get_num_vectors (LOOP_VINFO_VECT_FACTOR (loop_vinfo), vectype);
1114 % TYPE_VECTOR_SUBPARTS (vectype) == 0); 1362 }
1115 return (LOOP_VINFO_VECT_FACTOR (loop_vinfo) 1363
1116 / TYPE_VECTOR_SUBPARTS (vectype)); 1364 /* Update maximum unit count *MAX_NUNITS so that it accounts for
1117 } 1365 the number of units in vector type VECTYPE. *MAX_NUNITS can be 1
1118 1366 if we haven't yet recorded any vector types. */
1119 /* Return the size of the value accessed by unvectorized data reference DR. 1367
1120 This is only valid once STMT_VINFO_VECTYPE has been calculated for the 1368 static inline void
1121 associated gimple statement, since that guarantees that DR accesses 1369 vect_update_max_nunits (poly_uint64 *max_nunits, tree vectype)
1122 either a scalar or a scalar equivalent. ("Scalar equivalent" here 1370 {
1123 includes things like V1SI, which can be vectorized in the same way 1371 /* All unit counts have the form current_vector_size * X for some
1372 rational X, so two unit sizes must have a common multiple.
1373 Everything is a multiple of the initial value of 1. */
1374 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
1375 *max_nunits = force_common_multiple (*max_nunits, nunits);
1376 }
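
[Editor's note: vect_get_num_copies is now a thin wrapper over vect_get_num_vectors: with VF = 8 and V4SI (4 lanes), each scalar statement needs 8 / 4 = 2 vector copies. vect_update_max_nunits accumulates a running common multiple of all lane counts seen so far; restricted to compile-time-constant counts it degenerates to an lcm, as in this sketch (force_common_multiple itself also handles the poly_int case):

static unsigned long
gcd_ul (unsigned long a, unsigned long b)
{
  while (b) { unsigned long t = a % b; a = b; b = t; }
  return a;
}

/* Constant-only model of vect_update_max_nunits: starting from 1, fold
   in each vector type's lane count, e.g. 1 -> 4 -> lcm (4, 8) = 8.  */
static void
update_max_nunits_sketch (unsigned long *max_nunits, unsigned long nunits)
{
  *max_nunits = *max_nunits / gcd_ul (*max_nunits, nunits) * nunits;
}]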
1377
1378 /* Return the vectorization factor that should be used for costing
1379 purposes while vectorizing the loop described by LOOP_VINFO.
1380 Pick a reasonable estimate if the vectorization factor isn't
1381 known at compile time. */
1382
1383 static inline unsigned int
1384 vect_vf_for_cost (loop_vec_info loop_vinfo)
1385 {
1386 return estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
1387 }
1388
1389 /* Estimate the number of elements in VEC_TYPE for costing purposes.
1390 Pick a reasonable estimate if the exact number isn't known at
1391 compile time. */
1392
1393 static inline unsigned int
1394 vect_nunits_for_cost (tree vec_type)
1395 {
1396 return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vec_type));
1397 }
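
[Editor's note: both costing helpers above exist because with variable-length vectors (e.g. SVE) the vectorization factor and the lane count are poly_ints with no compile-time value; estimated_poly_value evaluates them at a plausible runtime value so the cost model can work with ordinary integers. A reduced model treating a degree-1 poly value as a0 + a1*x with an assumed x (names and the choice of x are illustrative):

/* Models estimated_poly_value for costing: evaluate a0 + a1*x at an
   assumed runtime parameter X (for SVE, roughly the number of extra
   128-bit blocks in the vector registers).  */
static unsigned int
estimate_poly (unsigned a0, unsigned a1, unsigned assumed_x)
{
  return a0 + a1 * assumed_x;   /* e.g. "4 + 4x" at x = 0 gives 4 lanes */
}]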
1398
1399 /* Return the maximum possible vectorization factor for LOOP_VINFO. */
1400
1401 static inline unsigned HOST_WIDE_INT
1402 vect_max_vf (loop_vec_info loop_vinfo)
1403 {
1404 unsigned HOST_WIDE_INT vf;
1405 if (LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
1406 return vf;
1407 return MAX_VECTORIZATION_FACTOR;
1408 }
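
[Editor's note: this is the usual poly_int pattern: try is_constant, and fall back to a conservative compile-time bound when the VF is only known at run time. Sketch with an explicit "known" flag standing in for is_constant; the cap value is a stand-in for MAX_VECTORIZATION_FACTOR, whose actual definition is not shown in this hunk:

#define MAX_VF_CAP 64   /* stand-in for MAX_VECTORIZATION_FACTOR; value assumed */

static unsigned long
max_vf_sketch (int vf_is_constant, unsigned long vf)
{
  return vf_is_constant ? vf : MAX_VF_CAP;   /* unknown VF -> conservative cap */
}]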
1409
1410 /* Return the size of the value accessed by unvectorized data reference
1411 DR_INFO. This is only valid once STMT_VINFO_VECTYPE has been calculated
1412 for the associated gimple statement, since that guarantees that DR_INFO
1413 accesses either a scalar or a scalar equivalent. ("Scalar equivalent"
1414 here includes things like V1SI, which can be vectorized in the same way
1124 as a plain SI.) */ 1415 as a plain SI.) */
1125 1416
1126 inline unsigned int 1417 inline unsigned int
1127 vect_get_scalar_dr_size (struct data_reference *dr) 1418 vect_get_scalar_dr_size (dr_vec_info *dr_info)
1128 { 1419 {
1129 return tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)))); 1420 return tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_info->dr))));
1130 } 1421 }
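
[Editor's note: worked example of the computation above: for a data reference whose element type is int32_t the helper returns 4, and a V1SI reference also yields 4, which is why the "scalar equivalent" caveat in the comment matters. The underlying value is just the byte size of the referenced type:

#include <stdint.h>

/* TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))) is the element type's size
   in bytes; modeled here with sizeof for a concrete element type.  */
static unsigned int
scalar_dr_size_example (void)
{
  return (unsigned int) sizeof (int32_t);   /* == 4 */
}]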
1131 1422
1132 /* Source location */ 1423 /* Source location + hotness information. */
1133 extern source_location vect_location; 1424 extern dump_user_location_t vect_location;
1425
1426 /* A macro for calling:
1427 dump_begin_scope (MSG, vect_location);
1428 via an RAII object, thus printing "=== MSG ===\n" to the dumpfile etc,
1429 and then calling
1430 dump_end_scope ();
1431 once the object goes out of scope, thus capturing the nesting of
1432 the scopes.
1433
1434 These scopes affect dump messages within them: dump messages at the
1435 top level implicitly default to MSG_PRIORITY_USER_FACING, whereas those
1436 in a nested scope implicitly default to MSG_PRIORITY_INTERNALS. */
1437
1438 #define DUMP_VECT_SCOPE(MSG) \
1439 AUTO_DUMP_SCOPE (MSG, vect_location)
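
[Editor's note: typical in-tree usage of the new macro, following the pattern seen throughout the vectorizer sources (the function name below is hypothetical; dump_printf_loc and MSG_NOTE are the existing GCC dump API):

static bool
vect_analyze_something (loop_vec_info loop_vinfo)
{
  DUMP_VECT_SCOPE ("vect_analyze_something");   /* "=== ... ===" in the dump */

  /* Dump messages emitted while the scope object is live are nested
     inside it and default to MSG_PRIORITY_INTERNALS.  */
  dump_printf_loc (MSG_NOTE, vect_location, "checking loop form\n");
  return true;
}]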
1134 1440
1135 /*-----------------------------------------------------------------*/ 1441 /*-----------------------------------------------------------------*/
1136 /* Function prototypes. */ 1442 /* Function prototypes. */
1137 /*-----------------------------------------------------------------*/ 1443 /*-----------------------------------------------------------------*/
1138 1444
1139 /* Simple loop peeling and versioning utilities for vectorizer's purposes - 1445 /* Simple loop peeling and versioning utilities for vectorizer's purposes -
1140 in tree-vect-loop-manip.c. */ 1446 in tree-vect-loop-manip.c. */
1141 extern void slpeel_make_loop_iterate_ntimes (struct loop *, tree); 1447 extern void vect_set_loop_condition (struct loop *, loop_vec_info,
1448 tree, tree, tree, bool);
1142 extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge); 1449 extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge);
1143 struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, 1450 struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *,
1144 struct loop *, edge); 1451 struct loop *, edge);
1145 extern void vect_loop_versioning (loop_vec_info, unsigned int, bool); 1452 extern void vect_loop_versioning (loop_vec_info, unsigned int, bool,
1453 poly_uint64);
1146 extern struct loop *vect_do_peeling (loop_vec_info, tree, tree, 1454 extern struct loop *vect_do_peeling (loop_vec_info, tree, tree,
1147 tree *, int, bool, bool); 1455 tree *, tree *, tree *, int, bool, bool);
1148 extern source_location find_loop_location (struct loop *); 1456 extern void vect_prepare_for_masked_peels (loop_vec_info);
1457 extern dump_user_location_t find_loop_location (struct loop *);
1149 extern bool vect_can_advance_ivs_p (loop_vec_info); 1458 extern bool vect_can_advance_ivs_p (loop_vec_info);
1150 1459
1151 /* In tree-vect-stmts.c. */ 1460 /* In tree-vect-stmts.c. */
1152 extern unsigned int current_vector_size; 1461 extern poly_uint64 current_vector_size;
1153 extern tree get_vectype_for_scalar_type (tree); 1462 extern tree get_vectype_for_scalar_type (tree);
1463 extern tree get_vectype_for_scalar_type_and_size (tree, poly_uint64);
1154 extern tree get_mask_type_for_scalar_type (tree); 1464 extern tree get_mask_type_for_scalar_type (tree);
1155 extern tree get_same_sized_vectype (tree, tree); 1465 extern tree get_same_sized_vectype (tree, tree);
1156 extern bool vect_is_simple_use (tree, vec_info *, gimple **, 1466 extern bool vect_get_loop_mask_type (loop_vec_info);
1157 enum vect_def_type *); 1467 extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *,
1158 extern bool vect_is_simple_use (tree, vec_info *, gimple **, 1468 stmt_vec_info * = NULL, gimple ** = NULL);
1159 enum vect_def_type *, tree *); 1469 extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *,
1160 extern bool supportable_widening_operation (enum tree_code, gimple *, tree, 1470 tree *, stmt_vec_info * = NULL,
1161 tree, enum tree_code *, 1471 gimple ** = NULL);
1472 extern bool supportable_widening_operation (enum tree_code, stmt_vec_info,
1473 tree, tree, enum tree_code *,
1162 enum tree_code *, int *, 1474 enum tree_code *, int *,
1163 vec<tree> *); 1475 vec<tree> *);
1164 extern bool supportable_narrowing_operation (enum tree_code, tree, tree, 1476 extern bool supportable_narrowing_operation (enum tree_code, tree, tree,
1165 enum tree_code *, 1477 enum tree_code *,
1166 int *, vec<tree> *); 1478 int *, vec<tree> *);
1167 extern stmt_vec_info new_stmt_vec_info (gimple *stmt, vec_info *);
1168 extern void free_stmt_vec_info (gimple *stmt);
1169 extern void vect_model_simple_cost (stmt_vec_info, int, enum vect_def_type *,
1170 int, stmt_vector_for_cost *,
1171 stmt_vector_for_cost *);
1172 extern void vect_model_store_cost (stmt_vec_info, int, vect_memory_access_type,
1173 enum vect_def_type, slp_tree,
1174 stmt_vector_for_cost *,
1175 stmt_vector_for_cost *);
1176 extern void vect_model_load_cost (stmt_vec_info, int, vect_memory_access_type,
1177 slp_tree, stmt_vector_for_cost *,
1178 stmt_vector_for_cost *);
1179 extern unsigned record_stmt_cost (stmt_vector_for_cost *, int, 1479 extern unsigned record_stmt_cost (stmt_vector_for_cost *, int,
1180 enum vect_cost_for_stmt, stmt_vec_info, 1480 enum vect_cost_for_stmt, stmt_vec_info,
1181 int, enum vect_cost_model_location); 1481 int, enum vect_cost_model_location);
1182 extern void vect_finish_stmt_generation (gimple *, gimple *, 1482 extern stmt_vec_info vect_finish_replace_stmt (stmt_vec_info, gimple *);
1183 gimple_stmt_iterator *); 1483 extern stmt_vec_info vect_finish_stmt_generation (stmt_vec_info, gimple *,
1184 extern bool vect_mark_stmts_to_be_vectorized (loop_vec_info); 1484 gimple_stmt_iterator *);
1185 extern tree vect_get_vec_def_for_operand_1 (gimple *, enum vect_def_type); 1485 extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info);
1186 extern tree vect_get_vec_def_for_operand (tree, gimple *, tree = NULL); 1486 extern tree vect_get_store_rhs (stmt_vec_info);
1187 extern void vect_get_vec_defs (tree, tree, gimple *, vec<tree> *, 1487 extern tree vect_get_vec_def_for_operand_1 (stmt_vec_info, enum vect_def_type);
1488 extern tree vect_get_vec_def_for_operand (tree, stmt_vec_info, tree = NULL);
1489 extern void vect_get_vec_defs (tree, tree, stmt_vec_info, vec<tree> *,
1188 vec<tree> *, slp_tree); 1490 vec<tree> *, slp_tree);
1189 extern void vect_get_vec_defs_for_stmt_copy (enum vect_def_type *, 1491 extern void vect_get_vec_defs_for_stmt_copy (vec_info *,
1190 vec<tree> *, vec<tree> *); 1492 vec<tree> *, vec<tree> *);
1191 extern tree vect_init_vector (gimple *, tree, tree, 1493 extern tree vect_init_vector (stmt_vec_info, tree, tree,
1192 gimple_stmt_iterator *); 1494 gimple_stmt_iterator *);
1193 extern tree vect_get_vec_def_for_stmt_copy (enum vect_def_type, tree); 1495 extern tree vect_get_vec_def_for_stmt_copy (vec_info *, tree);
1194 extern bool vect_transform_stmt (gimple *, gimple_stmt_iterator *, 1496 extern bool vect_transform_stmt (stmt_vec_info, gimple_stmt_iterator *,
1195 bool *, slp_tree, slp_instance); 1497 slp_tree, slp_instance);
1196 extern void vect_remove_stores (gimple *); 1498 extern void vect_remove_stores (stmt_vec_info);
1197 extern bool vect_analyze_stmt (gimple *, bool *, slp_tree, slp_instance); 1499 extern opt_result vect_analyze_stmt (stmt_vec_info, bool *, slp_tree,
1198 extern bool vectorizable_condition (gimple *, gimple_stmt_iterator *, 1500 slp_instance, stmt_vector_for_cost *);
1199 gimple **, tree, int, slp_tree); 1501 extern bool vectorizable_condition (stmt_vec_info, gimple_stmt_iterator *,
1200 extern void vect_get_load_cost (struct data_reference *, int, bool, 1502 stmt_vec_info *, tree, int, slp_tree,
1503 stmt_vector_for_cost *);
1504 extern void vect_get_load_cost (stmt_vec_info, int, bool,
1201 unsigned int *, unsigned int *, 1505 unsigned int *, unsigned int *,
1202 stmt_vector_for_cost *, 1506 stmt_vector_for_cost *,
1203 stmt_vector_for_cost *, bool); 1507 stmt_vector_for_cost *, bool);
1204 extern void vect_get_store_cost (struct data_reference *, int, 1508 extern void vect_get_store_cost (stmt_vec_info, int,
1205 unsigned int *, stmt_vector_for_cost *); 1509 unsigned int *, stmt_vector_for_cost *);
1206 extern bool vect_supportable_shift (enum tree_code, tree); 1510 extern bool vect_supportable_shift (enum tree_code, tree);
1207 extern tree vect_gen_perm_mask_any (tree, vec_perm_indices); 1511 extern tree vect_gen_perm_mask_any (tree, const vec_perm_indices &);
1208 extern tree vect_gen_perm_mask_checked (tree, vec_perm_indices); 1512 extern tree vect_gen_perm_mask_checked (tree, const vec_perm_indices &);
1209 extern void optimize_mask_stores (struct loop*); 1513 extern void optimize_mask_stores (struct loop*);
1514 extern gcall *vect_gen_while (tree, tree, tree);
1515 extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree);
1516 extern opt_result vect_get_vector_types_for_stmt (stmt_vec_info, tree *,
1517 tree *);
1518 extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info);
1210 1519
1211 /* In tree-vect-data-refs.c. */ 1520 /* In tree-vect-data-refs.c. */
1212 extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); 1521 extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int);
1213 extern enum dr_alignment_support vect_supportable_dr_alignment 1522 extern enum dr_alignment_support vect_supportable_dr_alignment
1214 (struct data_reference *, bool); 1523 (dr_vec_info *, bool);
1215 extern tree vect_get_smallest_scalar_type (gimple *, HOST_WIDE_INT *, 1524 extern tree vect_get_smallest_scalar_type (stmt_vec_info, HOST_WIDE_INT *,
1216 HOST_WIDE_INT *); 1525 HOST_WIDE_INT *);
1217 extern bool vect_analyze_data_ref_dependences (loop_vec_info, int *); 1526 extern opt_result vect_analyze_data_ref_dependences (loop_vec_info, unsigned int *);
1218 extern bool vect_slp_analyze_instance_dependence (slp_instance); 1527 extern bool vect_slp_analyze_instance_dependence (slp_instance);
1219 extern bool vect_enhance_data_refs_alignment (loop_vec_info); 1528 extern opt_result vect_enhance_data_refs_alignment (loop_vec_info);
1220 extern bool vect_analyze_data_refs_alignment (loop_vec_info); 1529 extern opt_result vect_analyze_data_refs_alignment (loop_vec_info);
1221 extern bool vect_verify_datarefs_alignment (loop_vec_info); 1530 extern opt_result vect_verify_datarefs_alignment (loop_vec_info);
1222 extern bool vect_slp_analyze_and_verify_instance_alignment (slp_instance); 1531 extern bool vect_slp_analyze_and_verify_instance_alignment (slp_instance);
1223 extern bool vect_analyze_data_ref_accesses (vec_info *); 1532 extern opt_result vect_analyze_data_ref_accesses (vec_info *);
1224 extern bool vect_prune_runtime_alias_test_list (loop_vec_info); 1533 extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info);
1225 extern bool vect_check_gather_scatter (gimple *, loop_vec_info, 1534 extern bool vect_gather_scatter_fn_p (bool, bool, tree, tree, unsigned int,
1535 signop, int, internal_fn *, tree *);
1536 extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info,
1226 gather_scatter_info *); 1537 gather_scatter_info *);
1227 extern bool vect_analyze_data_refs (vec_info *, int *); 1538 extern opt_result vect_find_stmt_data_reference (loop_p, gimple *,
1539 vec<data_reference_p> *);
1540 extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *);
1228 extern void vect_record_base_alignments (vec_info *); 1541 extern void vect_record_base_alignments (vec_info *);
1229 extern tree vect_create_data_ref_ptr (gimple *, tree, struct loop *, tree, 1542 extern tree vect_create_data_ref_ptr (stmt_vec_info, tree, struct loop *, tree,
1230 tree *, gimple_stmt_iterator *, 1543 tree *, gimple_stmt_iterator *,
1231 gimple **, bool, bool *, 1544 gimple **, bool,
1232 tree = NULL_TREE); 1545 tree = NULL_TREE, tree = NULL_TREE);
1233 extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *, gimple *, 1546 extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *,
1234 tree); 1547 stmt_vec_info, tree);
1548 extern void vect_copy_ref_info (tree, tree);
1235 extern tree vect_create_destination_var (tree, tree); 1549 extern tree vect_create_destination_var (tree, tree);
1236 extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT); 1550 extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT);
1237 extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT); 1551 extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
1238 extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT); 1552 extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT);
1239 extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT); 1553 extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool);
1240 extern void vect_permute_store_chain (vec<tree> ,unsigned int, gimple *, 1554 extern void vect_permute_store_chain (vec<tree> ,unsigned int, stmt_vec_info,
1241 gimple_stmt_iterator *, vec<tree> *); 1555 gimple_stmt_iterator *, vec<tree> *);
1242 extern tree vect_setup_realignment (gimple *, gimple_stmt_iterator *, tree *, 1556 extern tree vect_setup_realignment (stmt_vec_info, gimple_stmt_iterator *,
1243 enum dr_alignment_support, tree, 1557 tree *, enum dr_alignment_support, tree,
1244 struct loop **); 1558 struct loop **);
1245 extern void vect_transform_grouped_load (gimple *, vec<tree> , int, 1559 extern void vect_transform_grouped_load (stmt_vec_info, vec<tree> , int,
1246 gimple_stmt_iterator *); 1560 gimple_stmt_iterator *);
1247 extern void vect_record_grouped_load_vectors (gimple *, vec<tree> ); 1561 extern void vect_record_grouped_load_vectors (stmt_vec_info, vec<tree>);
1248 extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); 1562 extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *);
1249 extern tree vect_get_new_ssa_name (tree, enum vect_var_kind, 1563 extern tree vect_get_new_ssa_name (tree, enum vect_var_kind,
1250 const char * = NULL); 1564 const char * = NULL);
1251 extern tree vect_create_addr_base_for_vector_ref (gimple *, gimple_seq *, 1565 extern tree vect_create_addr_base_for_vector_ref (stmt_vec_info, gimple_seq *,
1252 tree, tree = NULL_TREE); 1566 tree, tree = NULL_TREE);
1253 1567
1254 /* In tree-vect-loop.c. */ 1568 /* In tree-vect-loop.c. */
1255 /* FORNOW: Used in tree-parloops.c. */ 1569 /* FORNOW: Used in tree-parloops.c. */
1256 extern gimple *vect_force_simple_reduction (loop_vec_info, gimple *, 1570 extern stmt_vec_info vect_force_simple_reduction (loop_vec_info, stmt_vec_info,
1257 bool *, bool); 1571 bool *, bool);
1572 /* Used in gimple-loop-interchange.c. */
1573 extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree,
1574 enum tree_code);
1258 /* Drive for loop analysis stage. */ 1575 /* Drive for loop analysis stage. */
1259 extern loop_vec_info vect_analyze_loop (struct loop *, loop_vec_info); 1576 extern opt_loop_vec_info vect_analyze_loop (struct loop *,
1577 loop_vec_info,
1578 vec_info_shared *);
1260 extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL); 1579 extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL);
1261 extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, bool); 1580 extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *,
1581 tree *, bool);
1582 extern tree vect_halve_mask_nunits (tree);
1583 extern tree vect_double_mask_nunits (tree);
1584 extern void vect_record_loop_mask (loop_vec_info, vec_loop_masks *,
1585 unsigned int, tree);
1586 extern tree vect_get_loop_mask (gimple_stmt_iterator *, vec_loop_masks *,
1587 unsigned int, tree, unsigned int);
1588
1262 /* Drive for loop transformation stage. */ 1589 /* Drive for loop transformation stage. */
1263 extern struct loop *vect_transform_loop (loop_vec_info); 1590 extern struct loop *vect_transform_loop (loop_vec_info);
1264 extern loop_vec_info vect_analyze_loop_form (struct loop *); 1591 extern opt_loop_vec_info vect_analyze_loop_form (struct loop *,
1265 extern bool vectorizable_live_operation (gimple *, gimple_stmt_iterator *, 1592 vec_info_shared *);
1266 slp_tree, int, gimple **); 1593 extern bool vectorizable_live_operation (stmt_vec_info, gimple_stmt_iterator *,
1267 extern bool vectorizable_reduction (gimple *, gimple_stmt_iterator *, 1594 slp_tree, int, stmt_vec_info *,
1268 gimple **, slp_tree, slp_instance); 1595 stmt_vector_for_cost *);
1269 extern bool vectorizable_induction (gimple *, gimple_stmt_iterator *, 1596 extern bool vectorizable_reduction (stmt_vec_info, gimple_stmt_iterator *,
1270 gimple **, slp_tree); 1597 stmt_vec_info *, slp_tree, slp_instance,
1271 extern tree get_initial_def_for_reduction (gimple *, tree, tree *); 1598 stmt_vector_for_cost *);
1599 extern bool vectorizable_induction (stmt_vec_info, gimple_stmt_iterator *,
1600 stmt_vec_info *, slp_tree,
1601 stmt_vector_for_cost *);
1602 extern tree get_initial_def_for_reduction (stmt_vec_info, tree, tree *);
1272 extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code); 1603 extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code);
1273 extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, 1604 extern int vect_get_known_peeling_cost (loop_vec_info, int, int *,
1274 stmt_vector_for_cost *, 1605 stmt_vector_for_cost *,
1275 stmt_vector_for_cost *, 1606 stmt_vector_for_cost *,
1276 stmt_vector_for_cost *); 1607 stmt_vector_for_cost *);
1608 extern tree cse_and_gimplify_to_preheader (loop_vec_info, tree);
1277 1609
1278 /* In tree-vect-slp.c. */ 1610 /* In tree-vect-slp.c. */
1279 extern void vect_free_slp_instance (slp_instance); 1611 extern void vect_free_slp_instance (slp_instance, bool);
1280 extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> , 1612 extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> ,
1281 gimple_stmt_iterator *, int, 1613 gimple_stmt_iterator *, poly_uint64,
1282 slp_instance, bool, unsigned *); 1614 slp_instance, bool, unsigned *);
1283 extern bool vect_slp_analyze_operations (vec_info *); 1615 extern bool vect_slp_analyze_operations (vec_info *);
1284 extern bool vect_schedule_slp (vec_info *); 1616 extern void vect_schedule_slp (vec_info *);
1285 extern bool vect_analyze_slp (vec_info *, unsigned); 1617 extern opt_result vect_analyze_slp (vec_info *, unsigned);
1286 extern bool vect_make_slp_decision (loop_vec_info); 1618 extern bool vect_make_slp_decision (loop_vec_info);
1287 extern void vect_detect_hybrid_slp (loop_vec_info); 1619 extern void vect_detect_hybrid_slp (loop_vec_info);
1288 extern void vect_get_slp_defs (vec<tree> , slp_tree, vec<vec<tree> > *); 1620 extern void vect_get_slp_defs (vec<tree> , slp_tree, vec<vec<tree> > *);
1289 extern bool vect_slp_bb (basic_block); 1621 extern bool vect_slp_bb (basic_block);
1290 extern gimple *vect_find_last_scalar_stmt_in_slp (slp_tree); 1622 extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree);
1291 extern bool is_simple_and_all_uses_invariant (gimple *, loop_vec_info); 1623 extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info);
1624 extern bool can_duplicate_and_interleave_p (unsigned int, machine_mode,
1625 unsigned int * = NULL,
1626 tree * = NULL, tree * = NULL);
1627 extern void duplicate_and_interleave (gimple_seq *, tree, vec<tree>,
1628 unsigned int, vec<tree> &);
1629 extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info);
1292 1630
1293 /* In tree-vect-patterns.c. */ 1631 /* In tree-vect-patterns.c. */
1294 /* Pattern recognition functions. 1632 /* Pattern recognition functions.
1295 Additional pattern recognition functions can (and will) be added 1633 Additional pattern recognition functions can (and will) be added
1296 in the future. */ 1634 in the future. */
1297 typedef gimple *(* vect_recog_func_ptr) (vec<gimple *> *, tree *, tree *);
1298 #define NUM_PATTERNS 14
1299 void vect_pattern_recog (vec_info *); 1635 void vect_pattern_recog (vec_info *);
1300 1636
1301 /* In tree-vectorizer.c. */ 1637 /* In tree-vectorizer.c. */
1302 unsigned vectorize_loops (void); 1638 unsigned vectorize_loops (void);
1303 bool vect_stmt_in_region_p (vec_info *, gimple *);
1304 void vect_free_loop_info_assumptions (struct loop *); 1639 void vect_free_loop_info_assumptions (struct loop *);
1305 1640
1306 #endif /* GCC_TREE_VECTORIZER_H */ 1641 #endif /* GCC_TREE_VECTORIZER_H */