Mercurial > hg > CbC > CbC_gcc
annotate gcc/tree-vectorizer.h @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
rev | line source |
---|---|
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1 /* Vectorizer |
131 | 2 Copyright (C) 2003-2018 Free Software Foundation, Inc. |
0 | 3 Contributed by Dorit Naishlos <dorit@il.ibm.com> |
4 | |
5 This file is part of GCC. | |
6 | |
7 GCC is free software; you can redistribute it and/or modify it under | |
8 the terms of the GNU General Public License as published by the Free | |
9 Software Foundation; either version 3, or (at your option) any later | |
10 version. | |
11 | |
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with GCC; see the file COPYING3. If not see | |
19 <http://www.gnu.org/licenses/>. */ | |
20 | |
21 #ifndef GCC_TREE_VECTORIZER_H | |
22 #define GCC_TREE_VECTORIZER_H | |
23 | |
131 | 24 typedef struct _stmt_vec_info *stmt_vec_info; |
25 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
26 #include "tree-data-ref.h" |
111 | 27 #include "tree-hash-traits.h" |
28 #include "target.h" | |
0 | 29 |
30 /* Used for naming of new temporaries. */ | |
31 enum vect_var_kind { | |
32 vect_simple_var, | |
33 vect_pointer_var, | |
111 | 34 vect_scalar_var, |
35 vect_mask_var | |
0 | 36 }; |
37 | |
38 /* Defines type of operation. */ | |
39 enum operation_type { | |
40 unary_op = 1, | |
41 binary_op, | |
42 ternary_op | |
43 }; | |
44 | |
45 /* Define type of available alignment support. */ | |
46 enum dr_alignment_support { | |
47 dr_unaligned_unsupported, | |
48 dr_unaligned_supported, | |
49 dr_explicit_realign, | |
50 dr_explicit_realign_optimized, | |
51 dr_aligned | |
52 }; | |
53 | |
54 /* Define type of def-use cross-iteration cycle. */ | |
55 enum vect_def_type { | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
56 vect_uninitialized_def = 0, |
0 | 57 vect_constant_def = 1, |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
58 vect_external_def, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
59 vect_internal_def, |
0 | 60 vect_induction_def, |
61 vect_reduction_def, | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
62 vect_double_reduction_def, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
63 vect_nested_cycle, |
0 | 64 vect_unknown_def_type |
65 }; | |
66 | |
111 | 67 /* Define type of reduction. */ |
68 enum vect_reduction_type { | |
69 TREE_CODE_REDUCTION, | |
70 COND_REDUCTION, | |
71 INTEGER_INDUC_COND_REDUCTION, | |
131 | 72 CONST_COND_REDUCTION, |
73 | |
74 /* Retain a scalar phi and use a FOLD_EXTRACT_LAST within the loop | |
75 to implement: | |
76 | |
77 for (int i = 0; i < VF; ++i) | |
78 res = cond[i] ? val[i] : res; */ | |
79 EXTRACT_LAST_REDUCTION, | |
80 | |
81 /* Use a folding reduction within the loop to implement: | |
82 | |
83 for (int i = 0; i < VF; ++i) | |
84 res = res OP val[i]; | |
85 | |
86 (with no reassociation). */ | |
87 FOLD_LEFT_REDUCTION | |
111 | 88 }; |
89 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
90 #define VECTORIZABLE_CYCLE_DEF(D) (((D) == vect_reduction_def) \ |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
91 || ((D) == vect_double_reduction_def) \ |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
92 || ((D) == vect_nested_cycle)) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
93 |
111 | 94 /* Structure to encapsulate information about a group of like |
95 instructions to be presented to the target cost model. */ | |
96 struct stmt_info_for_cost { | |
97 int count; | |
98 enum vect_cost_for_stmt kind; | |
131 | 99 enum vect_cost_model_location where; |
100 stmt_vec_info stmt_info; | |
111 | 101 int misalign; |
102 }; | |
103 | |
104 typedef vec<stmt_info_for_cost> stmt_vector_for_cost; | |
105 | |
106 /* Maps base addresses to an innermost_loop_behavior that gives the maximum | |
107 known alignment for that base. */ | |
108 typedef hash_map<tree_operand_hash, | |
109 innermost_loop_behavior *> vec_base_alignments; | |
110 | |
0 | 111 /************************************************************************ |
112 SLP | |
113 ************************************************************************/ | |
111 | 114 typedef struct _slp_tree *slp_tree; |
0 | 115 |
111 | 116 /* A computation tree of an SLP instance. Each node corresponds to a group of |
0 | 117 stmts to be packed in a SIMD stmt. */ |
111 | 118 struct _slp_tree { |
119 /* Nodes that contain def-stmts of this node statements operands. */ | |
120 vec<slp_tree> children; | |
0 | 121 /* A group of scalar stmts to be vectorized together. */ |
131 | 122 vec<stmt_vec_info> stmts; |
111 | 123 /* Load permutation relative to the stores, NULL if there is no |
124 permutation. */ | |
125 vec<unsigned> load_permutation; | |
0 | 126 /* Vectorized stmt/s. */ |
131 | 127 vec<stmt_vec_info> vec_stmts; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
128 /* Number of vector stmts that are created to replace the group of scalar |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
129 stmts. It is calculated during the transformation phase as the number of |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
130 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by VF |
0 | 131 divided by vector size. */ |
132 unsigned int vec_stmts_size; | |
111 | 133 /* Whether the scalar computations use two different operators. */ |
134 bool two_operators; | |
135 /* The DEF type of this node. */ | |
136 enum vect_def_type def_type; | |
137 }; | |
0 | 138 |
139 | |
140 /* SLP instance is a sequence of stmts in a loop that can be packed into | |
141 SIMD stmts. */ | |
142 typedef struct _slp_instance { | |
143 /* The root of SLP tree. */ | |
144 slp_tree root; | |
145 | |
146 /* Size of groups of scalar stmts that will be replaced by SIMD stmt/s. */ | |
147 unsigned int group_size; | |
148 | |
149 /* The unrolling factor required to vectorize this SLP instance. */ | |
131 | 150 poly_uint64 unrolling_factor; |
0 | 151 |
111 | 152 /* The group of nodes that contain loads of this SLP instance. */ |
153 vec<slp_tree> loads; | |
0 | 154 |
111 | 155 /* The SLP node containing the reduction PHIs. */ |
156 slp_tree reduc_phis; | |
0 | 157 } *slp_instance; |
158 | |
159 | |
160 /* Access Functions. */ | |
161 #define SLP_INSTANCE_TREE(S) (S)->root | |
162 #define SLP_INSTANCE_GROUP_SIZE(S) (S)->group_size | |
163 #define SLP_INSTANCE_UNROLLING_FACTOR(S) (S)->unrolling_factor | |
164 #define SLP_INSTANCE_LOADS(S) (S)->loads | |
165 | |
111 | 166 #define SLP_TREE_CHILDREN(S) (S)->children |
0 | 167 #define SLP_TREE_SCALAR_STMTS(S) (S)->stmts |
168 #define SLP_TREE_VEC_STMTS(S) (S)->vec_stmts | |
169 #define SLP_TREE_NUMBER_OF_VEC_STMTS(S) (S)->vec_stmts_size | |
111 | 170 #define SLP_TREE_LOAD_PERMUTATION(S) (S)->load_permutation |
171 #define SLP_TREE_TWO_OPERATORS(S) (S)->two_operators | |
172 #define SLP_TREE_DEF_TYPE(S) (S)->def_type | |
173 | |
0 | 174 |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
175 |
111 | 176 /* Describes two objects whose addresses must be unequal for the vectorized |
177 loop to be valid. */ | |
178 typedef std::pair<tree, tree> vec_object_pair; | |
179 | |
131 | 180 /* Records that vectorization is only possible if abs (EXPR) >= MIN_VALUE. |
181 UNSIGNED_P is true if we can assume that abs (EXPR) == EXPR. */ | |
182 struct vec_lower_bound { | |
183 vec_lower_bound () {} | |
184 vec_lower_bound (tree e, bool u, poly_uint64 m) | |
185 : expr (e), unsigned_p (u), min_value (m) {} | |
186 | |
187 tree expr; | |
188 bool unsigned_p; | |
189 poly_uint64 min_value; | |
190 }; | |
191 | |
192 /* Vectorizer state shared between different analyses like vector sizes | |
193 of the same CFG region. */ | |
194 struct vec_info_shared { | |
195 vec_info_shared(); | |
196 ~vec_info_shared(); | |
197 | |
198 void save_datarefs(); | |
199 void check_datarefs(); | |
200 | |
201 /* All data references. Freed by free_data_refs, so not an auto_vec. */ | |
202 vec<data_reference_p> datarefs; | |
203 vec<data_reference> datarefs_copy; | |
204 | |
205 /* The loop nest in which the data dependences are computed. */ | |
206 auto_vec<loop_p> loop_nest; | |
207 | |
208 /* All data dependences. Freed by free_dependence_relations, so not | |
209 an auto_vec. */ | |
210 vec<ddr_p> ddrs; | |
211 }; | |
212 | |
111 | 213 /* Vectorizer state common between loop and basic-block vectorization. */ |
214 struct vec_info { | |
215 enum vec_kind { bb, loop }; | |
216 | |
131 | 217 vec_info (vec_kind, void *, vec_info_shared *); |
111 | 218 ~vec_info (); |
219 | |
131 | 220 stmt_vec_info add_stmt (gimple *); |
221 stmt_vec_info lookup_stmt (gimple *); | |
222 stmt_vec_info lookup_def (tree); | |
223 stmt_vec_info lookup_single_use (tree); | |
224 struct dr_vec_info *lookup_dr (data_reference *); | |
225 void move_dr (stmt_vec_info, stmt_vec_info); | |
226 void remove_stmt (stmt_vec_info); | |
227 void replace_stmt (gimple_stmt_iterator *, stmt_vec_info, gimple *); | |
228 | |
111 | 229 /* The type of vectorization. */ |
230 vec_kind kind; | |
231 | |
131 | 232 /* Shared vectorizer state. */ |
233 vec_info_shared *shared; | |
234 | |
235 /* The mapping of GIMPLE UID to stmt_vec_info. */ | |
236 vec<stmt_vec_info> stmt_vec_infos; | |
237 | |
111 | 238 /* All SLP instances. */ |
239 auto_vec<slp_instance> slp_instances; | |
240 | |
241 /* Maps base addresses to an innermost_loop_behavior that gives the maximum | |
242 known alignment for that base. */ | |
243 vec_base_alignments base_alignments; | |
244 | |
245 /* All interleaving chains of stores, represented by the first | |
246 stmt in the chain. */ | |
131 | 247 auto_vec<stmt_vec_info> grouped_stores; |
111 | 248 |
249 /* Cost data used by the target cost model. */ | |
250 void *target_cost_data; | |
131 | 251 |
252 private: | |
253 stmt_vec_info new_stmt_vec_info (gimple *stmt); | |
254 void set_vinfo_for_stmt (gimple *, stmt_vec_info); | |
255 void free_stmt_vec_infos (); | |
256 void free_stmt_vec_info (stmt_vec_info); | |
111 | 257 }; |
258 | |
259 struct _loop_vec_info; | |
260 struct _bb_vec_info; | |
261 | |
262 template<> | |
263 template<> | |
264 inline bool | |
265 is_a_helper <_loop_vec_info *>::test (vec_info *i) | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
266 { |
111 | 267 return i->kind == vec_info::loop; |
268 } | |
269 | |
270 template<> | |
271 template<> | |
272 inline bool | |
273 is_a_helper <_bb_vec_info *>::test (vec_info *i) | |
274 { | |
275 return i->kind == vec_info::bb; | |
276 } | |
277 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
278 |
131 | 279 /* In general, we can divide the vector statements in a vectorized loop |
280 into related groups ("rgroups") and say that for each rgroup there is | |
281 some nS such that the rgroup operates on nS values from one scalar | |
282 iteration followed by nS values from the next. That is, if VF is the | |
283 vectorization factor of the loop, the rgroup operates on a sequence: | |
284 | |
285 (1,1) (1,2) ... (1,nS) (2,1) ... (2,nS) ... (VF,1) ... (VF,nS) | |
286 | |
287 where (i,j) represents a scalar value with index j in a scalar | |
288 iteration with index i. | |
289 | |
290 [ We use the term "rgroup" to emphasise that this grouping isn't | |
291 necessarily the same as the grouping of statements used elsewhere. | |
292 For example, if we implement a group of scalar loads using gather | |
293 loads, we'll use a separate gather load for each scalar load, and | |
294 thus each gather load will belong to its own rgroup. ] | |
295 | |
296 In general this sequence will occupy nV vectors concatenated | |
297 together. If these vectors have nL lanes each, the total number | |
298 of scalar values N is given by: | |
299 | |
300 N = nS * VF = nV * nL | |
301 | |
302 None of nS, VF, nV and nL are required to be a power of 2. nS and nV | |
303 are compile-time constants but VF and nL can be variable (if the target | |
304 supports variable-length vectors). | |
305 | |
306 In classical vectorization, each iteration of the vector loop would | |
307 handle exactly VF iterations of the original scalar loop. However, | |
308 in a fully-masked loop, a particular iteration of the vector loop | |
309 might handle fewer than VF iterations of the scalar loop. The vector | |
310 lanes that correspond to iterations of the scalar loop are said to be | |
311 "active" and the other lanes are said to be "inactive". | |
312 | |
313 In a fully-masked loop, many rgroups need to be masked to ensure that | |
314 they have no effect for the inactive lanes. Each such rgroup needs a | |
315 sequence of booleans in the same order as above, but with each (i,j) | |
316 replaced by a boolean that indicates whether iteration i is active. | |
317 This sequence occupies nV vector masks that again have nL lanes each. | |
318 Thus the mask sequence as a whole consists of VF independent booleans | |
319 that are each repeated nS times. | |
320 | |
321 We make the simplifying assumption that if a sequence of nV masks is | |
322 suitable for one (nS,nL) pair, we can reuse it for (nS/2,nL/2) by | |
323 VIEW_CONVERTing it. This holds for all current targets that support | |
324 fully-masked loops. For example, suppose the scalar loop is: | |
325 | |
326 float *f; | |
327 double *d; | |
328 for (int i = 0; i < n; ++i) | |
329 { | |
330 f[i * 2 + 0] += 1.0f; | |
331 f[i * 2 + 1] += 2.0f; | |
332 d[i] += 3.0; | |
333 } | |
334 | |
335 and suppose that vectors have 256 bits. The vectorized f accesses | |
336 will belong to one rgroup and the vectorized d access to another: | |
337 | |
338 f rgroup: nS = 2, nV = 1, nL = 8 | |
339 d rgroup: nS = 1, nV = 1, nL = 4 | |
340 VF = 4 | |
341 | |
342 [ In this simple example the rgroups do correspond to the normal | |
343 SLP grouping scheme. ] | |
344 | |
345 If only the first three lanes are active, the masks we need are: | |
346 | |
347 f rgroup: 1 1 | 1 1 | 1 1 | 0 0 | |
348 d rgroup: 1 | 1 | 1 | 0 | |
349 | |
350 Here we can use a mask calculated for f's rgroup for d's, but not | |
351 vice versa. | |
352 | |
353 Thus for each value of nV, it is enough to provide nV masks, with the | |
354 mask being calculated based on the highest nL (or, equivalently, based | |
355 on the highest nS) required by any rgroup with that nV. We therefore | |
356 represent the entire collection of masks as a two-level table, with the | |
357 first level being indexed by nV - 1 (since nV == 0 doesn't exist) and | |
358 the second being indexed by the mask index 0 <= i < nV. */ | |
359 | |
360 /* The masks needed by rgroups with nV vectors, according to the | |
361 description above. */ | |
362 struct rgroup_masks { | |
363 /* The largest nS for all rgroups that use these masks. */ | |
364 unsigned int max_nscalars_per_iter; | |
365 | |
366 /* The type of mask to use, based on the highest nS recorded above. */ | |
367 tree mask_type; | |
368 | |
369 /* A vector of nV masks, in iteration order. */ | |
370 vec<tree> masks; | |
371 }; | |
372 | |
373 typedef auto_vec<rgroup_masks> vec_loop_masks; | |
374 | |
0 | 375 /*-----------------------------------------------------------------*/ |
376 /* Info on vectorized loops. */ | |
377 /*-----------------------------------------------------------------*/ | |
111 | 378 typedef struct _loop_vec_info : public vec_info { |
131 | 379 _loop_vec_info (struct loop *, vec_info_shared *); |
111 | 380 ~_loop_vec_info (); |
0 | 381 |
382 /* The loop to which this info struct refers. */ | |
383 struct loop *loop; | |
384 | |
385 /* The loop basic blocks. */ | |
386 basic_block *bbs; | |
387 | |
111 | 388 /* Number of latch executions. */ |
389 tree num_itersm1; | |
0 | 390 /* Number of iterations. */ |
391 tree num_iters; | |
111 | 392 /* Number of iterations of the original loop. */ |
0 | 393 tree num_iters_unchanged; |
111 | 394 /* Condition under which this loop is analyzed and versioned. */ |
395 tree num_iters_assumptions; | |
0 | 396 |
111 | 397 /* Threshold of number of iterations below which vectorization will not be |
398 performed. It is calculated from MIN_PROFITABLE_ITERS and | |
399 PARAM_MIN_VECT_LOOP_BOUND. */ | |
400 unsigned int th; | |
0 | 401 |
131 | 402 /* When applying loop versioning, the vector form should only be used |
403 if the number of scalar iterations is >= this value, on top of all | |
404 the other requirements. Ignored when loop versioning is not being | |
405 used. */ | |
406 poly_uint64 versioning_threshold; | |
407 | |
0 | 408 /* Unrolling factor */ |
131 | 409 poly_uint64 vectorization_factor; |
0 | 410 |
111 | 411 /* Maximum runtime vectorization factor, or MAX_VECTORIZATION_FACTOR |
412 if there is no particular limit. */ | |
413 unsigned HOST_WIDE_INT max_vectorization_factor; | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
414 |
131 | 415 /* The masks that a fully-masked loop should use to avoid operating |
416 on inactive scalars. */ | |
417 vec_loop_masks masks; | |
418 | |
419 /* If we are using a loop mask to align memory addresses, this variable | |
420 contains the number of vector elements that we should skip in the | |
421 first iteration of the vector loop (i.e. the number of leading | |
422 elements that should be false in the first mask). */ | |
423 tree mask_skip_niters; | |
424 | |
425 /* Type of the variables to use in the WHILE_ULT call for fully-masked | |
426 loops. */ | |
427 tree mask_compare_type; | |
428 | |
0 | 429 /* Unknown DRs according to which loop was peeled. */ |
131 | 430 struct dr_vec_info *unaligned_dr; |
0 | 431 |
432 /* peeling_for_alignment indicates whether peeling for alignment will take | |
433 place, and what the peeling factor should be: | |
434 peeling_for_alignment = X means: | |
435 If X=0: Peeling for alignment will not be applied. | |
436 If X>0: Peel first X iterations. | |
437 If X=-1: Generate a runtime test to calculate the number of iterations | |
438 to be peeled, using the dataref recorded in the field | |
439 unaligned_dr. */ | |
440 int peeling_for_alignment; | |
441 | |
442 /* The mask used to check the alignment of pointers or arrays. */ | |
443 int ptr_mask; | |
444 | |
445 /* Data Dependence Relations defining address ranges that are candidates | |
446 for a run-time aliasing check. */ | |
111 | 447 auto_vec<ddr_p> may_alias_ddrs; |
448 | |
449 /* Data Dependence Relations defining address ranges together with segment | |
450 lengths from which the run-time aliasing check is built. */ | |
451 auto_vec<dr_with_seg_len_pair_t> comp_alias_ddrs; | |
452 | |
453 /* Check that the addresses of each pair of objects are unequal. */ | |
454 auto_vec<vec_object_pair> check_unequal_addrs; | |
0 | 455 |
131 | 456 /* List of values that are required to be nonzero. This is used to check |
457 whether things like "x[i * n] += 1;" are safe and eventually gets added | |
458 to the checks for lower bounds below. */ | |
459 auto_vec<tree> check_nonzero; | |
460 | |
461 /* List of values that need to be checked for a minimum value. */ | |
462 auto_vec<vec_lower_bound> lower_bounds; | |
463 | |
0 | 464 /* Statements in the loop that have data references that are candidates for a |
465 runtime (loop versioning) misalignment check. */ | |
131 | 466 auto_vec<stmt_vec_info> may_misalign_stmts; |
0 | 467 |
111 | 468 /* Reduction cycles detected in the loop. Used in loop-aware SLP. */ |
131 | 469 auto_vec<stmt_vec_info> reductions; |
111 | 470 |
471 /* All reduction chains in the loop, represented by the first | |
0 | 472 stmt in the chain. */ |
131 | 473 auto_vec<stmt_vec_info> reduction_chains; |
0 | 474 |
111 | 475 /* Cost vector for a single scalar iteration. */ |
476 auto_vec<stmt_info_for_cost> scalar_cost_vec; | |
0 | 477 |
131 | 478 /* Map of IV base/step expressions to inserted name in the preheader. */ |
479 hash_map<tree_operand_hash, tree> *ivexpr_map; | |
480 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
481 /* The unrolling factor needed to SLP the loop. In the case that pure SLP is |
0 | 482 applied to the loop, i.e., no unrolling is needed, this is 1. */ |
131 | 483 poly_uint64 slp_unrolling_factor; |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
484 |
111 | 485 /* Cost of a single scalar iteration. */ |
486 int single_scalar_iteration_cost; | |
487 | |
488 /* Is the loop vectorizable? */ | |
489 bool vectorizable; | |
490 | |
131 | 491 /* Records whether we still have the option of using a fully-masked loop. */ |
492 bool can_fully_mask_p; | |
493 | |
494 /* True if we have decided to use a fully-masked loop. */ | |
495 bool fully_masked_p; | |
496 | |
111 | 497 /* When we have grouped data accesses with gaps, we may introduce invalid |
498 memory accesses. We peel the last iteration of the loop to prevent | |
499 this. */ | |
500 bool peeling_for_gaps; | |
501 | |
502 /* When the number of iterations is not a multiple of the vector size | |
503 we need to peel off iterations at the end to form an epilogue loop. */ | |
504 bool peeling_for_niter; | |
505 | |
506 /* Reductions are canonicalized so that the last operand is the reduction | |
507 operand. If this places a constant into RHS1, this decanonicalizes | |
508 GIMPLE for other phases, so we must track when this has occurred and | |
509 fix it up. */ | |
510 bool operands_swapped; | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
511 |
111 | 512 /* True if there are no loop carried data dependencies in the loop. |
513 If loop->safelen <= 1, then this is always true, either the loop | |
514 didn't have any loop carried data dependencies, or the loop is being | |
515 vectorized guarded with some runtime alias checks, or couldn't | |
516 be vectorized at all, but then this field shouldn't be used. | |
517 For loop->safelen >= 2, the user has asserted that there are no | |
518 backward dependencies, but there still could be loop carried forward | |
519 dependencies in such loops. This flag will be false if normal | |
520 vectorizer data dependency analysis would fail or require versioning | |
521 for alias, but because of loop->safelen >= 2 it has been vectorized | |
522 even without versioning for alias. E.g. in: | |
523 #pragma omp simd | |
524 for (int i = 0; i < m; i++) | |
525 a[i] = a[i + k] * c; | |
526 (or #pragma simd or #pragma ivdep) we can vectorize this and it will | |
527 DTRT even for k > 0 && k < m, but without safelen we would not | |
528 vectorize this, so this field would be false. */ | |
529 bool no_data_dependencies; | |
530 | |
531 /* Mark loops having masked stores. */ | |
532 bool has_mask_store; | |
533 | |
534 /* If if-conversion versioned this loop before conversion, this is the | |
535 loop version without if-conversion. */ | |
536 struct loop *scalar_loop; | |
537 | |
538 /* For loops being epilogues of already vectorized loops | |
539 this points to the original vectorized loop. Otherwise NULL. */ | |
540 _loop_vec_info *orig_loop_info; | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
541 |
0 | 542 } *loop_vec_info; |
543 | |
544 /* Access Functions. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
545 #define LOOP_VINFO_LOOP(L) (L)->loop |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
546 #define LOOP_VINFO_BBS(L) (L)->bbs |
111 | 547 #define LOOP_VINFO_NITERSM1(L) (L)->num_itersm1 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
548 #define LOOP_VINFO_NITERS(L) (L)->num_iters |
111 | 549 /* Since LOOP_VINFO_NITERS and LOOP_VINFO_NITERSM1 can change after |
550 prologue peeling retain total unchanged scalar loop iterations for | |
551 cost model. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
552 #define LOOP_VINFO_NITERS_UNCHANGED(L) (L)->num_iters_unchanged |
111 | 553 #define LOOP_VINFO_NITERS_ASSUMPTIONS(L) (L)->num_iters_assumptions |
554 #define LOOP_VINFO_COST_MODEL_THRESHOLD(L) (L)->th | |
131 | 555 #define LOOP_VINFO_VERSIONING_THRESHOLD(L) (L)->versioning_threshold |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
556 #define LOOP_VINFO_VECTORIZABLE_P(L) (L)->vectorizable |
131 | 557 #define LOOP_VINFO_CAN_FULLY_MASK_P(L) (L)->can_fully_mask_p |
558 #define LOOP_VINFO_FULLY_MASKED_P(L) (L)->fully_masked_p | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
559 #define LOOP_VINFO_VECT_FACTOR(L) (L)->vectorization_factor |
111 | 560 #define LOOP_VINFO_MAX_VECT_FACTOR(L) (L)->max_vectorization_factor |
131 | 561 #define LOOP_VINFO_MASKS(L) (L)->masks |
562 #define LOOP_VINFO_MASK_SKIP_NITERS(L) (L)->mask_skip_niters | |
563 #define LOOP_VINFO_MASK_COMPARE_TYPE(L) (L)->mask_compare_type | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
564 #define LOOP_VINFO_PTR_MASK(L) (L)->ptr_mask |
131 | 565 #define LOOP_VINFO_LOOP_NEST(L) (L)->shared->loop_nest |
566 #define LOOP_VINFO_DATAREFS(L) (L)->shared->datarefs | |
567 #define LOOP_VINFO_DDRS(L) (L)->shared->ddrs | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
568 #define LOOP_VINFO_INT_NITERS(L) (TREE_INT_CST_LOW ((L)->num_iters)) |
111 | 569 #define LOOP_VINFO_PEELING_FOR_ALIGNMENT(L) (L)->peeling_for_alignment |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
570 #define LOOP_VINFO_UNALIGNED_DR(L) (L)->unaligned_dr |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
571 #define LOOP_VINFO_MAY_MISALIGN_STMTS(L) (L)->may_misalign_stmts |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
572 #define LOOP_VINFO_MAY_ALIAS_DDRS(L) (L)->may_alias_ddrs |
111 | 573 #define LOOP_VINFO_COMP_ALIAS_DDRS(L) (L)->comp_alias_ddrs |
574 #define LOOP_VINFO_CHECK_UNEQUAL_ADDRS(L) (L)->check_unequal_addrs | |
131 | 575 #define LOOP_VINFO_CHECK_NONZERO(L) (L)->check_nonzero |
576 #define LOOP_VINFO_LOWER_BOUNDS(L) (L)->lower_bounds | |
111 | 577 #define LOOP_VINFO_GROUPED_STORES(L) (L)->grouped_stores |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
578 #define LOOP_VINFO_SLP_INSTANCES(L) (L)->slp_instances |
0 | 579 #define LOOP_VINFO_SLP_UNROLLING_FACTOR(L) (L)->slp_unrolling_factor |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
580 #define LOOP_VINFO_REDUCTIONS(L) (L)->reductions |
111 | 581 #define LOOP_VINFO_REDUCTION_CHAINS(L) (L)->reduction_chains |
582 #define LOOP_VINFO_TARGET_COST_DATA(L) (L)->target_cost_data | |
583 #define LOOP_VINFO_PEELING_FOR_GAPS(L) (L)->peeling_for_gaps | |
584 #define LOOP_VINFO_OPERANDS_SWAPPED(L) (L)->operands_swapped | |
585 #define LOOP_VINFO_PEELING_FOR_NITER(L) (L)->peeling_for_niter | |
586 #define LOOP_VINFO_NO_DATA_DEPENDENCIES(L) (L)->no_data_dependencies | |
587 #define LOOP_VINFO_SCALAR_LOOP(L) (L)->scalar_loop | |
588 #define LOOP_VINFO_HAS_MASK_STORE(L) (L)->has_mask_store | |
589 #define LOOP_VINFO_SCALAR_ITERATION_COST(L) (L)->scalar_cost_vec | |
590 #define LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST(L) (L)->single_scalar_iteration_cost | |
591 #define LOOP_VINFO_ORIG_LOOP_INFO(L) (L)->orig_loop_info | |
0 | 592 |
111 | 593 #define LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT(L) \ |
594 ((L)->may_misalign_stmts.length () > 0) | |
595 #define LOOP_REQUIRES_VERSIONING_FOR_ALIAS(L) \ | |
596 ((L)->comp_alias_ddrs.length () > 0 \ | |
131 | 597 || (L)->check_unequal_addrs.length () > 0 \ |
598 || (L)->lower_bounds.length () > 0) | |
111 | 599 #define LOOP_REQUIRES_VERSIONING_FOR_NITERS(L) \ |
600 (LOOP_VINFO_NITERS_ASSUMPTIONS (L)) | |
601 #define LOOP_REQUIRES_VERSIONING(L) \ | |
602 (LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (L) \ | |
603 || LOOP_REQUIRES_VERSIONING_FOR_ALIAS (L) \ | |
604 || LOOP_REQUIRES_VERSIONING_FOR_NITERS (L)) | |
0 | 605 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
606 #define LOOP_VINFO_NITERS_KNOWN_P(L) \ |
111 | 607 (tree_fits_shwi_p ((L)->num_iters) && tree_to_shwi ((L)->num_iters) > 0) |
608 | |
609 #define LOOP_VINFO_EPILOGUE_P(L) \ | |
610 (LOOP_VINFO_ORIG_LOOP_INFO (L) != NULL) | |
611 | |
612 #define LOOP_VINFO_ORIG_MAX_VECT_FACTOR(L) \ | |
613 (LOOP_VINFO_MAX_VECT_FACTOR (LOOP_VINFO_ORIG_LOOP_INFO (L))) | |
0 | 614 |
131 | 615 /* Wrapper for loop_vec_info, for tracking success/failure, where a non-NULL |
616 value signifies success, and a NULL value signifies failure, supporting | |
617 propagating an opt_problem * describing the failure back up the call | |
618 stack. */ | |
619 typedef opt_pointer_wrapper <loop_vec_info> opt_loop_vec_info; | |
620 | |
0 | 621 static inline loop_vec_info |
622 loop_vec_info_for_loop (struct loop *loop) | |
623 { | |
624 return (loop_vec_info) loop->aux; | |
625 } | |
626 | |
111 | 627 typedef struct _bb_vec_info : public vec_info |
628 { | |
131 | 629 _bb_vec_info (gimple_stmt_iterator, gimple_stmt_iterator, vec_info_shared *); |
111 | 630 ~_bb_vec_info (); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
631 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
632 basic_block bb; |
111 | 633 gimple_stmt_iterator region_begin; |
634 gimple_stmt_iterator region_end; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
635 } *bb_vec_info; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
636 |
111 | 637 #define BB_VINFO_BB(B) (B)->bb |
638 #define BB_VINFO_GROUPED_STORES(B) (B)->grouped_stores | |
639 #define BB_VINFO_SLP_INSTANCES(B) (B)->slp_instances | |
131 | 640 #define BB_VINFO_DATAREFS(B) (B)->shared->datarefs |
641 #define BB_VINFO_DDRS(B) (B)->shared->ddrs | |
111 | 642 #define BB_VINFO_TARGET_COST_DATA(B) (B)->target_cost_data |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
643 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
644 static inline bb_vec_info |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
645 vec_info_for_bb (basic_block bb) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
646 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
647 return (bb_vec_info) bb->aux; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
648 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
649 |
/*-----------------------------------------------------------------*/
/* Info on vectorized defs.                                        */
/*-----------------------------------------------------------------*/

/* Kind tag stored in _stmt_vec_info::type.  The zero value is
   undef_vec_info_type; the remaining enumerators take consecutive
   values in the order listed.  */
enum stmt_vec_info_type {
  undef_vec_info_type = 0,
  load_vec_info_type,
  store_vec_info_type,
  shift_vec_info_type,
  op_vec_info_type,
  call_vec_info_type,
  call_simd_clone_vec_info_type,
  assignment_vec_info_type,
  condition_vec_info_type,
  comparison_vec_info_type,
  reduc_vec_info_type,
  induc_vec_info_type,
  type_promotion_vec_info_type,
  type_demotion_vec_info_type,
  type_conversion_vec_info_type,
  loop_exit_ctrl_vec_info_type
};
671 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
/* Describes whether, and in what way, a variable is used within the
   scope of the loop or basic block.  */
enum vect_relevant {
  vect_unused_in_scope = 0,

  /* The def is only used outside the loop.  */
  vect_used_only_live,

  /* The def lives in the inner loop and its use, which is a reduction
     stmt, is in the outer loop.  */
  vect_used_in_outer_by_reduction,

  /* The def lives in the inner loop and its use is in the outer loop,
     where the use is not part of a reduction.  */
  vect_used_in_outer,

  /* Defs feeding computations that end up (only) in a reduction.  Such
     defs may be used by non-reduction stmts, but eventually every
     computation or value they affect is used to compute a reduction
     (i.e. it does not get stored to memory, for example).  This marks
     computations whose evaluation order we are allowed to change.  */
  vect_used_by_reduction,

  vect_used_in_scope
};
696 | |
/* The kind of vectorization that can be applied to a stmt:

   - regular loop-based vectorization;
   - pure SLP: the stmt is part of SLP instances and has no uses outside
     SLP instances;
   - hybrid SLP and loop-based: the stmt is part of an SLP instance and
     must also be loop-based vectorized, because it has uses outside SLP
     sequences.

   In the loop context pure and hybrid SLP mean something slightly
   different.  Pure SLP applied to a loop means that only intra-iteration
   parallelism is exploited; i.e., the loop can be vectorized without any
   conceptual unrolling, because stmts are packed together only within a
   single iteration and never across iterations.  Loop hybrid SLP means
   that both intra-iteration and inter-iteration parallelism are
   exploited (e.g., the vector holds 4 elements and the slp-group-size is
   2, so one iteration does not offer enough parallelism and the rest is
   obtained from subsequent iterations by unrolling the loop by 2).  */
enum slp_vect_type {
  loop_vect = 0,
  pure_slp,
  hybrid
};
718 | |
/* Distinguishes a load, a store of the result of a vectorized statement,
   and a store of an invariant value.  */
enum vec_load_store_type {
  VLS_LOAD,
  VLS_STORE,
  VLS_STORE_INVARIANT
};
726 | |
/* The strategy chosen for vectorizing a single load or store, or a group
   of loads or stores.  */
enum vect_memory_access_type {
  /* Access to an invariant address.  Only loads use this.  */
  VMAT_INVARIANT,

  /* Plain contiguous access.  */
  VMAT_CONTIGUOUS,

  /* Contiguous access that walks down in memory rather than up, without
     any additional permutation.  Only stores of invariants use this.  */
  VMAT_CONTIGUOUS_DOWN,

  /* Contiguous access whose elements must be permuted after loading or
     before storing.  Used by loop vectorization only; SLP uses separate
     permutes.  */
  VMAT_CONTIGUOUS_PERMUTE,

  /* Contiguous access whose elements must be reversed after loading or
     before storing.  */
  VMAT_CONTIGUOUS_REVERSE,

  /* Access implemented with IFN_LOAD_LANES or IFN_STORE_LANES.  */
  VMAT_LOAD_STORE_LANES,

  /* Access in which every scalar element is loaded or stored
     individually.  */
  VMAT_ELEMENTWISE,

  /* Mix of VMAT_CONTIGUOUS and VMAT_ELEMENTWISE for grouped SLP
     accesses.  Every unrolled iteration performs one contiguous load or
     store for the whole group, while the groups from separate iterations
     are combined in the same way as for VMAT_ELEMENTWISE.  */
  VMAT_STRIDED_SLP,

  /* Access done with gather loads or scatter stores.  */
  VMAT_GATHER_SCATTER
};
0 | 766 |
131 | 767 struct dr_vec_info { |
768 /* The data reference itself. */ | |
769 data_reference *dr; | |
770 /* The statement that contains the data reference. */ | |
771 stmt_vec_info stmt; | |
772 /* The misalignment in bytes of the reference, or -1 if not known. */ | |
773 int misalignment; | |
774 /* The byte alignment that we'd ideally like the reference to have, | |
775 and the value that misalignment is measured against. */ | |
776 int target_alignment; | |
777 /* If true the alignment of base_decl needs to be increased. */ | |
778 bool base_misaligned; | |
779 tree base_decl; | |
780 }; | |
781 | |
0 | 782 typedef struct data_reference *dr_p; |
783 | |
131 | 784 struct _stmt_vec_info { |
0 | 785 |
786 enum stmt_vec_info_type type; | |
787 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
788 /* Indicates whether this stmts is part of a computation whose result is |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
789 used outside the loop. */ |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
790 bool live; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
791 |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
792 /* Stmt is part of some pattern (computation idiom) */ |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
793 bool in_pattern_p; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
794 |
131 | 795 /* True if the statement was created during pattern recognition as |
796 part of the replacement for RELATED_STMT. This implies that the | |
797 statement isn't part of any basic block, although for convenience | |
798 its gimple_bb is the same as for RELATED_STMT. */ | |
799 bool pattern_stmt_p; | |
800 | |
111 | 801 /* Is this statement vectorizable or should it be skipped in (partial) |
802 vectorization. */ | |
803 bool vectorizable; | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
804 |
0 | 805 /* The stmt to which this info struct refers to. */ |
111 | 806 gimple *stmt; |
0 | 807 |
111 | 808 /* The vec_info with respect to which STMT is vectorized. */ |
809 vec_info *vinfo; | |
0 | 810 |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
811 /* The vector type to be used for the LHS of this statement. */ |
0 | 812 tree vectype; |
813 | |
814 /* The vectorized version of the stmt. */ | |
131 | 815 stmt_vec_info vectorized_stmt; |
0 | 816 |
817 | |
111 | 818 /* The following is relevant only for stmts that contain a non-scalar |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
819 data-ref (array/pointer/struct access). A GIMPLE stmt is expected to have |
111 | 820 at most one such data-ref. */ |
0 | 821 |
131 | 822 dr_vec_info dr_aux; |
0 | 823 |
824 /* Information about the data-ref relative to this loop | |
825 nest (the loop that is being considered for vectorization). */ | |
111 | 826 innermost_loop_behavior dr_wrt_vec_loop; |
827 | |
828 /* For loop PHI nodes, the base and evolution part of it. This makes sure | |
829 this information is still available in vect_update_ivs_after_vectorizer | |
830 where we may not be able to re-analyze the PHI nodes evolution as | |
831 peeling for the prologue loop can make it unanalyzable. The evolution | |
832 part is still correct after peeling, but the base may have changed from | |
833 the version here. */ | |
834 tree loop_phi_evolution_base_unchanged; | |
835 tree loop_phi_evolution_part; | |
0 | 836 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
837 /* Used for various bookkeeping purposes, generally holding a pointer to |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
838 some other stmt S that is in some way "related" to this stmt. |
0 | 839 Current use of this field is: |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
840 If this stmt is part of a pattern (i.e. the field 'in_pattern_p' is |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
841 true): S is the "pattern stmt" that represents (and replaces) the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
842 sequence of stmts that constitutes the pattern. Similarly, the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
843 related_stmt of the "pattern stmt" points back to this stmt (which is |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
844 the last stmt in the original sequence of stmts that constitutes the |
0 | 845 pattern). */ |
131 | 846 stmt_vec_info related_stmt; |
111 | 847 |
131 | 848 /* Used to keep a sequence of def stmts of a pattern stmt if such exists. |
849 The sequence is attached to the original statement rather than the | |
850 pattern statement. */ | |
111 | 851 gimple_seq pattern_def_seq; |
0 | 852 |
853 /* List of datarefs that are known to have the same alignment as the dataref | |
854 of this stmt. */ | |
111 | 855 vec<dr_p> same_align_refs; |
856 | |
857 /* Selected SIMD clone's function info. First vector element | |
858 is SIMD clone's function decl, followed by a pair of trees (base + step) | |
859 for linear arguments (pair of NULLs for other arguments). */ | |
860 vec<tree> simd_clone_info; | |
0 | 861 |
862 /* Classify the def of this stmt. */ | |
863 enum vect_def_type def_type; | |
864 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
865 /* Whether the stmt is SLPed, loop-based vectorized, or both. */ |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
866 enum slp_vect_type slp_type; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
867 |
111 | 868 /* Interleaving and reduction chains info. */ |
869 /* First element in the group. */ | |
131 | 870 stmt_vec_info first_element; |
111 | 871 /* Pointer to the next element in the group. */ |
131 | 872 stmt_vec_info next_element; |
111 | 873 /* For data-refs, in case that two or more stmts share data-ref, this is the |
874 pointer to the previously detected stmt with the same dr. */ | |
131 | 875 stmt_vec_info same_dr_stmt; |
111 | 876 /* The size of the group. */ |
0 | 877 unsigned int size; |
878 /* For stores, number of stores from this group seen. We vectorize the last | |
879 one. */ | |
880 unsigned int store_count; | |
881 /* For loads only, the gap from the previous load. For consecutive loads, GAP | |
882 is 1. */ | |
883 unsigned int gap; | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
884 |
111 | 885 /* The minimum negative dependence distance this stmt participates in |
886 or zero if none. */ | |
887 unsigned int min_neg_dist; | |
888 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
889 /* Not all stmts in the loop need to be vectorized. e.g, the increment |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
890 of the loop induction variable and computation of array indexes. relevant |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
891 indicates whether the stmt needs to be vectorized. */ |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
892 enum vect_relevant relevant; |
0 | 893 |
111 | 894 /* For loads if this is a gather, for stores if this is a scatter. */ |
895 bool gather_scatter_p; | |
896 | |
897 /* True if this is an access with loop-invariant stride. */ | |
898 bool strided_p; | |
899 | |
900 /* For both loads and stores. */ | |
901 bool simd_lane_access_p; | |
902 | |
903 /* Classifies how the load or store is going to be implemented | |
904 for loop vectorization. */ | |
905 vect_memory_access_type memory_access_type; | |
906 | |
907 /* For reduction loops, this is the type of reduction. */ | |
908 enum vect_reduction_type v_reduc_type; | |
909 | |
910 /* For CONST_COND_REDUCTION, record the reduc code. */ | |
911 enum tree_code const_cond_reduc_code; | |
912 | |
913 /* On a reduction PHI the reduction type as detected by | |
914 vect_force_simple_reduction. */ | |
915 enum vect_reduction_type reduc_type; | |
0 | 916 |
111 | 917 /* On a reduction PHI the def returned by vect_force_simple_reduction. |
918 On the def returned by vect_force_simple_reduction the | |
919 corresponding PHI. */ | |
131 | 920 stmt_vec_info reduc_def; |
111 | 921 |
922 /* The number of scalar stmt references from active SLP instances. */ | |
923 unsigned int num_slp_uses; | |
131 | 924 |
925 /* If nonzero, the lhs of the statement could be truncated to this | |
926 many bits without affecting any users of the result. */ | |
927 unsigned int min_output_precision; | |
928 | |
929 /* If nonzero, all non-boolean input operands have the same precision, | |
930 and they could each be truncated to this many bits without changing | |
931 the result. */ | |
932 unsigned int min_input_precision; | |
933 | |
934 /* If OPERATION_BITS is nonzero, the statement could be performed on | |
935 an integer with the sign and number of bits given by OPERATION_SIGN | |
936 and OPERATION_BITS without changing the result. */ | |
937 unsigned int operation_precision; | |
938 signop operation_sign; | |
939 }; | |
111 | 940 |
941 /* Information about a gather/scatter call. */ | |
942 struct gather_scatter_info { | |
131 | 943 /* The internal function to use for the gather/scatter operation, |
944 or IFN_LAST if a built-in function should be used instead. */ | |
945 internal_fn ifn; | |
946 | |
947 /* The FUNCTION_DECL for the built-in gather/scatter function, | |
948 or null if an internal function should be used instead. */ | |
111 | 949 tree decl; |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
950 |
111 | 951 /* The loop-invariant base value. */ |
952 tree base; | |
953 | |
954 /* The original scalar offset, which is a non-loop-invariant SSA_NAME. */ | |
955 tree offset; | |
956 | |
957 /* Each offset element should be multiplied by this amount before | |
958 being added to the base. */ | |
959 int scale; | |
960 | |
961 /* The definition type for the vectorized offset. */ | |
962 enum vect_def_type offset_dt; | |
963 | |
964 /* The type of the vectorized offset. */ | |
965 tree offset_vectype; | |
131 | 966 |
967 /* The type of the scalar elements after loading or before storing. */ | |
968 tree element_type; | |
969 | |
970 /* The type of the scalar elements being loaded or stored. */ | |
971 tree memory_type; | |
111 | 972 }; |
0 | 973 |
/* Access Functions.  Each macro takes a stmt_vec_info S and expands to
   the named member, so it can be used as an lvalue.  */
#define STMT_VINFO_TYPE(S)                 (S)->type
#define STMT_VINFO_STMT(S)                 (S)->stmt
111 | 977 inline loop_vec_info |
978 STMT_VINFO_LOOP_VINFO (stmt_vec_info stmt_vinfo) | |
979 { | |
980 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (stmt_vinfo->vinfo)) | |
981 return loop_vinfo; | |
982 return NULL; | |
983 } | |
984 inline bb_vec_info | |
985 STMT_VINFO_BB_VINFO (stmt_vec_info stmt_vinfo) | |
986 { | |
987 if (bb_vec_info bb_vinfo = dyn_cast <bb_vec_info> (stmt_vinfo->vinfo)) | |
988 return bb_vinfo; | |
989 return NULL; | |
990 } | |
/* Member accessors for a stmt_vec_info S.  */
#define STMT_VINFO_RELEVANT(S)             (S)->relevant
#define STMT_VINFO_LIVE_P(S)               (S)->live
#define STMT_VINFO_VECTYPE(S)              (S)->vectype
#define STMT_VINFO_VEC_STMT(S)             (S)->vectorized_stmt
#define STMT_VINFO_VECTORIZABLE(S)         (S)->vectorizable
/* The "+ 0" turns the expansion into an rvalue, so the data reference
   cannot be assigned through this macro.  */
#define STMT_VINFO_DATA_REF(S)             ((S)->dr_aux.dr + 0)
#define STMT_VINFO_GATHER_SCATTER_P(S)     (S)->gather_scatter_p
#define STMT_VINFO_STRIDED_P(S)            (S)->strided_p
#define STMT_VINFO_MEMORY_ACCESS_TYPE(S)   (S)->memory_access_type
#define STMT_VINFO_SIMD_LANE_ACCESS_P(S)   (S)->simd_lane_access_p
#define STMT_VINFO_VEC_REDUCTION_TYPE(S)   (S)->v_reduc_type
#define STMT_VINFO_VEC_CONST_COND_REDUC_CODE(S) (S)->const_cond_reduc_code
0 | 1003 |
/* Accessors for the dr_wrt_vec_loop member of a stmt_vec_info S and for
   the individual members of that innermost_loop_behavior.  */
#define STMT_VINFO_DR_WRT_VEC_LOOP(S)      (S)->dr_wrt_vec_loop
#define STMT_VINFO_DR_BASE_ADDRESS(S)      (S)->dr_wrt_vec_loop.base_address
#define STMT_VINFO_DR_INIT(S)              (S)->dr_wrt_vec_loop.init
#define STMT_VINFO_DR_OFFSET(S)            (S)->dr_wrt_vec_loop.offset
#define STMT_VINFO_DR_STEP(S)              (S)->dr_wrt_vec_loop.step
#define STMT_VINFO_DR_BASE_ALIGNMENT(S)    (S)->dr_wrt_vec_loop.base_alignment
#define STMT_VINFO_DR_BASE_MISALIGNMENT(S) \
  (S)->dr_wrt_vec_loop.base_misalignment
#define STMT_VINFO_DR_OFFSET_ALIGNMENT(S) \
  (S)->dr_wrt_vec_loop.offset_alignment
#define STMT_VINFO_DR_STEP_ALIGNMENT(S) \
  (S)->dr_wrt_vec_loop.step_alignment
0 | 1016 |
/* Address of S's dr_aux.  With checking enabled this first asserts that
   dr_aux.stmt points back at S itself.  */
#define STMT_VINFO_DR_INFO(S) \
  (gcc_checking_assert ((S)->dr_aux.stmt == (S)), &(S)->dr_aux)

/* Further member accessors for a stmt_vec_info S.  */
#define STMT_VINFO_IN_PATTERN_P(S)         (S)->in_pattern_p
#define STMT_VINFO_RELATED_STMT(S)         (S)->related_stmt
#define STMT_VINFO_PATTERN_DEF_SEQ(S)      (S)->pattern_def_seq
#define STMT_VINFO_SAME_ALIGN_REFS(S)      (S)->same_align_refs
#define STMT_VINFO_SIMD_CLONE_INFO(S)      (S)->simd_clone_info
#define STMT_VINFO_DEF_TYPE(S)             (S)->def_type
/* True when S has a data reference and DR_GROUP_FIRST_ELEMENT (S) is
   non-null.  */
#define STMT_VINFO_GROUPED_ACCESS(S) \
  ((S)->dr_aux.dr && DR_GROUP_FIRST_ELEMENT(S))
#define STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED(S) (S)->loop_phi_evolution_base_unchanged
#define STMT_VINFO_LOOP_PHI_EVOLUTION_PART(S) (S)->loop_phi_evolution_part
#define STMT_VINFO_MIN_NEG_DIST(S)         (S)->min_neg_dist
#define STMT_VINFO_NUM_SLP_USES(S)         (S)->num_slp_uses
#define STMT_VINFO_REDUC_TYPE(S)           (S)->reduc_type
#define STMT_VINFO_REDUC_DEF(S)            (S)->reduc_def
0 | 1034 |
/* Group accessors for statements that have a data reference.  With
   checking enabled each one asserts that (S)->dr_aux.dr is non-null
   before yielding the member.  */
#define DR_GROUP_FIRST_ELEMENT(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->first_element)
#define DR_GROUP_NEXT_ELEMENT(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->next_element)
#define DR_GROUP_SIZE(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->size)
#define DR_GROUP_STORE_COUNT(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->store_count)
#define DR_GROUP_GAP(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->gap)
#define DR_GROUP_SAME_DR_STMT(S) \
  (gcc_checking_assert ((S)->dr_aux.dr), (S)->same_dr_stmt)

/* Group accessors for statements without a data reference.  These read
   the same members as the DR_GROUP_* macros but assert that
   (S)->dr_aux.dr is null.  */
#define REDUC_GROUP_FIRST_ELEMENT(S) \
  (gcc_checking_assert (!(S)->dr_aux.dr), (S)->first_element)
#define REDUC_GROUP_NEXT_ELEMENT(S) \
  (gcc_checking_assert (!(S)->dr_aux.dr), (S)->next_element)
#define REDUC_GROUP_SIZE(S) \
  (gcc_checking_assert (!(S)->dr_aux.dr), (S)->size)
0 | 1054 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
/* True unless S's relevance is vect_unused_in_scope.  */
#define STMT_VINFO_RELEVANT_P(S)          ((S)->relevant != vect_unused_in_scope)

/* Tests on, and access to, the slp_type member of S.  */
#define HYBRID_SLP_STMT(S)                ((S)->slp_type == hybrid)
#define PURE_SLP_STMT(S)                  ((S)->slp_type == pure_slp)
#define STMT_SLP_TYPE(S)                   (S)->slp_type
1060 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
#define VECT_MAX_COST 1000

/* Upper bound on the number of intermediate steps that a multi-step
   type conversion may need.  */
#define MAX_INTERM_CVT_STEPS         3

#define MAX_VECTORIZATION_FACTOR INT_MAX
111 | 1068 |
/* Nonzero when TYPE is a (scalar) boolean type, or a type the middle-end
   treats as compatible with one (an unsigned integral type of
   precision 1).  Decides which types get vectorized as
   VECTOR_BOOLEAN_TYPE_P.  TYPE is expanded several times.  */

#define VECT_SCALAR_BOOLEAN_TYPE_P(TYPE) \
  (TREE_CODE (TYPE) == BOOLEAN_TYPE             \
   || ((TREE_CODE (TYPE) == INTEGER_TYPE        \
        || TREE_CODE (TYPE) == ENUMERAL_TYPE)   \
       && TYPE_PRECISION (TYPE) == 1            \
       && TYPE_UNSIGNED (TYPE)))
0 | 1080 |
131 | 1081 static inline bool |
1082 nested_in_vect_loop_p (struct loop *loop, stmt_vec_info stmt_info) | |
0 | 1083 { |
131 | 1084 return (loop->inner |
1085 && (loop->inner == (gimple_bb (stmt_info->stmt))->loop_father)); | |
0 | 1086 } |
1087 | |
131 | 1088 /* Return the earlier statement between STMT1_INFO and STMT2_INFO. */ |
0 | 1089 |
131 | 1090 static inline stmt_vec_info |
1091 get_earlier_stmt (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info) | |
1092 { | |
1093 gcc_checking_assert ((STMT_VINFO_IN_PATTERN_P (stmt1_info) | |
1094 || !STMT_VINFO_RELATED_STMT (stmt1_info)) | |
1095 && (STMT_VINFO_IN_PATTERN_P (stmt2_info) | |
1096 || !STMT_VINFO_RELATED_STMT (stmt2_info))); | |
0 | 1097 |
131 | 1098 if (gimple_uid (stmt1_info->stmt) < gimple_uid (stmt2_info->stmt)) |
1099 return stmt1_info; | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1100 else |
131 | 1101 return stmt2_info; |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1102 } |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1103 |
131 | 1104 /* Return the later statement between STMT1_INFO and STMT2_INFO. */ |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1105 |
131 | 1106 static inline stmt_vec_info |
1107 get_later_stmt (stmt_vec_info stmt1_info, stmt_vec_info stmt2_info) | |
1108 { | |
1109 gcc_checking_assert ((STMT_VINFO_IN_PATTERN_P (stmt1_info) | |
1110 || !STMT_VINFO_RELATED_STMT (stmt1_info)) | |
1111 && (STMT_VINFO_IN_PATTERN_P (stmt2_info) | |
1112 || !STMT_VINFO_RELATED_STMT (stmt2_info))); | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1113 |
131 | 1114 if (gimple_uid (stmt1_info->stmt) > gimple_uid (stmt2_info->stmt)) |
1115 return stmt1_info; | |
0 | 1116 else |
131 | 1117 return stmt2_info; |
0 | 1118 } |
1119 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1120 /* Return TRUE if a statement represented by STMT_INFO is a part of a |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1121 pattern. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1122 |
0 | 1123 static inline bool |
1124 is_pattern_stmt_p (stmt_vec_info stmt_info) | |
1125 { | |
131 | 1126 return stmt_info->pattern_stmt_p; |
1127 } | |
1128 | |
1129 /* If STMT_INFO is a pattern statement, return the statement that it | |
1130 replaces, otherwise return STMT_INFO itself. */ | |
0 | 1131 |
131 | 1132 inline stmt_vec_info |
1133 vect_orig_stmt (stmt_vec_info stmt_info) | |
1134 { | |
1135 if (is_pattern_stmt_p (stmt_info)) | |
1136 return STMT_VINFO_RELATED_STMT (stmt_info); | |
1137 return stmt_info; | |
1138 } | |
0 | 1139 |
131 | 1140 /* If STMT_INFO has been replaced by a pattern statement, return the |
1141 replacement statement, otherwise return STMT_INFO itself. */ | |
1142 | |
1143 inline stmt_vec_info | |
1144 vect_stmt_to_vectorize (stmt_vec_info stmt_info) | |
1145 { | |
1146 if (STMT_VINFO_IN_PATTERN_P (stmt_info)) | |
1147 return STMT_VINFO_RELATED_STMT (stmt_info); | |
1148 return stmt_info; | |
0 | 1149 } |
1150 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1151 /* Return true if BB is a loop header. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1152 |
0 | 1153 static inline bool |
1154 is_loop_header_bb_p (basic_block bb) | |
1155 { | |
1156 if (bb == (bb->loop_father)->header) | |
1157 return true; | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1158 gcc_checking_assert (EDGE_COUNT (bb->preds) == 1); |
0 | 1159 return false; |
1160 } | |
1161 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
/* Return 2 raised to the power X.  Any X that is zero or negative
   yields 1.  */

static inline int
vect_pow2 (int x)
{
  int power = 1;

  /* Double the result once for every unit of X.  */
  for (int remaining = x; remaining > 0; remaining--)
    power *= 2;

  return power;
}
1174 | |
111 | 1175 /* Alias targetm.vectorize.builtin_vectorization_cost. */ |
1176 | |
1177 static inline int | |
1178 builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, | |
1179 tree vectype, int misalign) | |
1180 { | |
1181 return targetm.vectorize.builtin_vectorization_cost (type_of_cost, | |
1182 vectype, misalign); | |
1183 } | |
1184 | |
1185 /* Get cost by calling cost target builtin. */ | |
1186 | |
1187 static inline | |
1188 int vect_get_stmt_cost (enum vect_cost_for_stmt type_of_cost) | |
1189 { | |
1190 return builtin_vectorization_cost (type_of_cost, NULL, 0); | |
1191 } | |
1192 | |
1193 /* Alias targetm.vectorize.init_cost. */ | |
1194 | |
1195 static inline void * | |
1196 init_cost (struct loop *loop_info) | |
1197 { | |
1198 return targetm.vectorize.init_cost (loop_info); | |
1199 } | |
1200 | |
131 | 1201 extern void dump_stmt_cost (FILE *, void *, int, enum vect_cost_for_stmt, |
1202 stmt_vec_info, int, unsigned, | |
1203 enum vect_cost_model_location); | |
1204 | |
111 | 1205 /* Alias targetm.vectorize.add_stmt_cost. */ |
1206 | |
1207 static inline unsigned | |
1208 add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, | |
1209 stmt_vec_info stmt_info, int misalign, | |
1210 enum vect_cost_model_location where) | |
1211 { | |
131 | 1212 unsigned cost = targetm.vectorize.add_stmt_cost (data, count, kind, |
1213 stmt_info, misalign, where); | |
1214 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1215 dump_stmt_cost (dump_file, data, count, kind, stmt_info, misalign, | |
1216 cost, where); | |
1217 return cost; | |
111 | 1218 } |
1219 | |
1220 /* Alias targetm.vectorize.finish_cost. */ | |
1221 | |
1222 static inline void | |
1223 finish_cost (void *data, unsigned *prologue_cost, | |
1224 unsigned *body_cost, unsigned *epilogue_cost) | |
1225 { | |
1226 targetm.vectorize.finish_cost (data, prologue_cost, body_cost, epilogue_cost); | |
1227 } | |
1228 | |
1229 /* Alias targetm.vectorize.destroy_cost_data. */ | |
1230 | |
1231 static inline void | |
1232 destroy_cost_data (void *data) | |
1233 { | |
1234 targetm.vectorize.destroy_cost_data (data); | |
1235 } | |
1236 | |
131 | 1237 inline void |
1238 add_stmt_costs (void *data, stmt_vector_for_cost *cost_vec) | |
1239 { | |
1240 stmt_info_for_cost *cost; | |
1241 unsigned i; | |
1242 FOR_EACH_VEC_ELT (*cost_vec, i, cost) | |
1243 add_stmt_cost (data, cost->count, cost->kind, cost->stmt_info, | |
1244 cost->misalign, cost->where); | |
1245 } | |
1246 | |
0 | 1247 /*-----------------------------------------------------------------*/ |
1248 /* Info on data references alignment. */ | |
1249 /*-----------------------------------------------------------------*/ | |
131 | 1250 #define DR_MISALIGNMENT_UNKNOWN (-1) |
1251 #define DR_MISALIGNMENT_UNINITIALIZED (-2) | |
111 | 1252 |
131 | 1253 inline void |
1254 set_dr_misalignment (dr_vec_info *dr_info, int val) | |
1255 { | |
1256 dr_info->misalignment = val; | |
111 | 1257 } |
1258 | |
1259 inline int | |
131 | 1260 dr_misalignment (dr_vec_info *dr_info) |
111 | 1261 { |
131 | 1262 int misalign = dr_info->misalignment; |
1263 gcc_assert (misalign != DR_MISALIGNMENT_UNINITIALIZED); | |
1264 return misalign; | |
111 | 1265 } |
0 | 1266 |
1267 /* Reflects actual alignment of first access in the vectorized loop, | |
1268 taking into account peeling/versioning if applied. */ | |
111 | 1269 #define DR_MISALIGNMENT(DR) dr_misalignment (DR) |
1270 #define SET_DR_MISALIGNMENT(DR, VAL) set_dr_misalignment (DR, VAL) | |
0 | 1271 |
111 | 1272 /* Only defined once DR_MISALIGNMENT is defined. */ |
131 | 1273 #define DR_TARGET_ALIGNMENT(DR) ((DR)->target_alignment) |
111 | 1274 |
131 | 1275 /* Return true if data access DR_INFO is aligned to its target alignment |
111 | 1276 (which may be less than a full vector). */ |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1277 |
0 | 1278 static inline bool |
131 | 1279 aligned_access_p (dr_vec_info *dr_info) |
0 | 1280 { |
131 | 1281 return (DR_MISALIGNMENT (dr_info) == 0); |
0 | 1282 } |
1283 | |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1284 /* Return TRUE if the alignment of the data access is known, and FALSE |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1285 otherwise. */ |
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
1286 |
0 | 1287 static inline bool |
131 | 1288 known_alignment_for_access_p (dr_vec_info *dr_info) |
0 | 1289 { |
131 | 1290 return (DR_MISALIGNMENT (dr_info) != DR_MISALIGNMENT_UNKNOWN); |
111 | 1291 } |
1292 | |
1293 /* Return the minimum alignment in bytes that the vectorized version | |
131 | 1294 of DR_INFO is guaranteed to have. */ |
111 | 1295 |
1296 static inline unsigned int | |
131 | 1297 vect_known_alignment_in_bytes (dr_vec_info *dr_info) |
111 | 1298 { |
131 | 1299 if (DR_MISALIGNMENT (dr_info) == DR_MISALIGNMENT_UNKNOWN) |
1300 return TYPE_ALIGN_UNIT (TREE_TYPE (DR_REF (dr_info->dr))); | |
1301 if (DR_MISALIGNMENT (dr_info) == 0) | |
1302 return DR_TARGET_ALIGNMENT (dr_info); | |
1303 return DR_MISALIGNMENT (dr_info) & -DR_MISALIGNMENT (dr_info); | |
111 | 1304 } |
1305 | |
131 | 1306 /* Return the behavior of DR_INFO with respect to the vectorization context |
111 | 1307 (which for outer loop vectorization might not be the behavior recorded |
131 | 1308 in DR_INFO itself). */ |
111 | 1309 |
1310 static inline innermost_loop_behavior * | |
131 | 1311 vect_dr_behavior (dr_vec_info *dr_info) |
111 | 1312 { |
131 | 1313 stmt_vec_info stmt_info = dr_info->stmt; |
111 | 1314 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); |
1315 if (loop_vinfo == NULL | |
131 | 1316 || !nested_in_vect_loop_p (LOOP_VINFO_LOOP (loop_vinfo), stmt_info)) |
1317 return &DR_INNERMOST (dr_info->dr); | |
111 | 1318 else |
1319 return &STMT_VINFO_DR_WRT_VEC_LOOP (stmt_info); | |
0 | 1320 } |
1321 | |
111 | 1322 /* Return true if the vect cost model is unlimited. */ |
1323 static inline bool | |
1324 unlimited_cost_model (loop_p loop) | |
1325 { | |
1326 if (loop != NULL && loop->force_vectorize | |
1327 && flag_simd_cost_model != VECT_COST_MODEL_DEFAULT) | |
1328 return flag_simd_cost_model == VECT_COST_MODEL_UNLIMITED; | |
1329 return (flag_vect_cost_model == VECT_COST_MODEL_UNLIMITED); | |
1330 } | |
1331 | |
131 | 1332 /* Return true if the loop described by LOOP_VINFO is fully-masked and |
1333 if the first iteration should use a partial mask in order to achieve | |
1334 alignment. */ | |
1335 | |
1336 static inline bool | |
1337 vect_use_loop_mask_for_alignment_p (loop_vec_info loop_vinfo) | |
1338 { | |
1339 return (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) | |
1340 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)); | |
1341 } | |
1342 | |
1343 /* Return the number of vectors of type VECTYPE that are needed to get | |
1344 NUNITS elements. NUNITS should be based on the vectorization factor, | |
1345 so it is always a known multiple of the number of elements in VECTYPE. */ | |
1346 | |
1347 static inline unsigned int | |
1348 vect_get_num_vectors (poly_uint64 nunits, tree vectype) | |
1349 { | |
1350 return exact_div (nunits, TYPE_VECTOR_SUBPARTS (vectype)).to_constant (); | |
1351 } | |
1352 | |
111 | 1353 /* Return the number of copies needed for loop vectorization when |
1354 a statement operates on vectors of type VECTYPE. This is the | |
1355 vectorization factor divided by the number of elements in | |
1356 VECTYPE and is always known at compile time. */ | |
1357 | |
1358 static inline unsigned int | |
1359 vect_get_num_copies (loop_vec_info loop_vinfo, tree vectype) | |
1360 { | |
131 | 1361 return vect_get_num_vectors (LOOP_VINFO_VECT_FACTOR (loop_vinfo), vectype); |
1362 } | |
1363 | |
1364 /* Update maximum unit count *MAX_NUNITS so that it accounts for | |
1365 the number of units in vector type VECTYPE. *MAX_NUNITS can be 1 | |
1366 if we haven't yet recorded any vector types. */ | |
1367 | |
1368 static inline void | |
1369 vect_update_max_nunits (poly_uint64 *max_nunits, tree vectype) | |
1370 { | |
1371 /* All unit counts have the form current_vector_size * X for some | |
1372 rational X, so two unit sizes must have a common multiple. | |
1373 Everything is a multiple of the initial value of 1. */ | |
1374 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); | |
1375 *max_nunits = force_common_multiple (*max_nunits, nunits); | |
111 | 1376 } |
1377 | |
131 | 1378 /* Return the vectorization factor that should be used for costing |
1379 purposes while vectorizing the loop described by LOOP_VINFO. | |
1380 Pick a reasonable estimate if the vectorization factor isn't | |
1381 known at compile time. */ | |
1382 | |
1383 static inline unsigned int | |
1384 vect_vf_for_cost (loop_vec_info loop_vinfo) | |
1385 { | |
1386 return estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo)); | |
1387 } | |
1388 | |
1389 /* Estimate the number of elements in VEC_TYPE for costing purposes. | |
1390 Pick a reasonable estimate if the exact number isn't known at | |
1391 compile time. */ | |
1392 | |
1393 static inline unsigned int | |
1394 vect_nunits_for_cost (tree vec_type) | |
1395 { | |
1396 return estimated_poly_value (TYPE_VECTOR_SUBPARTS (vec_type)); | |
1397 } | |
1398 | |
1399 /* Return the maximum possible vectorization factor for LOOP_VINFO. */ | |
1400 | |
1401 static inline unsigned HOST_WIDE_INT | |
1402 vect_max_vf (loop_vec_info loop_vinfo) | |
1403 { | |
1404 unsigned HOST_WIDE_INT vf; | |
1405 if (LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf)) | |
1406 return vf; | |
1407 return MAX_VECTORIZATION_FACTOR; | |
1408 } | |
1409 | |
1410 /* Return the size of the value accessed by unvectorized data reference | |
1411 DR_INFO. This is only valid once STMT_VINFO_VECTYPE has been calculated | |
1412 for the associated gimple statement, since that guarantees that DR_INFO | |
1413 accesses either a scalar or a scalar equivalent. ("Scalar equivalent" | |
1414 here includes things like V1SI, which can be vectorized in the same way | |
111 | 1415 as a plain SI.) */ |
1416 | |
1417 inline unsigned int | |
131 | 1418 vect_get_scalar_dr_size (dr_vec_info *dr_info) |
111 | 1419 { |
131 | 1420 return tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr_info->dr)))); |
111 | 1421 } |
1422 | |
131 | 1423 /* Source location + hotness information. */ |
1424 extern dump_user_location_t vect_location; | |
1425 | |
1426 /* A macro for calling: | |
1427 dump_begin_scope (MSG, vect_location); | |
1428 via an RAII object, thus printing "=== MSG ===\n" to the dumpfile etc, | |
1429 and then calling | |
1430 dump_end_scope (); | |
1431 once the object goes out of scope, thus capturing the nesting of | |
1432 the scopes. | |
1433 | |
1434 These scopes affect dump messages within them: dump messages at the | |
1435 top level implicitly default to MSG_PRIORITY_USER_FACING, whereas those | |
1436 in a nested scope implicitly default to MSG_PRIORITY_INTERNALS. */ | |
1437 | |
1438 #define DUMP_VECT_SCOPE(MSG) \ | |
1439 AUTO_DUMP_SCOPE (MSG, vect_location) | |
0 | 1440 |
1441 /*-----------------------------------------------------------------*/ | |
1442 /* Function prototypes. */ | |
1443 /*-----------------------------------------------------------------*/ | |
1444 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1445 /* Simple loop peeling and versioning utilities for vectorizer's purposes - |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1446 in tree-vect-loop-manip.c. */ |
131 | 1447 extern void vect_set_loop_condition (struct loop *, loop_vec_info, |
1448 tree, tree, tree, bool); | |
0 | 1449 extern bool slpeel_can_duplicate_loop_p (const struct loop *, const_edge); |
111 | 1450 struct loop *slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *, |
1451 struct loop *, edge); | |
131 | 1452 extern void vect_loop_versioning (loop_vec_info, unsigned int, bool, |
1453 poly_uint64); | |
111 | 1454 extern struct loop *vect_do_peeling (loop_vec_info, tree, tree, |
131 | 1455 tree *, tree *, tree *, int, bool, bool); |
1456 extern void vect_prepare_for_masked_peels (loop_vec_info); | |
1457 extern dump_user_location_t find_loop_location (struct loop *); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1458 extern bool vect_can_advance_ivs_p (loop_vec_info); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1459 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1460 /* In tree-vect-stmts.c. */ |
131 | 1461 extern poly_uint64 current_vector_size; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1462 extern tree get_vectype_for_scalar_type (tree); |
131 | 1463 extern tree get_vectype_for_scalar_type_and_size (tree, poly_uint64); |
111 | 1464 extern tree get_mask_type_for_scalar_type (tree); |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1465 extern tree get_same_sized_vectype (tree, tree); |
131 | 1466 extern bool vect_get_loop_mask_type (loop_vec_info); |
1467 extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *, | |
1468 stmt_vec_info * = NULL, gimple ** = NULL); | |
1469 extern bool vect_is_simple_use (tree, vec_info *, enum vect_def_type *, | |
1470 tree *, stmt_vec_info * = NULL, | |
1471 gimple ** = NULL); | |
1472 extern bool supportable_widening_operation (enum tree_code, stmt_vec_info, | |
1473 tree, tree, enum tree_code *, | |
111 | 1474 enum tree_code *, int *, |
1475 vec<tree> *); | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1476 extern bool supportable_narrowing_operation (enum tree_code, tree, tree, |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1477 enum tree_code *, |
111 | 1478 int *, vec<tree> *); |
1479 extern unsigned record_stmt_cost (stmt_vector_for_cost *, int, | |
1480 enum vect_cost_for_stmt, stmt_vec_info, | |
1481 int, enum vect_cost_model_location); | |
131 | 1482 extern stmt_vec_info vect_finish_replace_stmt (stmt_vec_info, gimple *); |
1483 extern stmt_vec_info vect_finish_stmt_generation (stmt_vec_info, gimple *, | |
1484 gimple_stmt_iterator *); | |
1485 extern opt_result vect_mark_stmts_to_be_vectorized (loop_vec_info); | |
1486 extern tree vect_get_store_rhs (stmt_vec_info); | |
1487 extern tree vect_get_vec_def_for_operand_1 (stmt_vec_info, enum vect_def_type); | |
1488 extern tree vect_get_vec_def_for_operand (tree, stmt_vec_info, tree = NULL); | |
1489 extern void vect_get_vec_defs (tree, tree, stmt_vec_info, vec<tree> *, | |
111 | 1490 vec<tree> *, slp_tree); |
131 | 1491 extern void vect_get_vec_defs_for_stmt_copy (vec_info *, |
111 | 1492 vec<tree> *, vec<tree> *); |
131 | 1493 extern tree vect_init_vector (stmt_vec_info, tree, tree, |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1494 gimple_stmt_iterator *); |
131 | 1495 extern tree vect_get_vec_def_for_stmt_copy (vec_info *, tree); |
1496 extern bool vect_transform_stmt (stmt_vec_info, gimple_stmt_iterator *, | |
1497 slp_tree, slp_instance); | |
1498 extern void vect_remove_stores (stmt_vec_info); | |
1499 extern opt_result vect_analyze_stmt (stmt_vec_info, bool *, slp_tree, | |
1500 slp_instance, stmt_vector_for_cost *); | |
1501 extern bool vectorizable_condition (stmt_vec_info, gimple_stmt_iterator *, | |
1502 stmt_vec_info *, tree, int, slp_tree, | |
1503 stmt_vector_for_cost *); | |
1504 extern void vect_get_load_cost (stmt_vec_info, int, bool, | |
111 | 1505 unsigned int *, unsigned int *, |
1506 stmt_vector_for_cost *, | |
1507 stmt_vector_for_cost *, bool); | |
131 | 1508 extern void vect_get_store_cost (stmt_vec_info, int, |
111 | 1509 unsigned int *, stmt_vector_for_cost *); |
1510 extern bool vect_supportable_shift (enum tree_code, tree); | |
131 | 1511 extern tree vect_gen_perm_mask_any (tree, const vec_perm_indices &); |
1512 extern tree vect_gen_perm_mask_checked (tree, const vec_perm_indices &); | |
111 | 1513 extern void optimize_mask_stores (struct loop*); |
131 | 1514 extern gcall *vect_gen_while (tree, tree, tree); |
1515 extern tree vect_gen_while_not (gimple_seq *, tree, tree, tree); | |
1516 extern opt_result vect_get_vector_types_for_stmt (stmt_vec_info, tree *, | |
1517 tree *); | |
1518 extern opt_tree vect_get_mask_type_for_stmt (stmt_vec_info); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1519 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1520 /* In tree-vect-data-refs.c. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1521 extern bool vect_can_force_dr_alignment_p (const_tree, unsigned int); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1522 extern enum dr_alignment_support vect_supportable_dr_alignment |
131 | 1523 (dr_vec_info *, bool); |
1524 extern tree vect_get_smallest_scalar_type (stmt_vec_info, HOST_WIDE_INT *, | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1525 HOST_WIDE_INT *); |
131 | 1526 extern opt_result vect_analyze_data_ref_dependences (loop_vec_info, unsigned int *); |
111 | 1527 extern bool vect_slp_analyze_instance_dependence (slp_instance); |
131 | 1528 extern opt_result vect_enhance_data_refs_alignment (loop_vec_info); |
1529 extern opt_result vect_analyze_data_refs_alignment (loop_vec_info); | |
1530 extern opt_result vect_verify_datarefs_alignment (loop_vec_info); | |
111 | 1531 extern bool vect_slp_analyze_and_verify_instance_alignment (slp_instance); |
131 | 1532 extern opt_result vect_analyze_data_ref_accesses (vec_info *); |
1533 extern opt_result vect_prune_runtime_alias_test_list (loop_vec_info); | |
1534 extern bool vect_gather_scatter_fn_p (bool, bool, tree, tree, unsigned int, | |
1535 signop, int, internal_fn *, tree *); | |
1536 extern bool vect_check_gather_scatter (stmt_vec_info, loop_vec_info, | |
111 | 1537 gather_scatter_info *); |
131 | 1538 extern opt_result vect_find_stmt_data_reference (loop_p, gimple *, |
1539 vec<data_reference_p> *); | |
1540 extern opt_result vect_analyze_data_refs (vec_info *, poly_uint64 *); | |
111 | 1541 extern void vect_record_base_alignments (vec_info *); |
131 | 1542 extern tree vect_create_data_ref_ptr (stmt_vec_info, tree, struct loop *, tree, |
111 | 1543 tree *, gimple_stmt_iterator *, |
131 | 1544 gimple **, bool, |
1545 tree = NULL_TREE, tree = NULL_TREE); | |
1546 extern tree bump_vector_ptr (tree, gimple *, gimple_stmt_iterator *, | |
1547 stmt_vec_info, tree); | |
1548 extern void vect_copy_ref_info (tree, tree); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1549 extern tree vect_create_destination_var (tree, tree); |
111 | 1550 extern bool vect_grouped_store_supported (tree, unsigned HOST_WIDE_INT); |
131 | 1551 extern bool vect_store_lanes_supported (tree, unsigned HOST_WIDE_INT, bool); |
111 | 1552 extern bool vect_grouped_load_supported (tree, bool, unsigned HOST_WIDE_INT); |
131 | 1553 extern bool vect_load_lanes_supported (tree, unsigned HOST_WIDE_INT, bool); |
1554 extern void vect_permute_store_chain (vec<tree> ,unsigned int, stmt_vec_info, | |
111 | 1555 gimple_stmt_iterator *, vec<tree> *); |
131 | 1556 extern tree vect_setup_realignment (stmt_vec_info, gimple_stmt_iterator *, |
1557 tree *, enum dr_alignment_support, tree, | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1558 struct loop **); |
131 | 1559 extern void vect_transform_grouped_load (stmt_vec_info, vec<tree> , int, |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1560 gimple_stmt_iterator *); |
131 | 1561 extern void vect_record_grouped_load_vectors (stmt_vec_info, vec<tree>); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1562 extern tree vect_get_new_vect_var (tree, enum vect_var_kind, const char *); |
111 | 1563 extern tree vect_get_new_ssa_name (tree, enum vect_var_kind, |
1564 const char * = NULL); | |
131 | 1565 extern tree vect_create_addr_base_for_vector_ref (stmt_vec_info, gimple_seq *, |
111 | 1566 tree, tree = NULL_TREE); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1567 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1568 /* In tree-vect-loop.c. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1569 /* FORNOW: Used in tree-parloops.c. */ |
131 | 1570 extern stmt_vec_info vect_force_simple_reduction (loop_vec_info, stmt_vec_info, |
1571 bool *, bool); | |
1572 /* Used in gimple-loop-interchange.c. */ | |
1573 extern bool check_reduction_path (dump_user_location_t, loop_p, gphi *, tree, | |
1574 enum tree_code); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1575 /* Driver for loop analysis stage. */ |
131 | 1576 extern opt_loop_vec_info vect_analyze_loop (struct loop *, |
1577 loop_vec_info, | |
1578 vec_info_shared *); | |
111 | 1579 extern tree vect_build_loop_niters (loop_vec_info, bool * = NULL); |
131 | 1580 extern void vect_gen_vector_loop_niters (loop_vec_info, tree, tree *, |
1581 tree *, bool); | |
1582 extern tree vect_halve_mask_nunits (tree); | |
1583 extern tree vect_double_mask_nunits (tree); | |
1584 extern void vect_record_loop_mask (loop_vec_info, vec_loop_masks *, | |
1585 unsigned int, tree); | |
1586 extern tree vect_get_loop_mask (gimple_stmt_iterator *, vec_loop_masks *, | |
1587 unsigned int, tree, unsigned int); | |
1588 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1589 /* Driver for loop transformation stage. */ |
111 | 1590 extern struct loop *vect_transform_loop (loop_vec_info); |
131 | 1591 extern opt_loop_vec_info vect_analyze_loop_form (struct loop *, |
1592 vec_info_shared *); | |
1593 extern bool vectorizable_live_operation (stmt_vec_info, gimple_stmt_iterator *, | |
1594 slp_tree, int, stmt_vec_info *, | |
1595 stmt_vector_for_cost *); | |
1596 extern bool vectorizable_reduction (stmt_vec_info, gimple_stmt_iterator *, | |
1597 stmt_vec_info *, slp_tree, slp_instance, | |
1598 stmt_vector_for_cost *); | |
1599 extern bool vectorizable_induction (stmt_vec_info, gimple_stmt_iterator *, | |
1600 stmt_vec_info *, slp_tree, | |
1601 stmt_vector_for_cost *); | |
1602 extern tree get_initial_def_for_reduction (stmt_vec_info, tree, tree *); | |
111 | 1603 extern bool vect_worthwhile_without_simd_p (vec_info *, tree_code); |
1604 extern int vect_get_known_peeling_cost (loop_vec_info, int, int *, | |
1605 stmt_vector_for_cost *, | |
1606 stmt_vector_for_cost *, | |
1607 stmt_vector_for_cost *); | |
131 | 1608 extern tree cse_and_gimplify_to_preheader (loop_vec_info, tree); |
0 | 1609 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1610 /* In tree-vect-slp.c. */ |
131 | 1611 extern void vect_free_slp_instance (slp_instance, bool); |
111 | 1612 extern bool vect_transform_slp_perm_load (slp_tree, vec<tree> , |
131 | 1613 gimple_stmt_iterator *, poly_uint64, |
1614 slp_instance, bool, unsigned *); | |
111 | 1615 extern bool vect_slp_analyze_operations (vec_info *); |
131 | 1616 extern void vect_schedule_slp (vec_info *); |
1617 extern opt_result vect_analyze_slp (vec_info *, unsigned); | |
111 | 1618 extern bool vect_make_slp_decision (loop_vec_info); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1619 extern void vect_detect_hybrid_slp (loop_vec_info); |
111 | 1620 extern void vect_get_slp_defs (vec<tree> , slp_tree, vec<vec<tree> > *); |
1621 extern bool vect_slp_bb (basic_block); | |
131 | 1622 extern stmt_vec_info vect_find_last_scalar_stmt_in_slp (slp_tree); |
1623 extern bool is_simple_and_all_uses_invariant (stmt_vec_info, loop_vec_info); | |
1624 extern bool can_duplicate_and_interleave_p (unsigned int, machine_mode, | |
1625 unsigned int * = NULL, | |
1626 tree * = NULL, tree * = NULL); | |
1627 extern void duplicate_and_interleave (gimple_seq *, tree, vec<tree>, | |
1628 unsigned int, vec<tree> &); | |
1629 extern int vect_get_place_in_interleaving_chain (stmt_vec_info, stmt_vec_info); | |
0 | 1630 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1631 /* In tree-vect-patterns.c. */ |
0 | 1632 /* Pattern recognition functions. |
1633 Additional pattern recognition functions can (and will) be added | |
1634 in the future. */ | |
111 | 1635 void vect_pattern_recog (vec_info *); |
0 | 1636 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1637 /* In tree-vectorizer.c. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1638 unsigned vectorize_loops (void); |
111 | 1639 void vect_free_loop_info_assumptions (struct loop *); |
0 | 1640 |
1641 #endif /* GCC_TREE_VECTORIZER_H */ |