Mercurial > hg > CbC > CbC_gcc
comparison gcc/tree-vect-slp.c @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
comparison
equal
deleted
inserted
replaced
111:04ced10e8804 | 131:84e7813d76e9 |
---|---|
1 /* SLP - Basic Block Vectorization | 1 /* SLP - Basic Block Vectorization |
2 Copyright (C) 2007-2017 Free Software Foundation, Inc. | 2 Copyright (C) 2007-2018 Free Software Foundation, Inc. |
3 Contributed by Dorit Naishlos <dorit@il.ibm.com> | 3 Contributed by Dorit Naishlos <dorit@il.ibm.com> |
4 and Ira Rosen <irar@il.ibm.com> | 4 and Ira Rosen <irar@il.ibm.com> |
5 | 5 |
6 This file is part of GCC. | 6 This file is part of GCC. |
7 | 7 |
39 #include "cfgloop.h" | 39 #include "cfgloop.h" |
40 #include "tree-vectorizer.h" | 40 #include "tree-vectorizer.h" |
41 #include "langhooks.h" | 41 #include "langhooks.h" |
42 #include "gimple-walk.h" | 42 #include "gimple-walk.h" |
43 #include "dbgcnt.h" | 43 #include "dbgcnt.h" |
44 | 44 #include "tree-vector-builder.h" |
45 | 45 #include "vec-perm-indices.h" |
46 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */ | 46 #include "gimple-fold.h" |
47 #include "internal-fn.h" | |
48 | |
49 | |
50 /* Recursively free the memory allocated for the SLP tree rooted at NODE. | |
51 FINAL_P is true if we have vectorized the instance or if we have | |
52 made a final decision not to vectorize the statements in any way. */ | |
47 | 53 |
48 static void | 54 static void |
49 vect_free_slp_tree (slp_tree node) | 55 vect_free_slp_tree (slp_tree node, bool final_p) |
50 { | 56 { |
51 int i; | 57 int i; |
52 slp_tree child; | 58 slp_tree child; |
53 | 59 |
54 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) | 60 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
55 vect_free_slp_tree (child); | 61 vect_free_slp_tree (child, final_p); |
56 | 62 |
57 gimple *stmt; | 63 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant. |
58 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) | 64 Some statements might no longer exist, after having been |
59 /* After transform some stmts are removed and thus their vinfo is gone. */ | 65 removed by vect_transform_stmt. Updating the remaining |
60 if (vinfo_for_stmt (stmt)) | 66 statements would be redundant. */ |
61 { | 67 if (!final_p) |
62 gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0); | 68 { |
63 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--; | 69 stmt_vec_info stmt_info; |
64 } | 70 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) |
71 { | |
72 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0); | |
73 STMT_VINFO_NUM_SLP_USES (stmt_info)--; | |
74 } | |
75 } | |
65 | 76 |
66 SLP_TREE_CHILDREN (node).release (); | 77 SLP_TREE_CHILDREN (node).release (); |
67 SLP_TREE_SCALAR_STMTS (node).release (); | 78 SLP_TREE_SCALAR_STMTS (node).release (); |
68 SLP_TREE_VEC_STMTS (node).release (); | 79 SLP_TREE_VEC_STMTS (node).release (); |
69 SLP_TREE_LOAD_PERMUTATION (node).release (); | 80 SLP_TREE_LOAD_PERMUTATION (node).release (); |
70 | 81 |
71 free (node); | 82 free (node); |
72 } | 83 } |
73 | 84 |
74 | 85 |
75 /* Free the memory allocated for the SLP instance. */ | 86 /* Free the memory allocated for the SLP instance. FINAL_P is true if we |
87 have vectorized the instance or if we have made a final decision not | |
88 to vectorize the statements in any way. */ | |
76 | 89 |
77 void | 90 void |
78 vect_free_slp_instance (slp_instance instance) | 91 vect_free_slp_instance (slp_instance instance, bool final_p) |
79 { | 92 { |
80 vect_free_slp_tree (SLP_INSTANCE_TREE (instance)); | 93 vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p); |
81 SLP_INSTANCE_LOADS (instance).release (); | 94 SLP_INSTANCE_LOADS (instance).release (); |
82 free (instance); | 95 free (instance); |
83 } | 96 } |
84 | 97 |
85 | 98 |
86 /* Create an SLP node for SCALAR_STMTS. */ | 99 /* Create an SLP node for SCALAR_STMTS. */ |
87 | 100 |
88 static slp_tree | 101 static slp_tree |
89 vect_create_new_slp_node (vec<gimple *> scalar_stmts) | 102 vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts) |
90 { | 103 { |
91 slp_tree node; | 104 slp_tree node; |
92 gimple *stmt = scalar_stmts[0]; | 105 stmt_vec_info stmt_info = scalar_stmts[0]; |
93 unsigned int nops; | 106 unsigned int nops; |
94 | 107 |
95 if (is_gimple_call (stmt)) | 108 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt)) |
96 nops = gimple_call_num_args (stmt); | 109 nops = gimple_call_num_args (stmt); |
97 else if (is_gimple_assign (stmt)) | 110 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt)) |
98 { | 111 { |
99 nops = gimple_num_ops (stmt) - 1; | 112 nops = gimple_num_ops (stmt) - 1; |
100 if (gimple_assign_rhs_code (stmt) == COND_EXPR) | 113 if (gimple_assign_rhs_code (stmt) == COND_EXPR) |
101 nops++; | 114 nops++; |
102 } | 115 } |
103 else if (gimple_code (stmt) == GIMPLE_PHI) | 116 else if (is_a <gphi *> (stmt_info->stmt)) |
104 nops = 0; | 117 nops = 0; |
105 else | 118 else |
106 return NULL; | 119 return NULL; |
107 | 120 |
108 node = XNEW (struct _slp_tree); | 121 node = XNEW (struct _slp_tree); |
109 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts; | 122 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts; |
110 SLP_TREE_VEC_STMTS (node).create (0); | 123 SLP_TREE_VEC_STMTS (node).create (0); |
124 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0; | |
111 SLP_TREE_CHILDREN (node).create (nops); | 125 SLP_TREE_CHILDREN (node).create (nops); |
112 SLP_TREE_LOAD_PERMUTATION (node) = vNULL; | 126 SLP_TREE_LOAD_PERMUTATION (node) = vNULL; |
113 SLP_TREE_TWO_OPERATORS (node) = false; | 127 SLP_TREE_TWO_OPERATORS (node) = false; |
114 SLP_TREE_DEF_TYPE (node) = vect_internal_def; | 128 SLP_TREE_DEF_TYPE (node) = vect_internal_def; |
115 | 129 |
116 unsigned i; | 130 unsigned i; |
117 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt) | 131 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info) |
118 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++; | 132 STMT_VINFO_NUM_SLP_USES (stmt_info)++; |
119 | 133 |
120 return node; | 134 return node; |
121 } | 135 } |
122 | 136 |
123 | 137 |
125 corresponds to the same operand in a group of scalar stmts in an SLP | 139 corresponds to the same operand in a group of scalar stmts in an SLP |
126 node. */ | 140 node. */ |
127 typedef struct _slp_oprnd_info | 141 typedef struct _slp_oprnd_info |
128 { | 142 { |
129 /* Def-stmts for the operands. */ | 143 /* Def-stmts for the operands. */ |
130 vec<gimple *> def_stmts; | 144 vec<stmt_vec_info> def_stmts; |
131 /* Information about the first statement, its vector def-type, type, the | 145 /* Information about the first statement, its vector def-type, type, the |
132 operand itself in case it's constant, and an indication if it's a pattern | 146 operand itself in case it's constant, and an indication if it's a pattern |
133 stmt. */ | 147 stmt. */ |
134 tree first_op_type; | 148 tree first_op_type; |
135 enum vect_def_type first_dt; | 149 enum vect_def_type first_dt; |
179 | 193 |
180 oprnds_info.release (); | 194 oprnds_info.release (); |
181 } | 195 } |
182 | 196 |
183 | 197 |
184 /* Find the place of the data-ref in STMT in the interleaving chain that starts | 198 /* Find the place of the data-ref in STMT_INFO in the interleaving chain |
185 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */ | 199 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part |
186 | 200 of the chain. */ |
187 static int | 201 |
188 vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt) | 202 int |
189 { | 203 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info, |
190 gimple *next_stmt = first_stmt; | 204 stmt_vec_info first_stmt_info) |
205 { | |
206 stmt_vec_info next_stmt_info = first_stmt_info; | |
191 int result = 0; | 207 int result = 0; |
192 | 208 |
193 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) | 209 if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info)) |
194 return -1; | 210 return -1; |
195 | 211 |
196 do | 212 do |
197 { | 213 { |
198 if (next_stmt == stmt) | 214 if (next_stmt_info == stmt_info) |
199 return result; | 215 return result; |
200 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); | 216 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info); |
201 if (next_stmt) | 217 if (next_stmt_info) |
202 result += GROUP_GAP (vinfo_for_stmt (next_stmt)); | 218 result += DR_GROUP_GAP (next_stmt_info); |
203 } | 219 } |
204 while (next_stmt); | 220 while (next_stmt_info); |
205 | 221 |
206 return -1; | 222 return -1; |
207 } | 223 } |
208 | 224 |
225 /* Check whether it is possible to load COUNT elements of type ELT_MODE | |
226 using the method implemented by duplicate_and_interleave. Return true | |
227 if so, returning the number of intermediate vectors in *NVECTORS_OUT | |
228 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT | |
229 (if nonnull). */ | |
230 | |
231 bool | |
232 can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode, | |
233 unsigned int *nvectors_out, | |
234 tree *vector_type_out, | |
235 tree *permutes) | |
236 { | |
237 poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode); | |
238 poly_int64 nelts; | |
239 unsigned int nvectors = 1; | |
240 for (;;) | |
241 { | |
242 scalar_int_mode int_mode; | |
243 poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT; | |
244 if (multiple_p (current_vector_size, elt_bytes, &nelts) | |
245 && int_mode_for_size (elt_bits, 0).exists (&int_mode)) | |
246 { | |
247 tree int_type = build_nonstandard_integer_type | |
248 (GET_MODE_BITSIZE (int_mode), 1); | |
249 tree vector_type = build_vector_type (int_type, nelts); | |
250 if (VECTOR_MODE_P (TYPE_MODE (vector_type))) | |
251 { | |
252 vec_perm_builder sel1 (nelts, 2, 3); | |
253 vec_perm_builder sel2 (nelts, 2, 3); | |
254 poly_int64 half_nelts = exact_div (nelts, 2); | |
255 for (unsigned int i = 0; i < 3; ++i) | |
256 { | |
257 sel1.quick_push (i); | |
258 sel1.quick_push (i + nelts); | |
259 sel2.quick_push (half_nelts + i); | |
260 sel2.quick_push (half_nelts + i + nelts); | |
261 } | |
262 vec_perm_indices indices1 (sel1, 2, nelts); | |
263 vec_perm_indices indices2 (sel2, 2, nelts); | |
264 if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1) | |
265 && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2)) | |
266 { | |
267 if (nvectors_out) | |
268 *nvectors_out = nvectors; | |
269 if (vector_type_out) | |
270 *vector_type_out = vector_type; | |
271 if (permutes) | |
272 { | |
273 permutes[0] = vect_gen_perm_mask_checked (vector_type, | |
274 indices1); | |
275 permutes[1] = vect_gen_perm_mask_checked (vector_type, | |
276 indices2); | |
277 } | |
278 return true; | |
279 } | |
280 } | |
281 } | |
282 if (!multiple_p (elt_bytes, 2, &elt_bytes)) | |
283 return false; | |
284 nvectors *= 2; | |
285 } | |
286 } | |
209 | 287 |
210 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that | 288 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that |
211 they are of a valid type and that they match the defs of the first stmt of | 289 they are of a valid type and that they match the defs of the first stmt of |
212 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts | 290 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts |
213 by swapping operands of STMT when possible. Non-zero *SWAP indicates swap | 291 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP |
214 is required for cond_expr stmts. Specifically, *SWAP is 1 if STMT is cond | 292 indicates swap is required for cond_expr stmts. Specifically, *SWAP |
215 and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond | 293 is 1 if STMT is cond and operands of comparison need to be swapped; |
216 and code of comparison needs to be inverted. If there is any operand swap | 294 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted. |
217 in this function, *SWAP is set to non-zero value. | 295 If there is any operand swap in this function, *SWAP is set to non-zero |
296 value. | |
218 If there was a fatal error return -1; if the error could be corrected by | 297 If there was a fatal error return -1; if the error could be corrected by |
219 swapping operands of father node of this one, return 1; if everything is | 298 swapping operands of father node of this one, return 1; if everything is |
220 ok return 0. */ | 299 ok return 0. */ |
221 | |
222 static int | 300 static int |
223 vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap, | 301 vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap, |
224 gimple *stmt, unsigned stmt_num, | 302 vec<stmt_vec_info> stmts, unsigned stmt_num, |
225 vec<slp_oprnd_info> *oprnds_info) | 303 vec<slp_oprnd_info> *oprnds_info) |
226 { | 304 { |
305 stmt_vec_info stmt_info = stmts[stmt_num]; | |
227 tree oprnd; | 306 tree oprnd; |
228 unsigned int i, number_of_oprnds; | 307 unsigned int i, number_of_oprnds; |
229 gimple *def_stmt; | |
230 enum vect_def_type dt = vect_uninitialized_def; | 308 enum vect_def_type dt = vect_uninitialized_def; |
231 bool pattern = false; | 309 bool pattern = false; |
232 slp_oprnd_info oprnd_info; | 310 slp_oprnd_info oprnd_info; |
233 int first_op_idx = 1; | 311 int first_op_idx = 1; |
234 bool commutative = false; | 312 unsigned int commutative_op = -1U; |
235 bool first_op_cond = false; | 313 bool first_op_cond = false; |
236 bool first = stmt_num == 0; | 314 bool first = stmt_num == 0; |
237 bool second = stmt_num == 1; | 315 bool second = stmt_num == 1; |
238 | 316 |
239 if (is_gimple_call (stmt)) | 317 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt)) |
240 { | 318 { |
241 number_of_oprnds = gimple_call_num_args (stmt); | 319 number_of_oprnds = gimple_call_num_args (stmt); |
242 first_op_idx = 3; | 320 first_op_idx = 3; |
243 } | 321 if (gimple_call_internal_p (stmt)) |
244 else if (is_gimple_assign (stmt)) | 322 { |
323 internal_fn ifn = gimple_call_internal_fn (stmt); | |
324 commutative_op = first_commutative_argument (ifn); | |
325 } | |
326 } | |
327 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt)) | |
245 { | 328 { |
246 enum tree_code code = gimple_assign_rhs_code (stmt); | 329 enum tree_code code = gimple_assign_rhs_code (stmt); |
247 number_of_oprnds = gimple_num_ops (stmt) - 1; | 330 number_of_oprnds = gimple_num_ops (stmt) - 1; |
248 /* Swap can only be done for cond_expr if asked to, otherwise we | 331 /* Swap can only be done for cond_expr if asked to, otherwise we |
249 could result in different comparison code to the first stmt. */ | 332 could result in different comparison code to the first stmt. */ |
252 { | 335 { |
253 first_op_cond = true; | 336 first_op_cond = true; |
254 number_of_oprnds++; | 337 number_of_oprnds++; |
255 } | 338 } |
256 else | 339 else |
257 commutative = commutative_tree_code (code); | 340 commutative_op = commutative_tree_code (code) ? 0U : -1U; |
258 } | 341 } |
259 else | 342 else |
260 return -1; | 343 return -1; |
261 | 344 |
262 bool swapped = (*swap != 0); | 345 bool swapped = (*swap != 0); |
269 /* Map indicating how operands of cond_expr should be swapped. */ | 352 /* Map indicating how operands of cond_expr should be swapped. */ |
270 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}}; | 353 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}}; |
271 int *map = maps[*swap]; | 354 int *map = maps[*swap]; |
272 | 355 |
273 if (i < 2) | 356 if (i < 2) |
274 oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]); | 357 oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt, |
358 first_op_idx), map[i]); | |
275 else | 359 else |
276 oprnd = gimple_op (stmt, map[i]); | 360 oprnd = gimple_op (stmt_info->stmt, map[i]); |
277 } | 361 } |
278 else | 362 else |
279 oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i)); | 363 oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i)); |
280 | 364 |
281 oprnd_info = (*oprnds_info)[i]; | 365 oprnd_info = (*oprnds_info)[i]; |
282 | 366 |
283 if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt)) | 367 stmt_vec_info def_stmt_info; |
368 if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info)) | |
284 { | 369 { |
285 if (dump_enabled_p ()) | 370 if (dump_enabled_p ()) |
286 { | 371 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 372 "Build SLP failed: can't analyze def for %T\n", |
288 "Build SLP failed: can't analyze def for "); | 373 oprnd); |
289 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd); | |
290 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); | |
291 } | |
292 | 374 |
293 return -1; | 375 return -1; |
294 } | 376 } |
295 | |
296 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt | |
297 from the pattern. Check that all the stmts of the node are in the | |
298 pattern. */ | |
299 if (def_stmt && gimple_bb (def_stmt) | |
300 && vect_stmt_in_region_p (vinfo, def_stmt) | |
301 && vinfo_for_stmt (def_stmt) | |
302 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt)) | |
303 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt)) | |
304 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt))) | |
305 { | |
306 pattern = true; | |
307 if (!first && !oprnd_info->first_pattern | |
308 /* Allow different pattern state for the defs of the | |
309 first stmt in reduction chains. */ | |
310 && (oprnd_info->first_dt != vect_reduction_def | |
311 || (!second && !oprnd_info->second_pattern))) | |
312 { | |
313 if (i == 0 | |
314 && !swapped | |
315 && commutative) | |
316 { | |
317 swapped = true; | |
318 goto again; | |
319 } | |
320 | |
321 if (dump_enabled_p ()) | |
322 { | |
323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
324 "Build SLP failed: some of the stmts" | |
325 " are in a pattern, and others are not "); | |
326 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd); | |
327 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); | |
328 } | |
329 | |
330 return 1; | |
331 } | |
332 | |
333 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt)); | |
334 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)); | |
335 | |
336 if (dt == vect_unknown_def_type) | |
337 { | |
338 if (dump_enabled_p ()) | |
339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
340 "Unsupported pattern.\n"); | |
341 return -1; | |
342 } | |
343 | |
344 switch (gimple_code (def_stmt)) | |
345 { | |
346 case GIMPLE_PHI: | |
347 case GIMPLE_ASSIGN: | |
348 break; | |
349 | |
350 default: | |
351 if (dump_enabled_p ()) | |
352 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
353 "unsupported defining stmt:\n"); | |
354 return -1; | |
355 } | |
356 } | |
357 | 377 |
358 if (second) | 378 if (second) |
359 oprnd_info->second_pattern = pattern; | 379 oprnd_info->second_pattern = pattern; |
360 | 380 |
361 if (first) | 381 if (first) |
369 /* Not first stmt of the group, check that the def-stmt/s match | 389 /* Not first stmt of the group, check that the def-stmt/s match |
370 the def-stmt/s of the first stmt. Allow different definition | 390 the def-stmt/s of the first stmt. Allow different definition |
371 types for reduction chains: the first stmt must be a | 391 types for reduction chains: the first stmt must be a |
372 vect_reduction_def (a phi node), and the rest | 392 vect_reduction_def (a phi node), and the rest |
373 vect_internal_def. */ | 393 vect_internal_def. */ |
374 if (((oprnd_info->first_dt != dt | 394 tree type = TREE_TYPE (oprnd); |
375 && !(oprnd_info->first_dt == vect_reduction_def | 395 if ((oprnd_info->first_dt != dt |
376 && dt == vect_internal_def) | 396 && !(oprnd_info->first_dt == vect_reduction_def |
377 && !((oprnd_info->first_dt == vect_external_def | 397 && dt == vect_internal_def) |
378 || oprnd_info->first_dt == vect_constant_def) | 398 && !((oprnd_info->first_dt == vect_external_def |
379 && (dt == vect_external_def | 399 || oprnd_info->first_dt == vect_constant_def) |
380 || dt == vect_constant_def))) | 400 && (dt == vect_external_def |
381 || !types_compatible_p (oprnd_info->first_op_type, | 401 || dt == vect_constant_def))) |
382 TREE_TYPE (oprnd)))) | 402 || !types_compatible_p (oprnd_info->first_op_type, type)) |
383 { | 403 { |
384 /* Try swapping operands if we got a mismatch. */ | 404 /* Try swapping operands if we got a mismatch. */ |
385 if (i == 0 | 405 if (i == commutative_op && !swapped) |
386 && !swapped | |
387 && commutative) | |
388 { | 406 { |
389 swapped = true; | 407 swapped = true; |
390 goto again; | 408 goto again; |
391 } | 409 } |
392 | 410 |
394 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 412 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
395 "Build SLP failed: different types\n"); | 413 "Build SLP failed: different types\n"); |
396 | 414 |
397 return 1; | 415 return 1; |
398 } | 416 } |
417 if ((dt == vect_constant_def | |
418 || dt == vect_external_def) | |
419 && !current_vector_size.is_constant () | |
420 && (TREE_CODE (type) == BOOLEAN_TYPE | |
421 || !can_duplicate_and_interleave_p (stmts.length (), | |
422 TYPE_MODE (type)))) | |
423 { | |
424 if (dump_enabled_p ()) | |
425 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
426 "Build SLP failed: invalid type of def " | |
427 "for variable-length SLP %T\n", oprnd); | |
428 return -1; | |
429 } | |
399 } | 430 } |
400 | 431 |
401 /* Check the types of the definitions. */ | 432 /* Check the types of the definitions. */ |
402 switch (dt) | 433 switch (dt) |
403 { | 434 { |
406 break; | 437 break; |
407 | 438 |
408 case vect_reduction_def: | 439 case vect_reduction_def: |
409 case vect_induction_def: | 440 case vect_induction_def: |
410 case vect_internal_def: | 441 case vect_internal_def: |
411 oprnd_info->def_stmts.quick_push (def_stmt); | 442 oprnd_info->def_stmts.quick_push (def_stmt_info); |
412 break; | 443 break; |
413 | 444 |
414 default: | 445 default: |
415 /* FORNOW: Not supported. */ | 446 /* FORNOW: Not supported. */ |
416 if (dump_enabled_p ()) | 447 if (dump_enabled_p ()) |
417 { | 448 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
418 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 449 "Build SLP failed: illegal type of def %T\n", |
419 "Build SLP failed: illegal type of def "); | 450 oprnd); |
420 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd); | |
421 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); | |
422 } | |
423 | 451 |
424 return -1; | 452 return -1; |
425 } | 453 } |
426 } | 454 } |
427 | 455 |
428 /* Swap operands. */ | 456 /* Swap operands. */ |
429 if (swapped) | 457 if (swapped) |
430 { | 458 { |
431 /* If there are already uses of this stmt in a SLP instance then | 459 /* If there are already uses of this stmt in a SLP instance then |
432 we've committed to the operand order and can't swap it. */ | 460 we've committed to the operand order and can't swap it. */ |
433 if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0) | 461 if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0) |
434 { | 462 { |
435 if (dump_enabled_p ()) | 463 if (dump_enabled_p ()) |
436 { | 464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 465 "Build SLP failed: cannot swap operands of " |
438 "Build SLP failed: cannot swap operands of " | 466 "shared stmt %G", stmt_info->stmt); |
439 "shared stmt "); | |
440 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); | |
441 } | |
442 return -1; | 467 return -1; |
443 } | 468 } |
444 | 469 |
445 if (first_op_cond) | 470 if (first_op_cond) |
446 { | 471 { |
472 gassign *stmt = as_a <gassign *> (stmt_info->stmt); | |
447 tree cond = gimple_assign_rhs1 (stmt); | 473 tree cond = gimple_assign_rhs1 (stmt); |
448 enum tree_code code = TREE_CODE (cond); | 474 enum tree_code code = TREE_CODE (cond); |
449 | 475 |
450 /* Swap. */ | 476 /* Swap. */ |
451 if (*swap == 1) | 477 if (*swap == 1) |
464 gcc_assert (code != ERROR_MARK); | 490 gcc_assert (code != ERROR_MARK); |
465 TREE_SET_CODE (cond, code); | 491 TREE_SET_CODE (cond, code); |
466 } | 492 } |
467 } | 493 } |
468 else | 494 else |
469 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt), | 495 { |
470 gimple_assign_rhs2_ptr (stmt)); | 496 unsigned int op = commutative_op + first_op_idx; |
497 swap_ssa_operands (stmt_info->stmt, | |
498 gimple_op_ptr (stmt_info->stmt, op), | |
499 gimple_op_ptr (stmt_info->stmt, op + 1)); | |
500 } | |
471 if (dump_enabled_p ()) | 501 if (dump_enabled_p ()) |
472 { | 502 dump_printf_loc (MSG_NOTE, vect_location, |
473 dump_printf_loc (MSG_NOTE, vect_location, | 503 "swapped operands to match def types in %G", |
474 "swapped operands to match def types in "); | 504 stmt_info->stmt); |
475 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); | |
476 } | |
477 } | 505 } |
478 | 506 |
479 *swap = swapped; | 507 *swap = swapped; |
480 return 0; | 508 return 0; |
481 } | 509 } |
482 | 510 |
511 /* Return true if call statements CALL1 and CALL2 are similar enough | |
512 to be combined into the same SLP group. */ | |
513 | |
514 static bool | |
515 compatible_calls_p (gcall *call1, gcall *call2) | |
516 { | |
517 unsigned int nargs = gimple_call_num_args (call1); | |
518 if (nargs != gimple_call_num_args (call2)) | |
519 return false; | |
520 | |
521 if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2)) | |
522 return false; | |
523 | |
524 if (gimple_call_internal_p (call1)) | |
525 { | |
526 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)), | |
527 TREE_TYPE (gimple_call_lhs (call2)))) | |
528 return false; | |
529 for (unsigned int i = 0; i < nargs; ++i) | |
530 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)), | |
531 TREE_TYPE (gimple_call_arg (call2, i)))) | |
532 return false; | |
533 } | |
534 else | |
535 { | |
536 if (!operand_equal_p (gimple_call_fn (call1), | |
537 gimple_call_fn (call2), 0)) | |
538 return false; | |
539 | |
540 if (gimple_call_fntype (call1) != gimple_call_fntype (call2)) | |
541 return false; | |
542 } | |
543 return true; | |
544 } | |
545 | |
483 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the | 546 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the |
484 caller's attempt to find the vector type in STMT with the narrowest | 547 caller's attempt to find the vector type in STMT_INFO with the narrowest |
485 element type. Return true if VECTYPE is nonnull and if it is valid | 548 element type. Return true if VECTYPE is nonnull and if it is valid |
486 for VINFO. When returning true, update MAX_NUNITS to reflect the | 549 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the |
487 number of units in VECTYPE. VINFO, GORUP_SIZE and MAX_NUNITS are | 550 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for |
488 as for vect_build_slp_tree. */ | 551 vect_build_slp_tree. */ |
489 | 552 |
490 static bool | 553 static bool |
491 vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size, | 554 vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size, |
492 tree vectype, unsigned int *max_nunits) | 555 tree vectype, poly_uint64 *max_nunits) |
493 { | 556 { |
494 if (!vectype) | 557 if (!vectype) |
495 { | 558 { |
496 if (dump_enabled_p ()) | 559 if (dump_enabled_p ()) |
497 { | 560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
498 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 561 "Build SLP failed: unsupported data-type in %G\n", |
499 "Build SLP failed: unsupported data-type in "); | 562 stmt_info->stmt); |
500 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); | |
501 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); | |
502 } | |
503 /* Fatal mismatch. */ | 563 /* Fatal mismatch. */ |
504 return false; | 564 return false; |
505 } | 565 } |
506 | 566 |
507 /* If populating the vector type requires unrolling then fail | 567 /* If populating the vector type requires unrolling then fail |
508 before adjusting *max_nunits for basic-block vectorization. */ | 568 before adjusting *max_nunits for basic-block vectorization. */ |
509 if (is_a <bb_vec_info> (vinfo) | 569 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); |
510 && TYPE_VECTOR_SUBPARTS (vectype) > group_size) | 570 unsigned HOST_WIDE_INT const_nunits; |
571 if (STMT_VINFO_BB_VINFO (stmt_info) | |
572 && (!nunits.is_constant (&const_nunits) | |
573 || const_nunits > group_size)) | |
511 { | 574 { |
512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
513 "Build SLP failed: unrolling required " | 576 "Build SLP failed: unrolling required " |
514 "in basic block SLP\n"); | 577 "in basic block SLP\n"); |
515 /* Fatal mismatch. */ | 578 /* Fatal mismatch. */ |
516 return false; | 579 return false; |
517 } | 580 } |
518 | 581 |
519 /* In case of multiple types we need to detect the smallest type. */ | 582 /* In case of multiple types we need to detect the smallest type. */ |
520 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype)) | 583 vect_update_max_nunits (max_nunits, vectype); |
521 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype); | |
522 | |
523 return true; | 584 return true; |
585 } | |
586 | |
587 /* STMTS is a group of GROUP_SIZE SLP statements in which some | |
588 statements do the same operation as the first statement and in which | |
589 the others do ALT_STMT_CODE. Return true if we can take one vector | |
590 of the first operation and one vector of the second and permute them | |
591 to get the required result. VECTYPE is the type of the vector that | |
592 would be permuted. */ | |
593 | |
594 static bool | |
595 vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts, | |
596 unsigned int group_size, tree vectype, | |
597 tree_code alt_stmt_code) | |
598 { | |
599 unsigned HOST_WIDE_INT count; | |
600 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count)) | |
601 return false; | |
602 | |
603 vec_perm_builder sel (count, count, 1); | |
604 for (unsigned int i = 0; i < count; ++i) | |
605 { | |
606 unsigned int elt = i; | |
607 gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt); | |
608 if (gimple_assign_rhs_code (stmt) == alt_stmt_code) | |
609 elt += count; | |
610 sel.quick_push (elt); | |
611 } | |
612 vec_perm_indices indices (sel, 2, count); | |
613 return can_vec_perm_const_p (TYPE_MODE (vectype), indices); | |
524 } | 614 } |
525 | 615 |
526 /* Verify if the scalar stmts STMTS are isomorphic, require data | 616 /* Verify if the scalar stmts STMTS are isomorphic, require data |
527 permutation or are of unsupported types of operation. Return | 617 permutation or are of unsupported types of operation. Return |
528 true if they are, otherwise return false and indicate in *MATCHES | 618 true if they are, otherwise return false and indicate in *MATCHES |
536 to 2 if stmt I is isormorphic to the first stmt by inverting the code | 626 to 2 if stmt I is isormorphic to the first stmt by inverting the code |
537 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped | 627 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped |
538 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */ | 628 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */ |
539 | 629 |
540 static bool | 630 static bool |
541 vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, | 631 vect_build_slp_tree_1 (unsigned char *swap, |
542 vec<gimple *> stmts, unsigned int group_size, | 632 vec<stmt_vec_info> stmts, unsigned int group_size, |
543 unsigned nops, unsigned int *max_nunits, | 633 poly_uint64 *max_nunits, bool *matches, |
544 bool *matches, bool *two_operators) | 634 bool *two_operators) |
545 { | 635 { |
546 unsigned int i; | 636 unsigned int i; |
547 gimple *first_stmt = stmts[0], *stmt = stmts[0]; | 637 stmt_vec_info first_stmt_info = stmts[0]; |
548 enum tree_code first_stmt_code = ERROR_MARK; | 638 enum tree_code first_stmt_code = ERROR_MARK; |
549 enum tree_code alt_stmt_code = ERROR_MARK; | 639 enum tree_code alt_stmt_code = ERROR_MARK; |
550 enum tree_code rhs_code = ERROR_MARK; | 640 enum tree_code rhs_code = ERROR_MARK; |
551 enum tree_code first_cond_code = ERROR_MARK; | 641 enum tree_code first_cond_code = ERROR_MARK; |
552 tree lhs; | 642 tree lhs; |
553 bool need_same_oprnds = false; | 643 bool need_same_oprnds = false; |
554 tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE; | 644 tree vectype = NULL_TREE, first_op1 = NULL_TREE; |
555 optab optab; | 645 optab optab; |
556 int icode; | 646 int icode; |
557 machine_mode optab_op2_mode; | 647 machine_mode optab_op2_mode; |
558 machine_mode vec_mode; | 648 machine_mode vec_mode; |
559 HOST_WIDE_INT dummy; | 649 stmt_vec_info first_load = NULL, prev_first_load = NULL; |
560 gimple *first_load = NULL, *prev_first_load = NULL; | |
561 | 650 |
562 /* For every stmt in NODE find its def stmt/s. */ | 651 /* For every stmt in NODE find its def stmt/s. */ |
563 FOR_EACH_VEC_ELT (stmts, i, stmt) | 652 stmt_vec_info stmt_info; |
564 { | 653 FOR_EACH_VEC_ELT (stmts, i, stmt_info) |
654 { | |
655 gimple *stmt = stmt_info->stmt; | |
565 swap[i] = 0; | 656 swap[i] = 0; |
566 matches[i] = false; | 657 matches[i] = false; |
567 | 658 |
568 if (dump_enabled_p ()) | 659 if (dump_enabled_p ()) |
569 { | 660 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt); |
570 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for "); | |
571 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); | |
572 } | |
573 | 661 |
574 /* Fail to vectorize statements marked as unvectorizable. */ | 662 /* Fail to vectorize statements marked as unvectorizable. */ |
575 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt))) | 663 if (!STMT_VINFO_VECTORIZABLE (stmt_info)) |
576 { | 664 { |
577 if (dump_enabled_p ()) | 665 if (dump_enabled_p ()) |
578 { | 666 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 667 "Build SLP failed: unvectorizable statement %G", |
580 "Build SLP failed: unvectorizable statement "); | 668 stmt); |
581 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); | |
582 } | |
583 /* Fatal mismatch. */ | 669 /* Fatal mismatch. */ |
584 matches[0] = false; | 670 matches[0] = false; |
585 return false; | 671 return false; |
586 } | 672 } |
587 | 673 |
588 lhs = gimple_get_lhs (stmt); | 674 lhs = gimple_get_lhs (stmt); |
589 if (lhs == NULL_TREE) | 675 if (lhs == NULL_TREE) |
590 { | 676 { |
591 if (dump_enabled_p ()) | 677 if (dump_enabled_p ()) |
592 { | 678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 679 "Build SLP failed: not GIMPLE_ASSIGN nor " |
594 "Build SLP failed: not GIMPLE_ASSIGN nor " | 680 "GIMPLE_CALL %G", stmt); |
595 "GIMPLE_CALL "); | |
596 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); | |
597 } | |
598 /* Fatal mismatch. */ | 681 /* Fatal mismatch. */ |
599 matches[0] = false; | 682 matches[0] = false; |
600 return false; | 683 return false; |
601 } | 684 } |
602 | 685 |
603 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy); | 686 tree nunits_vectype; |
604 vectype = get_vectype_for_scalar_type (scalar_type); | 687 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype, |
605 if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype, | 688 &nunits_vectype) |
606 max_nunits)) | 689 || (nunits_vectype |
690 && !vect_record_max_nunits (stmt_info, group_size, | |
691 nunits_vectype, max_nunits))) | |
607 { | 692 { |
608 /* Fatal mismatch. */ | 693 /* Fatal mismatch. */ |
609 matches[0] = false; | 694 matches[0] = false; |
610 return false; | 695 return false; |
611 } | 696 } |
697 | |
698 gcc_assert (vectype); | |
612 | 699 |
613 if (gcall *call_stmt = dyn_cast <gcall *> (stmt)) | 700 if (gcall *call_stmt = dyn_cast <gcall *> (stmt)) |
614 { | 701 { |
615 rhs_code = CALL_EXPR; | 702 rhs_code = CALL_EXPR; |
616 if (gimple_call_internal_p (call_stmt) | 703 if ((gimple_call_internal_p (call_stmt) |
704 && (!vectorizable_internal_fn_p | |
705 (gimple_call_internal_fn (call_stmt)))) | |
617 || gimple_call_tail_p (call_stmt) | 706 || gimple_call_tail_p (call_stmt) |
618 || gimple_call_noreturn_p (call_stmt) | 707 || gimple_call_noreturn_p (call_stmt) |
619 || !gimple_call_nothrow_p (call_stmt) | 708 || !gimple_call_nothrow_p (call_stmt) |
620 || gimple_call_chain (call_stmt)) | 709 || gimple_call_chain (call_stmt)) |
621 { | 710 { |
622 if (dump_enabled_p ()) | 711 if (dump_enabled_p ()) |
623 { | 712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 713 "Build SLP failed: unsupported call type %G", |
625 "Build SLP failed: unsupported call type "); | 714 call_stmt); |
626 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, | |
627 call_stmt, 0); | |
628 } | |
629 /* Fatal mismatch. */ | 715 /* Fatal mismatch. */ |
630 matches[0] = false; | 716 matches[0] = false; |
631 return false; | 717 return false; |
632 } | 718 } |
633 } | 719 } |
643 vector shift with scalar shift operand. */ | 729 vector shift with scalar shift operand. */ |
644 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR | 730 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR |
645 || rhs_code == LROTATE_EXPR | 731 || rhs_code == LROTATE_EXPR |
646 || rhs_code == RROTATE_EXPR) | 732 || rhs_code == RROTATE_EXPR) |
647 { | 733 { |
734 if (vectype == boolean_type_node) | |
735 { | |
736 if (dump_enabled_p ()) | |
737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
738 "Build SLP failed: shift of a" | |
739 " boolean.\n"); | |
740 /* Fatal mismatch. */ | |
741 matches[0] = false; | |
742 return false; | |
743 } | |
744 | |
648 vec_mode = TYPE_MODE (vectype); | 745 vec_mode = TYPE_MODE (vectype); |
649 | 746 |
650 /* First see if we have a vector/vector shift. */ | 747 /* First see if we have a vector/vector shift. */ |
651 optab = optab_for_tree_code (rhs_code, vectype, | 748 optab = optab_for_tree_code (rhs_code, vectype, |
652 optab_vector); | 749 optab_vector); |
707 && !((first_stmt_code == PLUS_EXPR | 804 && !((first_stmt_code == PLUS_EXPR |
708 || first_stmt_code == MINUS_EXPR) | 805 || first_stmt_code == MINUS_EXPR) |
709 && (alt_stmt_code == PLUS_EXPR | 806 && (alt_stmt_code == PLUS_EXPR |
710 || alt_stmt_code == MINUS_EXPR) | 807 || alt_stmt_code == MINUS_EXPR) |
711 && rhs_code == alt_stmt_code) | 808 && rhs_code == alt_stmt_code) |
712 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)) | 809 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info) |
713 && (first_stmt_code == ARRAY_REF | 810 && (first_stmt_code == ARRAY_REF |
714 || first_stmt_code == BIT_FIELD_REF | 811 || first_stmt_code == BIT_FIELD_REF |
715 || first_stmt_code == INDIRECT_REF | 812 || first_stmt_code == INDIRECT_REF |
716 || first_stmt_code == COMPONENT_REF | 813 || first_stmt_code == COMPONENT_REF |
717 || first_stmt_code == MEM_REF))) | 814 || first_stmt_code == MEM_REF))) |
718 { | 815 { |
719 if (dump_enabled_p ()) | 816 if (dump_enabled_p ()) |
720 { | 817 { |
721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
722 "Build SLP failed: different operation " | 819 "Build SLP failed: different operation " |
723 "in stmt "); | 820 "in stmt %G", stmt); |
724 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); | |
725 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 821 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
726 "original stmt "); | 822 "original stmt %G", first_stmt_info->stmt); |
727 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, | |
728 first_stmt, 0); | |
729 } | 823 } |
730 /* Mismatch. */ | 824 /* Mismatch. */ |
731 continue; | 825 continue; |
732 } | 826 } |
733 | 827 |
734 if (need_same_oprnds | 828 if (need_same_oprnds |
735 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0)) | 829 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0)) |
736 { | 830 { |
737 if (dump_enabled_p ()) | 831 if (dump_enabled_p ()) |
738 { | 832 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 833 "Build SLP failed: different shift " |
740 "Build SLP failed: different shift " | 834 "arguments in %G", stmt); |
741 "arguments in "); | |
742 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); | |
743 } | |
744 /* Mismatch. */ | 835 /* Mismatch. */ |
745 continue; | 836 continue; |
746 } | 837 } |
747 | 838 |
748 if (rhs_code == CALL_EXPR) | 839 if (rhs_code == CALL_EXPR) |
749 { | 840 { |
750 gimple *first_stmt = stmts[0]; | 841 if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt), |
751 if (gimple_call_num_args (stmt) != nops | 842 as_a <gcall *> (stmt))) |
752 || !operand_equal_p (gimple_call_fn (first_stmt), | |
753 gimple_call_fn (stmt), 0) | |
754 || gimple_call_fntype (first_stmt) | |
755 != gimple_call_fntype (stmt)) | |
756 { | 843 { |
757 if (dump_enabled_p ()) | 844 if (dump_enabled_p ()) |
758 { | 845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
759 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 846 "Build SLP failed: different calls in %G", |
760 "Build SLP failed: different calls in "); | 847 stmt); |
761 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, | |
762 stmt, 0); | |
763 } | |
764 /* Mismatch. */ | 848 /* Mismatch. */ |
765 continue; | 849 continue; |
766 } | 850 } |
767 } | 851 } |
768 } | 852 } |
769 | 853 |
770 /* Grouped store or load. */ | 854 /* Grouped store or load. */ |
771 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))) | 855 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) |
772 { | 856 { |
773 if (REFERENCE_CLASS_P (lhs)) | 857 if (REFERENCE_CLASS_P (lhs)) |
774 { | 858 { |
775 /* Store. */ | 859 /* Store. */ |
776 ; | 860 ; |
777 } | 861 } |
778 else | 862 else |
779 { | 863 { |
780 /* Load. */ | 864 /* Load. */ |
781 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)); | 865 first_load = DR_GROUP_FIRST_ELEMENT (stmt_info); |
782 if (prev_first_load) | 866 if (prev_first_load) |
783 { | 867 { |
784 /* Check that there are no loads from different interleaving | 868 /* Check that there are no loads from different interleaving |
785 chains in the same node. */ | 869 chains in the same node. */ |
786 if (prev_first_load != first_load) | 870 if (prev_first_load != first_load) |
787 { | 871 { |
788 if (dump_enabled_p ()) | 872 if (dump_enabled_p ()) |
789 { | 873 dump_printf_loc (MSG_MISSED_OPTIMIZATION, |
790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, | 874 vect_location, |
791 vect_location, | 875 "Build SLP failed: different " |
792 "Build SLP failed: different " | 876 "interleaving chains in one node %G", |
793 "interleaving chains in one node "); | 877 stmt); |
794 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, | |
795 stmt, 0); | |
796 } | |
797 /* Mismatch. */ | 878 /* Mismatch. */ |
798 continue; | 879 continue; |
799 } | 880 } |
800 } | 881 } |
801 else | 882 else |
806 { | 887 { |
807 if (TREE_CODE_CLASS (rhs_code) == tcc_reference) | 888 if (TREE_CODE_CLASS (rhs_code) == tcc_reference) |
808 { | 889 { |
809 /* Not grouped load. */ | 890 /* Not grouped load. */ |
810 if (dump_enabled_p ()) | 891 if (dump_enabled_p ()) |
811 { | 892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
812 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 893 "Build SLP failed: not grouped load %G", stmt); |
813 "Build SLP failed: not grouped load "); | |
814 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); | |
815 } | |
816 | 894 |
817 /* FORNOW: Not grouped loads are not supported. */ | 895 /* FORNOW: Not grouped loads are not supported. */ |
818 /* Fatal mismatch. */ | 896 /* Fatal mismatch. */ |
819 matches[0] = false; | 897 matches[0] = false; |
820 return false; | 898 return false; |
826 && TREE_CODE_CLASS (rhs_code) != tcc_expression | 904 && TREE_CODE_CLASS (rhs_code) != tcc_expression |
827 && TREE_CODE_CLASS (rhs_code) != tcc_comparison | 905 && TREE_CODE_CLASS (rhs_code) != tcc_comparison |
828 && rhs_code != CALL_EXPR) | 906 && rhs_code != CALL_EXPR) |
829 { | 907 { |
830 if (dump_enabled_p ()) | 908 if (dump_enabled_p ()) |
831 { | 909 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
832 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 910 "Build SLP failed: operation unsupported %G", |
833 "Build SLP failed: operation"); | 911 stmt); |
834 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported "); | |
835 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); | |
836 } | |
837 /* Fatal mismatch. */ | 912 /* Fatal mismatch. */ |
838 matches[0] = false; | 913 matches[0] = false; |
839 return false; | 914 return false; |
840 } | 915 } |
841 | 916 |
864 else if (first_cond_code == invert_code) | 939 else if (first_cond_code == invert_code) |
865 swap[i] = 2; | 940 swap[i] = 2; |
866 else | 941 else |
867 { | 942 { |
868 if (dump_enabled_p ()) | 943 if (dump_enabled_p ()) |
869 { | 944 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
870 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 945 "Build SLP failed: different" |
871 "Build SLP failed: different" | 946 " operation %G", stmt); |
872 " operation"); | |
873 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, | |
874 stmt, 0); | |
875 } | |
876 /* Mismatch. */ | 947 /* Mismatch. */ |
877 continue; | 948 continue; |
878 } | 949 } |
879 } | 950 } |
880 } | 951 } |
889 /* If we allowed a two-operation SLP node verify the target can cope | 960 /* If we allowed a two-operation SLP node verify the target can cope |
890 with the permute we are going to use. */ | 961 with the permute we are going to use. */ |
891 if (alt_stmt_code != ERROR_MARK | 962 if (alt_stmt_code != ERROR_MARK |
892 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference) | 963 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference) |
893 { | 964 { |
894 unsigned int count = TYPE_VECTOR_SUBPARTS (vectype); | 965 if (vectype == boolean_type_node |
895 auto_vec_perm_indices sel (count); | 966 || !vect_two_operations_perm_ok_p (stmts, group_size, |
896 for (i = 0; i < count; ++i) | 967 vectype, alt_stmt_code)) |
897 { | |
898 unsigned int elt = i; | |
899 if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code) | |
900 elt += count; | |
901 sel.quick_push (elt); | |
902 } | |
903 if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel)) | |
904 { | 968 { |
905 for (i = 0; i < group_size; ++i) | 969 for (i = 0; i < group_size; ++i) |
906 if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code) | 970 if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code) |
907 { | 971 { |
908 matches[i] = false; | 972 matches[i] = false; |
909 if (dump_enabled_p ()) | 973 if (dump_enabled_p ()) |
910 { | 974 { |
911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 975 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
912 "Build SLP failed: different operation " | 976 "Build SLP failed: different operation " |
913 "in stmt "); | 977 "in stmt %G", stmts[i]->stmt); |
914 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, | |
915 stmts[i], 0); | |
916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 978 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
917 "original stmt "); | 979 "original stmt %G", first_stmt_info->stmt); |
918 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, | |
919 first_stmt, 0); | |
920 } | 980 } |
921 } | 981 } |
922 return false; | 982 return false; |
923 } | 983 } |
924 *two_operators = true; | 984 *two_operators = true; |
930 /* Traits for the hash_set to record failed SLP builds for a stmt set. | 990 /* Traits for the hash_set to record failed SLP builds for a stmt set. |
931 Note we never remove apart from at destruction time so we do not | 991 Note we never remove apart from at destruction time so we do not |
932 need a special value for deleted that differs from empty. */ | 992 need a special value for deleted that differs from empty. */ |
933 struct bst_traits | 993 struct bst_traits |
934 { | 994 { |
935 typedef vec <gimple *> value_type; | 995 typedef vec <stmt_vec_info> value_type; |
936 typedef vec <gimple *> compare_type; | 996 typedef vec <stmt_vec_info> compare_type; |
937 static inline hashval_t hash (value_type); | 997 static inline hashval_t hash (value_type); |
938 static inline bool equal (value_type existing, value_type candidate); | 998 static inline bool equal (value_type existing, value_type candidate); |
939 static inline bool is_empty (value_type x) { return !x.exists (); } | 999 static inline bool is_empty (value_type x) { return !x.exists (); } |
940 static inline bool is_deleted (value_type x) { return !x.exists (); } | 1000 static inline bool is_deleted (value_type x) { return !x.exists (); } |
941 static inline void mark_empty (value_type &x) { x.release (); } | 1001 static inline void mark_empty (value_type &x) { x.release (); } |
945 inline hashval_t | 1005 inline hashval_t |
946 bst_traits::hash (value_type x) | 1006 bst_traits::hash (value_type x) |
947 { | 1007 { |
948 inchash::hash h; | 1008 inchash::hash h; |
949 for (unsigned i = 0; i < x.length (); ++i) | 1009 for (unsigned i = 0; i < x.length (); ++i) |
950 h.add_int (gimple_uid (x[i])); | 1010 h.add_int (gimple_uid (x[i]->stmt)); |
951 return h.end (); | 1011 return h.end (); |
952 } | 1012 } |
953 inline bool | 1013 inline bool |
954 bst_traits::equal (value_type existing, value_type candidate) | 1014 bst_traits::equal (value_type existing, value_type candidate) |
955 { | 1015 { |
959 if (existing[i] != candidate[i]) | 1019 if (existing[i] != candidate[i]) |
960 return false; | 1020 return false; |
961 return true; | 1021 return true; |
962 } | 1022 } |
963 | 1023 |
964 static hash_set <vec <gimple *>, bst_traits> *bst_fail; | 1024 typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t; |
1025 static scalar_stmts_set_t *bst_fail; | |
1026 | |
1027 typedef hash_map <vec <gimple *>, slp_tree, | |
1028 simple_hashmap_traits <bst_traits, slp_tree> > | |
1029 scalar_stmts_to_slp_tree_map_t; | |
965 | 1030 |
966 static slp_tree | 1031 static slp_tree |
967 vect_build_slp_tree_2 (vec_info *vinfo, | 1032 vect_build_slp_tree_2 (vec_info *vinfo, |
968 vec<gimple *> stmts, unsigned int group_size, | 1033 vec<stmt_vec_info> stmts, unsigned int group_size, |
969 unsigned int *max_nunits, | 1034 poly_uint64 *max_nunits, |
970 vec<slp_tree> *loads, | 1035 vec<slp_tree> *loads, |
971 bool *matches, unsigned *npermutes, unsigned *tree_size, | 1036 bool *matches, unsigned *npermutes, unsigned *tree_size, |
972 unsigned max_tree_size); | 1037 unsigned max_tree_size); |
973 | 1038 |
974 static slp_tree | 1039 static slp_tree |
975 vect_build_slp_tree (vec_info *vinfo, | 1040 vect_build_slp_tree (vec_info *vinfo, |
976 vec<gimple *> stmts, unsigned int group_size, | 1041 vec<stmt_vec_info> stmts, unsigned int group_size, |
977 unsigned int *max_nunits, | 1042 poly_uint64 *max_nunits, vec<slp_tree> *loads, |
978 vec<slp_tree> *loads, | |
979 bool *matches, unsigned *npermutes, unsigned *tree_size, | 1043 bool *matches, unsigned *npermutes, unsigned *tree_size, |
980 unsigned max_tree_size) | 1044 unsigned max_tree_size) |
981 { | 1045 { |
982 if (bst_fail->contains (stmts)) | 1046 if (bst_fail->contains (stmts)) |
983 return NULL; | 1047 return NULL; |
987 /* When SLP build fails for stmts record this, otherwise SLP build | 1051 /* When SLP build fails for stmts record this, otherwise SLP build |
988 can be exponential in time when we allow to construct parts from | 1052 can be exponential in time when we allow to construct parts from |
989 scalars, see PR81723. */ | 1053 scalars, see PR81723. */ |
990 if (! res) | 1054 if (! res) |
991 { | 1055 { |
992 vec <gimple *> x; | 1056 vec <stmt_vec_info> x; |
993 x.create (stmts.length ()); | 1057 x.create (stmts.length ()); |
994 x.splice (stmts); | 1058 x.splice (stmts); |
995 bst_fail->add (x); | 1059 bst_fail->add (x); |
996 } | 1060 } |
997 return res; | 1061 return res; |
1004 The value returned is the depth in the SLP tree where a mismatch | 1068 The value returned is the depth in the SLP tree where a mismatch |
1005 was found. */ | 1069 was found. */ |
1006 | 1070 |
1007 static slp_tree | 1071 static slp_tree |
1008 vect_build_slp_tree_2 (vec_info *vinfo, | 1072 vect_build_slp_tree_2 (vec_info *vinfo, |
1009 vec<gimple *> stmts, unsigned int group_size, | 1073 vec<stmt_vec_info> stmts, unsigned int group_size, |
1010 unsigned int *max_nunits, | 1074 poly_uint64 *max_nunits, |
1011 vec<slp_tree> *loads, | 1075 vec<slp_tree> *loads, |
1012 bool *matches, unsigned *npermutes, unsigned *tree_size, | 1076 bool *matches, unsigned *npermutes, unsigned *tree_size, |
1013 unsigned max_tree_size) | 1077 unsigned max_tree_size) |
1014 { | 1078 { |
1015 unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits; | 1079 unsigned nops, i, this_tree_size = 0; |
1016 gimple *stmt; | 1080 poly_uint64 this_max_nunits = *max_nunits; |
1017 slp_tree node; | 1081 slp_tree node; |
1018 | 1082 |
1019 matches[0] = false; | 1083 matches[0] = false; |
1020 | 1084 |
1021 stmt = stmts[0]; | 1085 stmt_vec_info stmt_info = stmts[0]; |
1022 if (is_gimple_call (stmt)) | 1086 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt)) |
1023 nops = gimple_call_num_args (stmt); | 1087 nops = gimple_call_num_args (stmt); |
1024 else if (is_gimple_assign (stmt)) | 1088 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt)) |
1025 { | 1089 { |
1026 nops = gimple_num_ops (stmt) - 1; | 1090 nops = gimple_num_ops (stmt) - 1; |
1027 if (gimple_assign_rhs_code (stmt) == COND_EXPR) | 1091 if (gimple_assign_rhs_code (stmt) == COND_EXPR) |
1028 nops++; | 1092 nops++; |
1029 } | 1093 } |
1030 else if (gimple_code (stmt) == GIMPLE_PHI) | 1094 else if (is_a <gphi *> (stmt_info->stmt)) |
1031 nops = 0; | 1095 nops = 0; |
1032 else | 1096 else |
1033 return NULL; | 1097 return NULL; |
1034 | 1098 |
1035 /* If the SLP node is a PHI (induction or reduction), terminate | 1099 /* If the SLP node is a PHI (induction or reduction), terminate |
1036 the recursion. */ | 1100 the recursion. */ |
1037 if (gimple_code (stmt) == GIMPLE_PHI) | 1101 if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt)) |
1038 { | 1102 { |
1039 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt)); | 1103 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt)); |
1040 tree vectype = get_vectype_for_scalar_type (scalar_type); | 1104 tree vectype = get_vectype_for_scalar_type (scalar_type); |
1041 if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype, | 1105 if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits)) |
1042 max_nunits)) | |
1043 return NULL; | 1106 return NULL; |
1044 | 1107 |
1045 vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)); | 1108 vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info); |
1046 /* Induction from different IVs is not supported. */ | 1109 /* Induction from different IVs is not supported. */ |
1047 if (def_type == vect_induction_def) | 1110 if (def_type == vect_induction_def) |
1048 { | 1111 { |
1049 FOR_EACH_VEC_ELT (stmts, i, stmt) | 1112 stmt_vec_info other_info; |
1050 if (stmt != stmts[0]) | 1113 FOR_EACH_VEC_ELT (stmts, i, other_info) |
1114 if (stmt_info != other_info) | |
1051 return NULL; | 1115 return NULL; |
1052 } | 1116 } |
1053 else | 1117 else |
1054 { | 1118 { |
1055 /* Else def types have to match. */ | 1119 /* Else def types have to match. */ |
1056 FOR_EACH_VEC_ELT (stmts, i, stmt) | 1120 stmt_vec_info other_info; |
1121 FOR_EACH_VEC_ELT (stmts, i, other_info) | |
1057 { | 1122 { |
1058 /* But for reduction chains only check on the first stmt. */ | 1123 /* But for reduction chains only check on the first stmt. */ |
1059 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) | 1124 if (REDUC_GROUP_FIRST_ELEMENT (other_info) |
1060 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt) | 1125 && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info) |
1061 continue; | 1126 continue; |
1062 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type) | 1127 if (STMT_VINFO_DEF_TYPE (other_info) != def_type) |
1063 return NULL; | 1128 return NULL; |
1064 } | 1129 } |
1065 } | 1130 } |
1066 node = vect_create_new_slp_node (stmts); | 1131 node = vect_create_new_slp_node (stmts); |
1067 return node; | 1132 return node; |
1068 } | 1133 } |
1069 | 1134 |
1070 | 1135 |
1071 bool two_operators = false; | 1136 bool two_operators = false; |
1072 unsigned char *swap = XALLOCAVEC (unsigned char, group_size); | 1137 unsigned char *swap = XALLOCAVEC (unsigned char, group_size); |
1073 if (!vect_build_slp_tree_1 (vinfo, swap, | 1138 if (!vect_build_slp_tree_1 (swap, stmts, group_size, |
1074 stmts, group_size, nops, | |
1075 &this_max_nunits, matches, &two_operators)) | 1139 &this_max_nunits, matches, &two_operators)) |
1076 return NULL; | 1140 return NULL; |
1077 | 1141 |
1078 /* If the SLP node is a load, terminate the recursion. */ | 1142 /* If the SLP node is a load, terminate the recursion. */ |
1079 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)) | 1143 if (STMT_VINFO_GROUPED_ACCESS (stmt_info) |
1080 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))) | 1144 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) |
1081 { | 1145 { |
1082 *max_nunits = this_max_nunits; | 1146 *max_nunits = this_max_nunits; |
1083 node = vect_create_new_slp_node (stmts); | 1147 node = vect_create_new_slp_node (stmts); |
1084 loads->safe_push (node); | 1148 loads->safe_push (node); |
1085 return node; | 1149 return node; |
1086 } | 1150 } |
1087 | 1151 |
1088 /* Get at the operands, verifying they are compatible. */ | 1152 /* Get at the operands, verifying they are compatible. */ |
1089 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size); | 1153 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size); |
1090 slp_oprnd_info oprnd_info; | 1154 slp_oprnd_info oprnd_info; |
1091 FOR_EACH_VEC_ELT (stmts, i, stmt) | 1155 FOR_EACH_VEC_ELT (stmts, i, stmt_info) |
1092 { | 1156 { |
1093 int res = vect_get_and_check_slp_defs (vinfo, &swap[i], | 1157 int res = vect_get_and_check_slp_defs (vinfo, &swap[i], |
1094 stmt, i, &oprnds_info); | 1158 stmts, i, &oprnds_info); |
1095 if (res != 0) | 1159 if (res != 0) |
1096 matches[(res == -1) ? 0 : i] = false; | 1160 matches[(res == -1) ? 0 : i] = false; |
1097 if (!matches[0]) | 1161 if (!matches[0]) |
1098 break; | 1162 break; |
1099 } | 1163 } |
1105 } | 1169 } |
1106 | 1170 |
1107 auto_vec<slp_tree, 4> children; | 1171 auto_vec<slp_tree, 4> children; |
1108 auto_vec<slp_tree> this_loads; | 1172 auto_vec<slp_tree> this_loads; |
1109 | 1173 |
1110 stmt = stmts[0]; | 1174 stmt_info = stmts[0]; |
1111 | 1175 |
1112 if (tree_size) | 1176 if (tree_size) |
1113 max_tree_size -= *tree_size; | 1177 max_tree_size -= *tree_size; |
1114 | 1178 |
1115 /* Create SLP_TREE nodes for the definition node/s. */ | 1179 /* Create SLP_TREE nodes for the definition node/s. */ |
1125 && oprnd_info->first_dt != vect_induction_def) | 1189 && oprnd_info->first_dt != vect_induction_def) |
1126 continue; | 1190 continue; |
1127 | 1191 |
1128 if (++this_tree_size > max_tree_size) | 1192 if (++this_tree_size > max_tree_size) |
1129 { | 1193 { |
1194 if (dump_enabled_p ()) | |
1195 dump_printf_loc (MSG_MISSED_OPTIMIZATION, | |
1196 vect_location, | |
1197 "Build SLP failed: SLP tree too large\n"); | |
1130 FOR_EACH_VEC_ELT (children, j, child) | 1198 FOR_EACH_VEC_ELT (children, j, child) |
1131 vect_free_slp_tree (child); | 1199 vect_free_slp_tree (child, false); |
1132 vect_free_oprnd_info (oprnds_info); | 1200 vect_free_oprnd_info (oprnds_info); |
1133 return NULL; | 1201 return NULL; |
1134 } | 1202 } |
1135 | 1203 |
1136 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts, | 1204 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts, |
1143 throw that away and build it up this node from scalars. */ | 1211 throw that away and build it up this node from scalars. */ |
1144 if (!SLP_TREE_CHILDREN (child).is_empty () | 1212 if (!SLP_TREE_CHILDREN (child).is_empty () |
1145 /* ??? Rejecting patterns this way doesn't work. We'd have to | 1213 /* ??? Rejecting patterns this way doesn't work. We'd have to |
1146 do extra work to cancel the pattern so the uses see the | 1214 do extra work to cancel the pattern so the uses see the |
1147 scalar version. */ | 1215 scalar version. */ |
1148 && !is_pattern_stmt_p | 1216 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0])) |
1149 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))) | |
1150 { | 1217 { |
1151 slp_tree grandchild; | 1218 slp_tree grandchild; |
1152 | 1219 |
1153 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) | 1220 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) |
1154 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def) | 1221 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def) |
1157 { | 1224 { |
1158 /* Roll back. */ | 1225 /* Roll back. */ |
1159 this_loads.truncate (old_nloads); | 1226 this_loads.truncate (old_nloads); |
1160 this_tree_size = old_tree_size; | 1227 this_tree_size = old_tree_size; |
1161 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) | 1228 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) |
1162 vect_free_slp_tree (grandchild); | 1229 vect_free_slp_tree (grandchild, false); |
1163 SLP_TREE_CHILDREN (child).truncate (0); | 1230 SLP_TREE_CHILDREN (child).truncate (0); |
1164 | 1231 |
1165 dump_printf_loc (MSG_NOTE, vect_location, | 1232 dump_printf_loc (MSG_NOTE, vect_location, |
1166 "Building parent vector operands from " | 1233 "Building parent vector operands from " |
1167 "scalars instead\n"); | 1234 "scalars instead\n"); |
1188 if (is_a <bb_vec_info> (vinfo) | 1255 if (is_a <bb_vec_info> (vinfo) |
1189 && !matches[0] | 1256 && !matches[0] |
1190 /* ??? Rejecting patterns this way doesn't work. We'd have to | 1257 /* ??? Rejecting patterns this way doesn't work. We'd have to |
1191 do extra work to cancel the pattern so the uses see the | 1258 do extra work to cancel the pattern so the uses see the |
1192 scalar version. */ | 1259 scalar version. */ |
1193 && !is_pattern_stmt_p (vinfo_for_stmt (stmt))) | 1260 && !is_pattern_stmt_p (stmt_info)) |
1194 { | 1261 { |
1195 dump_printf_loc (MSG_NOTE, vect_location, | 1262 dump_printf_loc (MSG_NOTE, vect_location, |
1196 "Building vector operands from scalars\n"); | 1263 "Building vector operands from scalars\n"); |
1197 child = vect_create_new_slp_node (oprnd_info->def_stmts); | 1264 child = vect_create_new_slp_node (oprnd_info->def_stmts); |
1198 SLP_TREE_DEF_TYPE (child) = vect_external_def; | 1265 SLP_TREE_DEF_TYPE (child) = vect_external_def; |
1209 && matches[0] | 1276 && matches[0] |
1210 /* ??? For COND_EXPRs we can swap the comparison operands | 1277 /* ??? For COND_EXPRs we can swap the comparison operands |
1211 as well as the arms under some constraints. */ | 1278 as well as the arms under some constraints. */ |
1212 && nops == 2 | 1279 && nops == 2 |
1213 && oprnds_info[1]->first_dt == vect_internal_def | 1280 && oprnds_info[1]->first_dt == vect_internal_def |
1214 && is_gimple_assign (stmt) | 1281 && is_gimple_assign (stmt_info->stmt) |
1215 && commutative_tree_code (gimple_assign_rhs_code (stmt)) | |
1216 && ! two_operators | |
1217 /* Do so only if the number of not successful permutes was nor more | 1282 /* Do so only if the number of not successful permutes was nor more |
1218 than a cut-ff as re-trying the recursive match on | 1283 than a cut-ff as re-trying the recursive match on |
1219 possibly each level of the tree would expose exponential | 1284 possibly each level of the tree would expose exponential |
1220 behavior. */ | 1285 behavior. */ |
1221 && *npermutes < 4) | 1286 && *npermutes < 4) |
1222 { | 1287 { |
1223 /* Verify if we can safely swap or if we committed to a specific | 1288 /* See whether we can swap the matching or the non-matching |
1224 operand order already. */ | 1289 stmt operands. */ |
1225 for (j = 0; j < group_size; ++j) | 1290 bool swap_not_matching = true; |
1226 if (!matches[j] | 1291 do |
1227 && (swap[j] != 0 | 1292 { |
1228 || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j])))) | 1293 for (j = 0; j < group_size; ++j) |
1229 { | 1294 { |
1230 if (dump_enabled_p ()) | 1295 if (matches[j] != !swap_not_matching) |
1231 { | 1296 continue; |
1232 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1297 stmt_vec_info stmt_info = stmts[j]; |
1233 "Build SLP failed: cannot swap operands " | 1298 /* Verify if we can swap operands of this stmt. */ |
1234 "of shared stmt "); | 1299 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt); |
1235 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, | 1300 if (!stmt |
1236 stmts[j], 0); | 1301 || !commutative_tree_code (gimple_assign_rhs_code (stmt))) |
1237 } | 1302 { |
1238 goto fail; | 1303 if (!swap_not_matching) |
1239 } | 1304 goto fail; |
1305 swap_not_matching = false; | |
1306 break; | |
1307 } | |
1308 /* Verify if we can safely swap or if we committed to a | |
1309 specific operand order already. | |
1310 ??? Instead of modifying GIMPLE stmts here we could | |
1311 record whether we want to swap operands in the SLP | |
1312 node and temporarily do that when processing it | |
1313 (or wrap operand accessors in a helper). */ | |
1314 else if (swap[j] != 0 | |
1315 || STMT_VINFO_NUM_SLP_USES (stmt_info)) | |
1316 { | |
1317 if (!swap_not_matching) | |
1318 { | |
1319 if (dump_enabled_p ()) | |
1320 dump_printf_loc (MSG_MISSED_OPTIMIZATION, | |
1321 vect_location, | |
1322 "Build SLP failed: cannot swap " | |
1323 "operands of shared stmt %G", | |
1324 stmts[j]->stmt); | |
1325 goto fail; | |
1326 } | |
1327 swap_not_matching = false; | |
1328 break; | |
1329 } | |
1330 } | |
1331 } | |
1332 while (j != group_size); | |
1240 | 1333 |
1241 /* Swap mismatched definition stmts. */ | 1334 /* Swap mismatched definition stmts. */ |
1242 dump_printf_loc (MSG_NOTE, vect_location, | 1335 dump_printf_loc (MSG_NOTE, vect_location, |
1243 "Re-trying with swapped operands of stmts "); | 1336 "Re-trying with swapped operands of stmts "); |
1244 for (j = 0; j < group_size; ++j) | 1337 for (j = 0; j < group_size; ++j) |
1245 if (!matches[j]) | 1338 if (matches[j] == !swap_not_matching) |
1246 { | 1339 { |
1247 std::swap (oprnds_info[0]->def_stmts[j], | 1340 std::swap (oprnds_info[0]->def_stmts[j], |
1248 oprnds_info[1]->def_stmts[j]); | 1341 oprnds_info[1]->def_stmts[j]); |
1249 dump_printf (MSG_NOTE, "%d ", j); | 1342 dump_printf (MSG_NOTE, "%d ", j); |
1250 } | 1343 } |
1262 vect_get_slp_defs uses operand indexes and thus expects | 1355 vect_get_slp_defs uses operand indexes and thus expects |
1263 canonical operand order. This is also necessary even | 1356 canonical operand order. This is also necessary even |
1264 if we end up building the operand from scalars as | 1357 if we end up building the operand from scalars as |
1265 we'll continue to process swapped operand two. */ | 1358 we'll continue to process swapped operand two. */ |
1266 for (j = 0; j < group_size; ++j) | 1359 for (j = 0; j < group_size; ++j) |
1267 { | 1360 gimple_set_plf (stmts[j]->stmt, GF_PLF_1, false); |
1268 gimple *stmt = stmts[j]; | |
1269 gimple_set_plf (stmt, GF_PLF_1, false); | |
1270 } | |
1271 for (j = 0; j < group_size; ++j) | 1361 for (j = 0; j < group_size; ++j) |
1272 { | 1362 if (matches[j] == !swap_not_matching) |
1273 gimple *stmt = stmts[j]; | 1363 { |
1274 if (!matches[j]) | 1364 gassign *stmt = as_a <gassign *> (stmts[j]->stmt); |
1275 { | 1365 /* Avoid swapping operands twice. */ |
1276 /* Avoid swapping operands twice. */ | 1366 if (gimple_plf (stmt, GF_PLF_1)) |
1277 if (gimple_plf (stmt, GF_PLF_1)) | 1367 continue; |
1278 continue; | 1368 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt), |
1279 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt), | 1369 gimple_assign_rhs2_ptr (stmt)); |
1280 gimple_assign_rhs2_ptr (stmt)); | 1370 gimple_set_plf (stmt, GF_PLF_1, true); |
1281 gimple_set_plf (stmt, GF_PLF_1, true); | 1371 } |
1282 } | |
1283 } | |
1284 /* Verify we swap all duplicates or none. */ | 1372 /* Verify we swap all duplicates or none. */ |
1285 if (flag_checking) | 1373 if (flag_checking) |
1286 for (j = 0; j < group_size; ++j) | 1374 for (j = 0; j < group_size; ++j) |
1287 { | 1375 gcc_assert (gimple_plf (stmts[j]->stmt, GF_PLF_1) |
1288 gimple *stmt = stmts[j]; | 1376 == (matches[j] == !swap_not_matching)); |
1289 gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]); | |
1290 } | |
1291 | 1377 |
1292 /* If we have all children of child built up from scalars then | 1378 /* If we have all children of child built up from scalars then |
1293 just throw that away and build it up this node from scalars. */ | 1379 just throw that away and build it up this node from scalars. */ |
1294 if (!SLP_TREE_CHILDREN (child).is_empty () | 1380 if (!SLP_TREE_CHILDREN (child).is_empty () |
1295 /* ??? Rejecting patterns this way doesn't work. We'd have | 1381 /* ??? Rejecting patterns this way doesn't work. We'd have |
1296 to do extra work to cancel the pattern so the uses see the | 1382 to do extra work to cancel the pattern so the uses see the |
1297 scalar version. */ | 1383 scalar version. */ |
1298 && !is_pattern_stmt_p | 1384 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0])) |
1299 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0]))) | |
1300 { | 1385 { |
1301 unsigned int j; | 1386 unsigned int j; |
1302 slp_tree grandchild; | 1387 slp_tree grandchild; |
1303 | 1388 |
1304 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) | 1389 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) |
1308 { | 1393 { |
1309 /* Roll back. */ | 1394 /* Roll back. */ |
1310 this_loads.truncate (old_nloads); | 1395 this_loads.truncate (old_nloads); |
1311 this_tree_size = old_tree_size; | 1396 this_tree_size = old_tree_size; |
1312 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) | 1397 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) |
1313 vect_free_slp_tree (grandchild); | 1398 vect_free_slp_tree (grandchild, false); |
1314 SLP_TREE_CHILDREN (child).truncate (0); | 1399 SLP_TREE_CHILDREN (child).truncate (0); |
1315 | 1400 |
1316 dump_printf_loc (MSG_NOTE, vect_location, | 1401 dump_printf_loc (MSG_NOTE, vect_location, |
1317 "Building parent vector operands from " | 1402 "Building parent vector operands from " |
1318 "scalars instead\n"); | 1403 "scalars instead\n"); |
1332 } | 1417 } |
1333 | 1418 |
1334 fail: | 1419 fail: |
1335 gcc_assert (child == NULL); | 1420 gcc_assert (child == NULL); |
1336 FOR_EACH_VEC_ELT (children, j, child) | 1421 FOR_EACH_VEC_ELT (children, j, child) |
1337 vect_free_slp_tree (child); | 1422 vect_free_slp_tree (child, false); |
1338 vect_free_oprnd_info (oprnds_info); | 1423 vect_free_oprnd_info (oprnds_info); |
1339 return NULL; | 1424 return NULL; |
1340 } | 1425 } |
1341 | 1426 |
1342 vect_free_oprnd_info (oprnds_info); | 1427 vect_free_oprnd_info (oprnds_info); |
1353 } | 1438 } |
1354 | 1439 |
1355 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */ | 1440 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */ |
1356 | 1441 |
1357 static void | 1442 static void |
1358 vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node) | 1443 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc, |
1444 slp_tree node) | |
1359 { | 1445 { |
1360 int i; | 1446 int i; |
1361 gimple *stmt; | 1447 stmt_vec_info stmt_info; |
1362 slp_tree child; | 1448 slp_tree child; |
1363 | 1449 |
1364 dump_printf_loc (dump_kind, loc, "node%s\n", | 1450 dump_printf_loc (dump_kind, loc, "node%s\n", |
1365 SLP_TREE_DEF_TYPE (node) != vect_internal_def | 1451 SLP_TREE_DEF_TYPE (node) != vect_internal_def |
1366 ? " (external)" : ""); | 1452 ? " (external)" : ""); |
1367 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) | 1453 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) |
1368 { | 1454 dump_printf_loc (dump_kind, loc, "\tstmt %d %G", i, stmt_info->stmt); |
1369 dump_printf_loc (dump_kind, loc, "\tstmt %d ", i); | |
1370 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0); | |
1371 } | |
1372 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) | 1455 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
1373 vect_print_slp_tree (dump_kind, loc, child); | 1456 vect_print_slp_tree (dump_kind, loc, child); |
1374 } | 1457 } |
1375 | 1458 |
1376 | 1459 |
1381 | 1464 |
1382 static void | 1465 static void |
1383 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j) | 1466 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j) |
1384 { | 1467 { |
1385 int i; | 1468 int i; |
1386 gimple *stmt; | 1469 stmt_vec_info stmt_info; |
1387 slp_tree child; | 1470 slp_tree child; |
1388 | 1471 |
1389 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) | 1472 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) |
1390 return; | 1473 return; |
1391 | 1474 |
1392 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) | 1475 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) |
1393 if (j < 0 || i == j) | 1476 if (j < 0 || i == j) |
1394 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark; | 1477 STMT_SLP_TYPE (stmt_info) = mark; |
1395 | 1478 |
1396 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) | 1479 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
1397 vect_mark_slp_stmts (child, mark, j); | 1480 vect_mark_slp_stmts (child, mark, j); |
1398 } | 1481 } |
1399 | 1482 |
1402 | 1485 |
1403 static void | 1486 static void |
1404 vect_mark_slp_stmts_relevant (slp_tree node) | 1487 vect_mark_slp_stmts_relevant (slp_tree node) |
1405 { | 1488 { |
1406 int i; | 1489 int i; |
1407 gimple *stmt; | |
1408 stmt_vec_info stmt_info; | 1490 stmt_vec_info stmt_info; |
1409 slp_tree child; | 1491 slp_tree child; |
1410 | 1492 |
1411 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) | 1493 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) |
1412 return; | 1494 return; |
1413 | 1495 |
1414 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) | 1496 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) |
1415 { | 1497 { |
1416 stmt_info = vinfo_for_stmt (stmt); | |
1417 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info) | 1498 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info) |
1418 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope); | 1499 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope); |
1419 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope; | 1500 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope; |
1420 } | 1501 } |
1421 | 1502 |
1428 | 1509 |
1429 static void | 1510 static void |
1430 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size, | 1511 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size, |
1431 vec<unsigned> permutation) | 1512 vec<unsigned> permutation) |
1432 { | 1513 { |
1433 gimple *stmt; | 1514 stmt_vec_info stmt_info; |
1434 vec<gimple *> tmp_stmts; | 1515 vec<stmt_vec_info> tmp_stmts; |
1435 unsigned int i; | 1516 unsigned int i; |
1436 slp_tree child; | 1517 slp_tree child; |
1437 | 1518 |
1438 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) | 1519 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
1439 vect_slp_rearrange_stmts (child, group_size, permutation); | 1520 vect_slp_rearrange_stmts (child, group_size, permutation); |
1440 | 1521 |
1441 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ()); | 1522 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ()); |
1442 tmp_stmts.create (group_size); | 1523 tmp_stmts.create (group_size); |
1443 tmp_stmts.quick_grow_cleared (group_size); | 1524 tmp_stmts.quick_grow_cleared (group_size); |
1444 | 1525 |
1445 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) | 1526 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) |
1446 tmp_stmts[permutation[i]] = stmt; | 1527 tmp_stmts[permutation[i]] = stmt_info; |
1447 | 1528 |
1448 SLP_TREE_SCALAR_STMTS (node).release (); | 1529 SLP_TREE_SCALAR_STMTS (node).release (); |
1449 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts; | 1530 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts; |
1450 } | 1531 } |
1451 | 1532 |
1499 according to the order of the loads. */ | 1580 according to the order of the loads. */ |
1500 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size, | 1581 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size, |
1501 node->load_permutation); | 1582 node->load_permutation); |
1502 | 1583 |
1503 /* We are done, no actual permutations need to be generated. */ | 1584 /* We are done, no actual permutations need to be generated. */ |
1504 unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn); | 1585 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn); |
1505 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) | 1586 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) |
1506 { | 1587 { |
1507 gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0]; | 1588 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0]; |
1508 first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)); | 1589 first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info); |
1509 /* But we have to keep those permutations that are required because | 1590 /* But we have to keep those permutations that are required because |
1510 of handling of gaps. */ | 1591 of handling of gaps. */ |
1511 if (unrolling_factor == 1 | 1592 if (known_eq (unrolling_factor, 1U) |
1512 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt)) | 1593 || (group_size == DR_GROUP_SIZE (first_stmt_info) |
1513 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)) | 1594 && DR_GROUP_GAP (first_stmt_info) == 0)) |
1514 SLP_TREE_LOAD_PERMUTATION (node).release (); | 1595 SLP_TREE_LOAD_PERMUTATION (node).release (); |
1515 else | 1596 else |
1516 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j) | 1597 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j) |
1517 SLP_TREE_LOAD_PERMUTATION (node)[j] = j; | 1598 SLP_TREE_LOAD_PERMUTATION (node)[j] = j; |
1518 } | 1599 } |
1527 vect_supported_load_permutation_p (slp_instance slp_instn) | 1608 vect_supported_load_permutation_p (slp_instance slp_instn) |
1528 { | 1609 { |
1529 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn); | 1610 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn); |
1530 unsigned int i, j, k, next; | 1611 unsigned int i, j, k, next; |
1531 slp_tree node; | 1612 slp_tree node; |
1532 gimple *stmt, *load, *next_load; | |
1533 | 1613 |
1534 if (dump_enabled_p ()) | 1614 if (dump_enabled_p ()) |
1535 { | 1615 { |
1536 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation "); | 1616 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation "); |
1537 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) | 1617 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) |
1556 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) | 1636 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) |
1557 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size) | 1637 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size) |
1558 return false; | 1638 return false; |
1559 | 1639 |
1560 node = SLP_INSTANCE_TREE (slp_instn); | 1640 node = SLP_INSTANCE_TREE (slp_instn); |
1561 stmt = SLP_TREE_SCALAR_STMTS (node)[0]; | 1641 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0]; |
1562 | 1642 |
1563 /* Reduction (there are no data-refs in the root). | 1643 /* Reduction (there are no data-refs in the root). |
1564 In reduction chain the order of the loads is not important. */ | 1644 In reduction chain the order of the loads is not important. */ |
1565 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)) | 1645 if (!STMT_VINFO_DATA_REF (stmt_info) |
1566 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) | 1646 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)) |
1567 vect_attempt_slp_rearrange_stmts (slp_instn); | 1647 vect_attempt_slp_rearrange_stmts (slp_instn); |
1568 | 1648 |
1569 /* In basic block vectorization we allow any subchain of an interleaving | 1649 /* In basic block vectorization we allow any subchain of an interleaving |
1570 chain. | 1650 chain. |
1571 FORNOW: not supported in loop SLP because of realignment compications. */ | 1651 FORNOW: not supported in loop SLP because of realignment compications. */ |
1572 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt))) | 1652 if (STMT_VINFO_BB_VINFO (stmt_info)) |
1573 { | 1653 { |
1574 /* Check whether the loads in an instance form a subchain and thus | 1654 /* Check whether the loads in an instance form a subchain and thus |
1575 no permutation is necessary. */ | 1655 no permutation is necessary. */ |
1576 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) | 1656 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) |
1577 { | 1657 { |
1578 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ()) | 1658 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ()) |
1579 continue; | 1659 continue; |
1580 bool subchain_p = true; | 1660 bool subchain_p = true; |
1581 next_load = NULL; | 1661 stmt_vec_info next_load_info = NULL; |
1582 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load) | 1662 stmt_vec_info load_info; |
1583 { | 1663 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info) |
1584 if (j != 0 | 1664 { |
1585 && (next_load != load | 1665 if (j != 0 |
1586 || GROUP_GAP (vinfo_for_stmt (load)) != 1)) | 1666 && (next_load_info != load_info |
1667 || DR_GROUP_GAP (load_info) != 1)) | |
1587 { | 1668 { |
1588 subchain_p = false; | 1669 subchain_p = false; |
1589 break; | 1670 break; |
1590 } | 1671 } |
1591 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load)); | 1672 next_load_info = DR_GROUP_NEXT_ELEMENT (load_info); |
1592 } | 1673 } |
1593 if (subchain_p) | 1674 if (subchain_p) |
1594 SLP_TREE_LOAD_PERMUTATION (node).release (); | 1675 SLP_TREE_LOAD_PERMUTATION (node).release (); |
1595 else | 1676 else |
1596 { | 1677 { |
1597 stmt_vec_info group_info | 1678 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0]; |
1598 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]); | 1679 group_info = DR_GROUP_FIRST_ELEMENT (group_info); |
1599 group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info)); | 1680 unsigned HOST_WIDE_INT nunits; |
1600 unsigned nunits | |
1601 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info)); | |
1602 unsigned k, maxk = 0; | 1681 unsigned k, maxk = 0; |
1603 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k) | 1682 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k) |
1604 if (k > maxk) | 1683 if (k > maxk) |
1605 maxk = k; | 1684 maxk = k; |
1606 /* In BB vectorization we may not actually use a loaded vector | 1685 /* In BB vectorization we may not actually use a loaded vector |
1607 accessing elements in excess of GROUP_SIZE. */ | 1686 accessing elements in excess of DR_GROUP_SIZE. */ |
1608 if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1))) | 1687 tree vectype = STMT_VINFO_VECTYPE (group_info); |
1688 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits) | |
1689 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1))) | |
1609 { | 1690 { |
1610 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1611 "BB vectorization with gaps at the end of " | 1692 "BB vectorization with gaps at the end of " |
1612 "a load is not supported\n"); | 1693 "a load is not supported\n"); |
1613 return false; | 1694 return false; |
1633 conservative about the vectorization factor, there are permutations | 1714 conservative about the vectorization factor, there are permutations |
1634 that will use three vector inputs only starting from a specific factor | 1715 that will use three vector inputs only starting from a specific factor |
1635 and the vectorization factor is not yet final. | 1716 and the vectorization factor is not yet final. |
1636 ??? The SLP instance unrolling factor might not be the maximum one. */ | 1717 ??? The SLP instance unrolling factor might not be the maximum one. */ |
1637 unsigned n_perms; | 1718 unsigned n_perms; |
1638 unsigned test_vf | 1719 poly_uint64 test_vf |
1639 = least_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), | 1720 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), |
1640 LOOP_VINFO_VECT_FACTOR | 1721 LOOP_VINFO_VECT_FACTOR |
1641 (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)))); | 1722 (STMT_VINFO_LOOP_VINFO (stmt_info))); |
1642 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) | 1723 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) |
1643 if (node->load_permutation.exists () | 1724 if (node->load_permutation.exists () |
1644 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf, | 1725 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf, |
1645 slp_instn, true, &n_perms)) | 1726 slp_instn, true, &n_perms)) |
1646 return false; | 1727 return false; |
1649 } | 1730 } |
1650 | 1731 |
1651 | 1732 |
1652 /* Find the last store in SLP INSTANCE. */ | 1733 /* Find the last store in SLP INSTANCE. */ |
1653 | 1734 |
1654 gimple * | 1735 stmt_vec_info |
1655 vect_find_last_scalar_stmt_in_slp (slp_tree node) | 1736 vect_find_last_scalar_stmt_in_slp (slp_tree node) |
1656 { | 1737 { |
1657 gimple *last = NULL, *stmt; | 1738 stmt_vec_info last = NULL; |
1658 | 1739 stmt_vec_info stmt_vinfo; |
1659 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++) | 1740 |
1660 { | 1741 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++) |
1661 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); | 1742 { |
1662 if (is_pattern_stmt_p (stmt_vinfo)) | 1743 stmt_vinfo = vect_orig_stmt (stmt_vinfo); |
1663 last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last); | 1744 last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo; |
1664 else | |
1665 last = get_later_stmt (stmt, last); | |
1666 } | 1745 } |
1667 | 1746 |
1668 return last; | 1747 return last; |
1669 } | 1748 } |
1670 | 1749 |
1671 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */ | 1750 /* Splits a group of stores, currently beginning at FIRST_VINFO, into |
1672 | 1751 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE |
1673 static void | 1752 (also containing the first GROUP1_SIZE stmts, since stores are |
1674 vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node, | 1753 consecutive), the second containing the remainder. |
1675 stmt_vector_for_cost *prologue_cost_vec, | |
1676 stmt_vector_for_cost *body_cost_vec, | |
1677 unsigned ncopies_for_cost) | |
1678 { | |
1679 unsigned i, j; | |
1680 slp_tree child; | |
1681 gimple *stmt; | |
1682 stmt_vec_info stmt_info; | |
1683 tree lhs; | |
1684 | |
1685 /* Recurse down the SLP tree. */ | |
1686 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) | |
1687 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) | |
1688 vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec, | |
1689 body_cost_vec, ncopies_for_cost); | |
1690 | |
1691 /* Look at the first scalar stmt to determine the cost. */ | |
1692 stmt = SLP_TREE_SCALAR_STMTS (node)[0]; | |
1693 stmt_info = vinfo_for_stmt (stmt); | |
1694 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) | |
1695 { | |
1696 vect_memory_access_type memory_access_type | |
1697 = (STMT_VINFO_STRIDED_P (stmt_info) | |
1698 ? VMAT_STRIDED_SLP | |
1699 : VMAT_CONTIGUOUS); | |
1700 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info))) | |
1701 vect_model_store_cost (stmt_info, ncopies_for_cost, | |
1702 memory_access_type, vect_uninitialized_def, | |
1703 node, prologue_cost_vec, body_cost_vec); | |
1704 else | |
1705 { | |
1706 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))); | |
1707 if (SLP_TREE_LOAD_PERMUTATION (node).exists ()) | |
1708 { | |
1709 /* If the load is permuted then the alignment is determined by | |
1710 the first group element not by the first scalar stmt DR. */ | |
1711 stmt = GROUP_FIRST_ELEMENT (stmt_info); | |
1712 stmt_info = vinfo_for_stmt (stmt); | |
1713 /* Record the cost for the permutation. */ | |
1714 unsigned n_perms; | |
1715 vect_transform_slp_perm_load (node, vNULL, NULL, | |
1716 ncopies_for_cost, instance, true, | |
1717 &n_perms); | |
1718 record_stmt_cost (body_cost_vec, n_perms, vec_perm, | |
1719 stmt_info, 0, vect_body); | |
1720 unsigned nunits | |
1721 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); | |
1722 /* And adjust the number of loads performed. This handles | |
1723 redundancies as well as loads that are later dead. */ | |
1724 auto_sbitmap perm (GROUP_SIZE (stmt_info)); | |
1725 bitmap_clear (perm); | |
1726 for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i) | |
1727 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]); | |
1728 ncopies_for_cost = 0; | |
1729 bool load_seen = false; | |
1730 for (i = 0; i < GROUP_SIZE (stmt_info); ++i) | |
1731 { | |
1732 if (i % nunits == 0) | |
1733 { | |
1734 if (load_seen) | |
1735 ncopies_for_cost++; | |
1736 load_seen = false; | |
1737 } | |
1738 if (bitmap_bit_p (perm, i)) | |
1739 load_seen = true; | |
1740 } | |
1741 if (load_seen) | |
1742 ncopies_for_cost++; | |
1743 gcc_assert (ncopies_for_cost | |
1744 <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info) | |
1745 + nunits - 1) / nunits); | |
1746 ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance); | |
1747 } | |
1748 /* Record the cost for the vector loads. */ | |
1749 vect_model_load_cost (stmt_info, ncopies_for_cost, | |
1750 memory_access_type, node, prologue_cost_vec, | |
1751 body_cost_vec); | |
1752 return; | |
1753 } | |
1754 } | |
1755 else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type) | |
1756 { | |
1757 /* ncopies_for_cost is the number of IVs we generate. */ | |
1758 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt, | |
1759 stmt_info, 0, vect_body); | |
1760 | |
1761 /* Prologue cost for the initial values and step vector. */ | |
1762 record_stmt_cost (prologue_cost_vec, ncopies_for_cost, | |
1763 CONSTANT_CLASS_P | |
1764 (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED | |
1765 (stmt_info)) | |
1766 ? vector_load : vec_construct, | |
1767 stmt_info, 0, vect_prologue); | |
1768 record_stmt_cost (prologue_cost_vec, 1, | |
1769 CONSTANT_CLASS_P | |
1770 (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info)) | |
1771 ? vector_load : vec_construct, | |
1772 stmt_info, 0, vect_prologue); | |
1773 | |
1774 /* ??? No easy way to get at the actual number of vector stmts | |
1775 to be geneated and thus the derived IVs. */ | |
1776 } | |
1777 else | |
1778 { | |
1779 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt, | |
1780 stmt_info, 0, vect_body); | |
1781 if (SLP_TREE_TWO_OPERATORS (node)) | |
1782 { | |
1783 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt, | |
1784 stmt_info, 0, vect_body); | |
1785 record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm, | |
1786 stmt_info, 0, vect_body); | |
1787 } | |
1788 } | |
1789 | |
1790 /* Push SLP node def-type to stmts. */ | |
1791 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) | |
1792 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) | |
1793 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt) | |
1794 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child); | |
1795 | |
1796 /* Scan operands and account for prologue cost of constants/externals. | |
1797 ??? This over-estimates cost for multiple uses and should be | |
1798 re-engineered. */ | |
1799 stmt = SLP_TREE_SCALAR_STMTS (node)[0]; | |
1800 lhs = gimple_get_lhs (stmt); | |
1801 for (i = 0; i < gimple_num_ops (stmt); ++i) | |
1802 { | |
1803 tree op = gimple_op (stmt, i); | |
1804 gimple *def_stmt; | |
1805 enum vect_def_type dt; | |
1806 if (!op || op == lhs) | |
1807 continue; | |
1808 if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt)) | |
1809 { | |
1810 /* Without looking at the actual initializer a vector of | |
1811 constants can be implemented as load from the constant pool. | |
1812 ??? We need to pass down stmt_info for a vector type | |
1813 even if it points to the wrong stmt. */ | |
1814 if (dt == vect_constant_def) | |
1815 record_stmt_cost (prologue_cost_vec, 1, vector_load, | |
1816 stmt_info, 0, vect_prologue); | |
1817 else if (dt == vect_external_def) | |
1818 record_stmt_cost (prologue_cost_vec, 1, vec_construct, | |
1819 stmt_info, 0, vect_prologue); | |
1820 } | |
1821 } | |
1822 | |
1823 /* Restore stmt def-types. */ | |
1824 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) | |
1825 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) | |
1826 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt) | |
1827 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def; | |
1828 } | |
1829 | |
1830 /* Compute the cost for the SLP instance INSTANCE. */ | |
1831 | |
1832 static void | |
1833 vect_analyze_slp_cost (slp_instance instance, void *data) | |
1834 { | |
1835 stmt_vector_for_cost body_cost_vec, prologue_cost_vec; | |
1836 unsigned ncopies_for_cost; | |
1837 stmt_info_for_cost *si; | |
1838 unsigned i; | |
1839 | |
1840 if (dump_enabled_p ()) | |
1841 dump_printf_loc (MSG_NOTE, vect_location, | |
1842 "=== vect_analyze_slp_cost ===\n"); | |
1843 | |
1844 /* Calculate the number of vector stmts to create based on the unrolling | |
1845 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is | |
1846 GROUP_SIZE / NUNITS otherwise. */ | |
1847 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance); | |
1848 slp_tree node = SLP_INSTANCE_TREE (instance); | |
1849 stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]); | |
1850 /* Adjust the group_size by the vectorization factor which is always one | |
1851 for basic-block vectorization. */ | |
1852 if (STMT_VINFO_LOOP_VINFO (stmt_info)) | |
1853 group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info)); | |
1854 unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); | |
1855 /* For reductions look at a reduction operand in case the reduction | |
1856 operation is widening like DOT_PROD or SAD. */ | |
1857 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)) | |
1858 { | |
1859 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]; | |
1860 switch (gimple_assign_rhs_code (stmt)) | |
1861 { | |
1862 case DOT_PROD_EXPR: | |
1863 case SAD_EXPR: | |
1864 nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type | |
1865 (TREE_TYPE (gimple_assign_rhs1 (stmt)))); | |
1866 break; | |
1867 default:; | |
1868 } | |
1869 } | |
1870 ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits; | |
1871 | |
1872 prologue_cost_vec.create (10); | |
1873 body_cost_vec.create (10); | |
1874 vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance), | |
1875 &prologue_cost_vec, &body_cost_vec, | |
1876 ncopies_for_cost); | |
1877 | |
1878 /* Record the prologue costs, which were delayed until we were | |
1879 sure that SLP was successful. */ | |
1880 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si) | |
1881 { | |
1882 struct _stmt_vec_info *stmt_info | |
1883 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL; | |
1884 (void) add_stmt_cost (data, si->count, si->kind, stmt_info, | |
1885 si->misalign, vect_prologue); | |
1886 } | |
1887 | |
1888 /* Record the instance's instructions in the target cost model. */ | |
1889 FOR_EACH_VEC_ELT (body_cost_vec, i, si) | |
1890 { | |
1891 struct _stmt_vec_info *stmt_info | |
1892 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL; | |
1893 (void) add_stmt_cost (data, si->count, si->kind, stmt_info, | |
1894 si->misalign, vect_body); | |
1895 } | |
1896 | |
1897 prologue_cost_vec.release (); | |
1898 body_cost_vec.release (); | |
1899 } | |
1900 | |
1901 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups: | |
1902 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing | |
1903 the first GROUP1_SIZE stmts, since stores are consecutive), the second | |
1904 containing the remainder. | |
1905 Return the first stmt in the second group. */ | 1754 Return the first stmt in the second group. */ |
1906 | 1755 |
1907 static gimple * | 1756 static stmt_vec_info |
1908 vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size) | 1757 vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size) |
1909 { | 1758 { |
1910 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt); | 1759 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo); |
1911 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt); | |
1912 gcc_assert (group1_size > 0); | 1760 gcc_assert (group1_size > 0); |
1913 int group2_size = GROUP_SIZE (first_vinfo) - group1_size; | 1761 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size; |
1914 gcc_assert (group2_size > 0); | 1762 gcc_assert (group2_size > 0); |
1915 GROUP_SIZE (first_vinfo) = group1_size; | 1763 DR_GROUP_SIZE (first_vinfo) = group1_size; |
1916 | 1764 |
1917 gimple *stmt = first_stmt; | 1765 stmt_vec_info stmt_info = first_vinfo; |
1918 for (unsigned i = group1_size; i > 1; i--) | 1766 for (unsigned i = group1_size; i > 1; i--) |
1919 { | 1767 { |
1920 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)); | 1768 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info); |
1921 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1); | 1769 gcc_assert (DR_GROUP_GAP (stmt_info) == 1); |
1922 } | 1770 } |
1923 /* STMT is now the last element of the first group. */ | 1771 /* STMT is now the last element of the first group. */ |
1924 gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)); | 1772 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info); |
1925 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0; | 1773 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0; |
1926 | 1774 |
1927 GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size; | 1775 DR_GROUP_SIZE (group2) = group2_size; |
1928 for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt))) | 1776 for (stmt_info = group2; stmt_info; |
1929 { | 1777 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info)) |
1930 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2; | 1778 { |
1931 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1); | 1779 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2; |
1932 } | 1780 gcc_assert (DR_GROUP_GAP (stmt_info) == 1); |
1933 | 1781 } |
1934 /* For the second group, the GROUP_GAP is that before the original group, | 1782 |
1783 /* For the second group, the DR_GROUP_GAP is that before the original group, | |
1935 plus skipping over the first vector. */ | 1784 plus skipping over the first vector. */ |
1936 GROUP_GAP (vinfo_for_stmt (group2)) = | 1785 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size; |
1937 GROUP_GAP (first_vinfo) + group1_size; | 1786 |
1938 | 1787 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */ |
1939 /* GROUP_GAP of the first group now has to skip over the second group too. */ | 1788 DR_GROUP_GAP (first_vinfo) += group2_size; |
1940 GROUP_GAP (first_vinfo) += group2_size; | |
1941 | 1789 |
1942 if (dump_enabled_p ()) | 1790 if (dump_enabled_p ()) |
1943 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n", | 1791 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n", |
1944 group1_size, group2_size); | 1792 group1_size, group2_size); |
1945 | 1793 |
1946 return group2; | 1794 return group2; |
1947 } | 1795 } |
1948 | 1796 |
1797 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE | |
1798 statements and a vector of NUNITS elements. */ | |
1799 | |
1800 static poly_uint64 | |
1801 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size) | |
1802 { | |
1803 return exact_div (common_multiple (nunits, group_size), group_size); | |
1804 } | |
1805 | |
1949 /* Analyze an SLP instance starting from a group of grouped stores. Call | 1806 /* Analyze an SLP instance starting from a group of grouped stores. Call |
1950 vect_build_slp_tree to build a tree of packed stmts if possible. | 1807 vect_build_slp_tree to build a tree of packed stmts if possible. |
1951 Return FALSE if it's impossible to SLP any stmt in the loop. */ | 1808 Return FALSE if it's impossible to SLP any stmt in the loop. */ |
1952 | 1809 |
1953 static bool | 1810 static bool |
1954 vect_analyze_slp_instance (vec_info *vinfo, | 1811 vect_analyze_slp_instance (vec_info *vinfo, |
1955 gimple *stmt, unsigned max_tree_size) | 1812 stmt_vec_info stmt_info, unsigned max_tree_size) |
1956 { | 1813 { |
1957 slp_instance new_instance; | 1814 slp_instance new_instance; |
1958 slp_tree node; | 1815 slp_tree node; |
1959 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt)); | 1816 unsigned int group_size; |
1960 unsigned int unrolling_factor = 1, nunits; | |
1961 tree vectype, scalar_type = NULL_TREE; | 1817 tree vectype, scalar_type = NULL_TREE; |
1962 gimple *next; | |
1963 unsigned int i; | 1818 unsigned int i; |
1964 unsigned int max_nunits = 0; | |
1965 vec<slp_tree> loads; | 1819 vec<slp_tree> loads; |
1966 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); | 1820 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info); |
1967 vec<gimple *> scalar_stmts; | 1821 vec<stmt_vec_info> scalar_stmts; |
1968 | 1822 |
1969 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) | 1823 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) |
1970 { | 1824 { |
1971 if (dr) | 1825 scalar_type = TREE_TYPE (DR_REF (dr)); |
1972 { | 1826 vectype = get_vectype_for_scalar_type (scalar_type); |
1973 scalar_type = TREE_TYPE (DR_REF (dr)); | 1827 group_size = DR_GROUP_SIZE (stmt_info); |
1974 vectype = get_vectype_for_scalar_type (scalar_type); | 1828 } |
1975 } | 1829 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) |
1976 else | 1830 { |
1977 { | 1831 gcc_assert (is_a <loop_vec_info> (vinfo)); |
1978 gcc_assert (is_a <loop_vec_info> (vinfo)); | 1832 vectype = STMT_VINFO_VECTYPE (stmt_info); |
1979 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); | 1833 group_size = REDUC_GROUP_SIZE (stmt_info); |
1980 } | |
1981 | |
1982 group_size = GROUP_SIZE (vinfo_for_stmt (stmt)); | |
1983 } | 1834 } |
1984 else | 1835 else |
1985 { | 1836 { |
1986 gcc_assert (is_a <loop_vec_info> (vinfo)); | 1837 gcc_assert (is_a <loop_vec_info> (vinfo)); |
1987 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); | 1838 vectype = STMT_VINFO_VECTYPE (stmt_info); |
1988 group_size = as_a <loop_vec_info> (vinfo)->reductions.length (); | 1839 group_size = as_a <loop_vec_info> (vinfo)->reductions.length (); |
1989 } | 1840 } |
1990 | 1841 |
1991 if (!vectype) | 1842 if (!vectype) |
1992 { | 1843 { |
1993 if (dump_enabled_p ()) | 1844 if (dump_enabled_p ()) |
1994 { | 1845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1995 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1846 "Build SLP failed: unsupported data-type %T\n", |
1996 "Build SLP failed: unsupported data-type "); | 1847 scalar_type); |
1997 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type); | |
1998 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); | |
1999 } | |
2000 | 1848 |
2001 return false; | 1849 return false; |
2002 } | 1850 } |
2003 nunits = TYPE_VECTOR_SUBPARTS (vectype); | 1851 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); |
2004 | 1852 |
2005 /* Create a node (a root of the SLP tree) for the packed grouped stores. */ | 1853 /* Create a node (a root of the SLP tree) for the packed grouped stores. */ |
2006 scalar_stmts.create (group_size); | 1854 scalar_stmts.create (group_size); |
2007 next = stmt; | 1855 stmt_vec_info next_info = stmt_info; |
2008 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) | 1856 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) |
2009 { | 1857 { |
2010 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */ | 1858 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */ |
2011 while (next) | 1859 while (next_info) |
2012 { | 1860 { |
2013 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next)) | 1861 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info)); |
2014 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next))) | 1862 next_info = DR_GROUP_NEXT_ELEMENT (next_info); |
2015 scalar_stmts.safe_push ( | 1863 } |
2016 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next))); | 1864 } |
2017 else | 1865 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) |
2018 scalar_stmts.safe_push (next); | 1866 { |
2019 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next)); | 1867 /* Collect the reduction stmts and store them in |
1868 SLP_TREE_SCALAR_STMTS. */ | |
1869 while (next_info) | |
1870 { | |
1871 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info)); | |
1872 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info); | |
2020 } | 1873 } |
2021 /* Mark the first element of the reduction chain as reduction to properly | 1874 /* Mark the first element of the reduction chain as reduction to properly |
2022 transform the node. In the reduction analysis phase only the last | 1875 transform the node. In the reduction analysis phase only the last |
2023 element of the chain is marked as reduction. */ | 1876 element of the chain is marked as reduction. */ |
2024 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))) | 1877 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def; |
2025 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def; | |
2026 } | 1878 } |
2027 else | 1879 else |
2028 { | 1880 { |
2029 /* Collect reduction statements. */ | 1881 /* Collect reduction statements. */ |
2030 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions; | 1882 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions; |
2031 for (i = 0; reductions.iterate (i, &next); i++) | 1883 for (i = 0; reductions.iterate (i, &next_info); i++) |
2032 scalar_stmts.safe_push (next); | 1884 scalar_stmts.safe_push (next_info); |
2033 } | 1885 } |
2034 | 1886 |
2035 loads.create (group_size); | 1887 loads.create (group_size); |
2036 | 1888 |
2037 /* Build the tree for the SLP instance. */ | 1889 /* Build the tree for the SLP instance. */ |
2038 bool *matches = XALLOCAVEC (bool, group_size); | 1890 bool *matches = XALLOCAVEC (bool, group_size); |
2039 unsigned npermutes = 0; | 1891 unsigned npermutes = 0; |
2040 bst_fail = new hash_set <vec <gimple *>, bst_traits> (); | 1892 bst_fail = new scalar_stmts_set_t (); |
1893 poly_uint64 max_nunits = nunits; | |
2041 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size, | 1894 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size, |
2042 &max_nunits, &loads, matches, &npermutes, | 1895 &max_nunits, &loads, matches, &npermutes, |
2043 NULL, max_tree_size); | 1896 NULL, max_tree_size); |
2044 delete bst_fail; | 1897 delete bst_fail; |
2045 if (node != NULL) | 1898 if (node != NULL) |
2046 { | 1899 { |
2047 /* Calculate the unrolling factor based on the smallest type. */ | 1900 /* Calculate the unrolling factor based on the smallest type. */ |
2048 unrolling_factor | 1901 poly_uint64 unrolling_factor |
2049 = least_common_multiple (max_nunits, group_size) / group_size; | 1902 = calculate_unrolling_factor (max_nunits, group_size); |
2050 | 1903 |
2051 if (unrolling_factor != 1 | 1904 if (maybe_ne (unrolling_factor, 1U) |
2052 && is_a <bb_vec_info> (vinfo)) | 1905 && is_a <bb_vec_info> (vinfo)) |
2053 { | 1906 { |
2054 | 1907 unsigned HOST_WIDE_INT const_max_nunits; |
2055 if (max_nunits > group_size) | 1908 if (!max_nunits.is_constant (&const_max_nunits) |
2056 { | 1909 || const_max_nunits > group_size) |
2057 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1910 { |
2058 "Build SLP failed: store group " | 1911 if (dump_enabled_p ()) |
2059 "size not a multiple of the vector size " | 1912 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2060 "in basic block SLP\n"); | 1913 "Build SLP failed: store group " |
2061 vect_free_slp_tree (node); | 1914 "size not a multiple of the vector size " |
2062 loads.release (); | 1915 "in basic block SLP\n"); |
2063 return false; | 1916 vect_free_slp_tree (node, false); |
2064 } | 1917 loads.release (); |
1918 return false; | |
1919 } | |
2065 /* Fatal mismatch. */ | 1920 /* Fatal mismatch. */ |
2066 matches[group_size/max_nunits * max_nunits] = false; | 1921 matches[group_size / const_max_nunits * const_max_nunits] = false; |
2067 vect_free_slp_tree (node); | 1922 vect_free_slp_tree (node, false); |
2068 loads.release (); | 1923 loads.release (); |
2069 } | 1924 } |
2070 else | 1925 else |
2071 { | 1926 { |
2072 /* Create a new SLP instance. */ | 1927 /* Create a new SLP instance. */ |
2081 bool loads_permuted = false; | 1936 bool loads_permuted = false; |
2082 FOR_EACH_VEC_ELT (loads, i, load_node) | 1937 FOR_EACH_VEC_ELT (loads, i, load_node) |
2083 { | 1938 { |
2084 vec<unsigned> load_permutation; | 1939 vec<unsigned> load_permutation; |
2085 int j; | 1940 int j; |
2086 gimple *load, *first_stmt; | 1941 stmt_vec_info load_info; |
2087 bool this_load_permuted = false; | 1942 bool this_load_permuted = false; |
2088 load_permutation.create (group_size); | 1943 load_permutation.create (group_size); |
2089 first_stmt = GROUP_FIRST_ELEMENT | 1944 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT |
2090 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0])); | 1945 (SLP_TREE_SCALAR_STMTS (load_node)[0]); |
2091 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load) | 1946 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info) |
2092 { | 1947 { |
2093 int load_place = vect_get_place_in_interleaving_chain | 1948 int load_place = vect_get_place_in_interleaving_chain |
2094 (load, first_stmt); | 1949 (load_info, first_stmt_info); |
2095 gcc_assert (load_place != -1); | 1950 gcc_assert (load_place != -1); |
2096 if (load_place != j) | 1951 if (load_place != j) |
2097 this_load_permuted = true; | 1952 this_load_permuted = true; |
2098 load_permutation.safe_push (load_place); | 1953 load_permutation.safe_push (load_place); |
2099 } | 1954 } |
2100 if (!this_load_permuted | 1955 if (!this_load_permuted |
2101 /* The load requires permutation when unrolling exposes | 1956 /* The load requires permutation when unrolling exposes |
2102 a gap either because the group is larger than the SLP | 1957 a gap either because the group is larger than the SLP |
2103 group-size or because there is a gap between the groups. */ | 1958 group-size or because there is a gap between the groups. */ |
2104 && (unrolling_factor == 1 | 1959 && (known_eq (unrolling_factor, 1U) |
2105 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt)) | 1960 || (group_size == DR_GROUP_SIZE (first_stmt_info) |
2106 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))) | 1961 && DR_GROUP_GAP (first_stmt_info) == 0))) |
2107 { | 1962 { |
2108 load_permutation.release (); | 1963 load_permutation.release (); |
2109 continue; | 1964 continue; |
2110 } | 1965 } |
2111 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation; | 1966 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation; |
2115 if (loads_permuted) | 1970 if (loads_permuted) |
2116 { | 1971 { |
2117 if (!vect_supported_load_permutation_p (new_instance)) | 1972 if (!vect_supported_load_permutation_p (new_instance)) |
2118 { | 1973 { |
2119 if (dump_enabled_p ()) | 1974 if (dump_enabled_p ()) |
2120 { | 1975 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2121 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1976 "Build SLP failed: unsupported load " |
2122 "Build SLP failed: unsupported load " | 1977 "permutation %G", stmt_info->stmt); |
2123 "permutation "); | 1978 vect_free_slp_instance (new_instance, false); |
2124 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, | |
2125 TDF_SLIM, stmt, 0); | |
2126 } | |
2127 vect_free_slp_instance (new_instance); | |
2128 return false; | 1979 return false; |
2129 } | 1980 } |
2130 } | 1981 } |
2131 | 1982 |
2132 /* If the loads and stores can be handled with load/store-lan | 1983 /* If the loads and stores can be handled with load/store-lan |
2133 instructions do not generate this SLP instance. */ | 1984 instructions do not generate this SLP instance. */ |
2134 if (is_a <loop_vec_info> (vinfo) | 1985 if (is_a <loop_vec_info> (vinfo) |
2135 && loads_permuted | 1986 && loads_permuted |
2136 && dr && vect_store_lanes_supported (vectype, group_size)) | 1987 && dr && vect_store_lanes_supported (vectype, group_size, false)) |
2137 { | 1988 { |
2138 slp_tree load_node; | 1989 slp_tree load_node; |
2139 FOR_EACH_VEC_ELT (loads, i, load_node) | 1990 FOR_EACH_VEC_ELT (loads, i, load_node) |
2140 { | 1991 { |
2141 gimple *first_stmt = GROUP_FIRST_ELEMENT | 1992 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT |
2142 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0])); | 1993 (SLP_TREE_SCALAR_STMTS (load_node)[0]); |
2143 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt); | 1994 /* Use SLP for strided accesses (or if we can't load-lanes). */ |
2144 /* Use SLP for strided accesses (or if we | |
2145 can't load-lanes). */ | |
2146 if (STMT_VINFO_STRIDED_P (stmt_vinfo) | 1995 if (STMT_VINFO_STRIDED_P (stmt_vinfo) |
2147 || ! vect_load_lanes_supported | 1996 || ! vect_load_lanes_supported |
2148 (STMT_VINFO_VECTYPE (stmt_vinfo), | 1997 (STMT_VINFO_VECTYPE (stmt_vinfo), |
2149 GROUP_SIZE (stmt_vinfo))) | 1998 DR_GROUP_SIZE (stmt_vinfo), false)) |
2150 break; | 1999 break; |
2151 } | 2000 } |
2152 if (i == loads.length ()) | 2001 if (i == loads.length ()) |
2153 { | 2002 { |
2154 if (dump_enabled_p ()) | 2003 if (dump_enabled_p ()) |
2155 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 2004 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2156 "Built SLP cancelled: can use " | 2005 "Built SLP cancelled: can use " |
2157 "load/store-lanes\n"); | 2006 "load/store-lanes\n"); |
2158 vect_free_slp_instance (new_instance); | 2007 vect_free_slp_instance (new_instance, false); |
2159 return false; | 2008 return false; |
2160 } | 2009 } |
2161 } | 2010 } |
2162 | 2011 |
2163 vinfo->slp_instances.safe_push (new_instance); | 2012 vinfo->slp_instances.safe_push (new_instance); |
2180 loads.release (); | 2029 loads.release (); |
2181 } | 2030 } |
2182 | 2031 |
2183 /* For basic block SLP, try to break the group up into multiples of the | 2032 /* For basic block SLP, try to break the group up into multiples of the |
2184 vector size. */ | 2033 vector size. */ |
2034 unsigned HOST_WIDE_INT const_nunits; | |
2185 if (is_a <bb_vec_info> (vinfo) | 2035 if (is_a <bb_vec_info> (vinfo) |
2186 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) | 2036 && STMT_VINFO_GROUPED_ACCESS (stmt_info) |
2187 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))) | 2037 && DR_GROUP_FIRST_ELEMENT (stmt_info) |
2038 && nunits.is_constant (&const_nunits)) | |
2188 { | 2039 { |
2189 /* We consider breaking the group only on VF boundaries from the existing | 2040 /* We consider breaking the group only on VF boundaries from the existing |
2190 start. */ | 2041 start. */ |
2191 for (i = 0; i < group_size; i++) | 2042 for (i = 0; i < group_size; i++) |
2192 if (!matches[i]) break; | 2043 if (!matches[i]) break; |
2193 | 2044 |
2194 if (i >= nunits && i < group_size) | 2045 if (i >= const_nunits && i < group_size) |
2195 { | 2046 { |
2196 /* Split into two groups at the first vector boundary before i. */ | 2047 /* Split into two groups at the first vector boundary before i. */ |
2197 gcc_assert ((nunits & (nunits - 1)) == 0); | 2048 gcc_assert ((const_nunits & (const_nunits - 1)) == 0); |
2198 unsigned group1_size = i & ~(nunits - 1); | 2049 unsigned group1_size = i & ~(const_nunits - 1); |
2199 | 2050 |
2200 gimple *rest = vect_split_slp_store_group (stmt, group1_size); | 2051 stmt_vec_info rest = vect_split_slp_store_group (stmt_info, |
2201 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size); | 2052 group1_size); |
2053 bool res = vect_analyze_slp_instance (vinfo, stmt_info, | |
2054 max_tree_size); | |
2202 /* If the first non-match was in the middle of a vector, | 2055 /* If the first non-match was in the middle of a vector, |
2203 skip the rest of that vector. */ | 2056 skip the rest of that vector. */ |
2204 if (group1_size < i) | 2057 if (group1_size < i) |
2205 { | 2058 { |
2206 i = group1_size + nunits; | 2059 i = group1_size + const_nunits; |
2207 if (i < group_size) | 2060 if (i < group_size) |
2208 rest = vect_split_slp_store_group (rest, nunits); | 2061 rest = vect_split_slp_store_group (rest, const_nunits); |
2209 } | 2062 } |
2210 if (i < group_size) | 2063 if (i < group_size) |
2211 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size); | 2064 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size); |
2212 return res; | 2065 return res; |
2213 } | 2066 } |
2220 | 2073 |
2221 | 2074 |
2222 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP | 2075 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP |
2223 trees of packed scalar stmts if SLP is possible. */ | 2076 trees of packed scalar stmts if SLP is possible. */ |
2224 | 2077 |
2225 bool | 2078 opt_result |
2226 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) | 2079 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) |
2227 { | 2080 { |
2228 unsigned int i; | 2081 unsigned int i; |
2229 gimple *first_element; | 2082 stmt_vec_info first_element; |
2230 | 2083 |
2231 if (dump_enabled_p ()) | 2084 DUMP_VECT_SCOPE ("vect_analyze_slp"); |
2232 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n"); | |
2233 | 2085 |
2234 /* Find SLP sequences starting from groups of grouped stores. */ | 2086 /* Find SLP sequences starting from groups of grouped stores. */ |
2235 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element) | 2087 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element) |
2236 vect_analyze_slp_instance (vinfo, first_element, max_tree_size); | 2088 vect_analyze_slp_instance (vinfo, first_element, max_tree_size); |
2237 | 2089 |
2243 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element) | 2095 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element) |
2244 if (! vect_analyze_slp_instance (vinfo, first_element, | 2096 if (! vect_analyze_slp_instance (vinfo, first_element, |
2245 max_tree_size)) | 2097 max_tree_size)) |
2246 { | 2098 { |
2247 /* Dissolve reduction chain group. */ | 2099 /* Dissolve reduction chain group. */ |
2248 gimple *next, *stmt = first_element; | 2100 stmt_vec_info vinfo = first_element; |
2249 while (stmt) | 2101 while (vinfo) |
2250 { | 2102 { |
2251 stmt_vec_info vinfo = vinfo_for_stmt (stmt); | 2103 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo); |
2252 next = GROUP_NEXT_ELEMENT (vinfo); | 2104 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL; |
2253 GROUP_FIRST_ELEMENT (vinfo) = NULL; | 2105 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL; |
2254 GROUP_NEXT_ELEMENT (vinfo) = NULL; | 2106 vinfo = next; |
2255 stmt = next; | |
2256 } | 2107 } |
2257 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element)) | 2108 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def; |
2258 = vect_internal_def; | |
2259 } | 2109 } |
2260 } | 2110 } |
2261 | 2111 |
2262 /* Find SLP sequences starting from groups of reductions. */ | 2112 /* Find SLP sequences starting from groups of reductions. */ |
2263 if (loop_vinfo->reductions.length () > 1) | 2113 if (loop_vinfo->reductions.length () > 1) |
2264 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0], | 2114 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0], |
2265 max_tree_size); | 2115 max_tree_size); |
2266 } | 2116 } |
2267 | 2117 |
2268 return true; | 2118 return opt_result::success (); |
2269 } | 2119 } |
2270 | 2120 |
2271 | 2121 |
2272 /* For each possible SLP instance decide whether to SLP it and calculate overall | 2122 /* For each possible SLP instance decide whether to SLP it and calculate overall |
2273 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at | 2123 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at |
2274 least one instance. */ | 2124 least one instance. */ |
2275 | 2125 |
2276 bool | 2126 bool |
2277 vect_make_slp_decision (loop_vec_info loop_vinfo) | 2127 vect_make_slp_decision (loop_vec_info loop_vinfo) |
2278 { | 2128 { |
2279 unsigned int i, unrolling_factor = 1; | 2129 unsigned int i; |
2130 poly_uint64 unrolling_factor = 1; | |
2280 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo); | 2131 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo); |
2281 slp_instance instance; | 2132 slp_instance instance; |
2282 int decided_to_slp = 0; | 2133 int decided_to_slp = 0; |
2283 | 2134 |
2284 if (dump_enabled_p ()) | 2135 DUMP_VECT_SCOPE ("vect_make_slp_decision"); |
2285 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ===" | |
2286 "\n"); | |
2287 | 2136 |
2288 FOR_EACH_VEC_ELT (slp_instances, i, instance) | 2137 FOR_EACH_VEC_ELT (slp_instances, i, instance) |
2289 { | 2138 { |
2290 /* FORNOW: SLP if you can. */ | 2139 /* FORNOW: SLP if you can. */ |
2291 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance)) | 2140 /* All unroll factors have the form current_vector_size * X for some |
2292 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance); | 2141 rational X, so they must have a common multiple. */ |
2142 unrolling_factor | |
2143 = force_common_multiple (unrolling_factor, | |
2144 SLP_INSTANCE_UNROLLING_FACTOR (instance)); | |
2293 | 2145 |
2294 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we | 2146 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we |
2295 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and | 2147 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and |
2296 loop-based vectorization. Such stmts will be marked as HYBRID. */ | 2148 loop-based vectorization. Such stmts will be marked as HYBRID. */ |
2297 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1); | 2149 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1); |
2299 } | 2151 } |
2300 | 2152 |
2301 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor; | 2153 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor; |
2302 | 2154 |
2303 if (decided_to_slp && dump_enabled_p ()) | 2155 if (decided_to_slp && dump_enabled_p ()) |
2304 dump_printf_loc (MSG_NOTE, vect_location, | 2156 { |
2305 "Decided to SLP %d instances. Unrolling factor %d\n", | 2157 dump_printf_loc (MSG_NOTE, vect_location, |
2306 decided_to_slp, unrolling_factor); | 2158 "Decided to SLP %d instances. Unrolling factor ", |
2159 decided_to_slp); | |
2160 dump_dec (MSG_NOTE, unrolling_factor); | |
2161 dump_printf (MSG_NOTE, "\n"); | |
2162 } | |
2307 | 2163 |
2308 return (decided_to_slp > 0); | 2164 return (decided_to_slp > 0); |
2309 } | 2165 } |
2310 | 2166 |
2311 | 2167 |
2313 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */ | 2169 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */ |
2314 | 2170 |
2315 static void | 2171 static void |
2316 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype) | 2172 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype) |
2317 { | 2173 { |
2318 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i]; | 2174 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i]; |
2319 imm_use_iterator imm_iter; | 2175 imm_use_iterator imm_iter; |
2320 gimple *use_stmt; | 2176 gimple *use_stmt; |
2321 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt); | 2177 stmt_vec_info use_vinfo; |
2322 slp_tree child; | 2178 slp_tree child; |
2323 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); | 2179 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); |
2324 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
2325 int j; | 2180 int j; |
2326 | 2181 |
2327 /* Propagate hybrid down the SLP tree. */ | 2182 /* Propagate hybrid down the SLP tree. */ |
2328 if (stype == hybrid) | 2183 if (stype == hybrid) |
2329 ; | 2184 ; |
2333 { | 2188 { |
2334 /* Check if a pure SLP stmt has uses in non-SLP stmts. */ | 2189 /* Check if a pure SLP stmt has uses in non-SLP stmts. */ |
2335 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo)); | 2190 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo)); |
2336 /* If we get a pattern stmt here we have to use the LHS of the | 2191 /* If we get a pattern stmt here we have to use the LHS of the |
2337 original stmt for immediate uses. */ | 2192 original stmt for immediate uses. */ |
2338 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo) | 2193 gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt; |
2339 && STMT_VINFO_RELATED_STMT (stmt_vinfo)) | |
2340 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo); | |
2341 tree def; | 2194 tree def; |
2342 if (gimple_code (stmt) == GIMPLE_PHI) | 2195 if (gimple_code (stmt) == GIMPLE_PHI) |
2343 def = gimple_phi_result (stmt); | 2196 def = gimple_phi_result (stmt); |
2344 else | 2197 else |
2345 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF); | 2198 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF); |
2346 if (def) | 2199 if (def) |
2347 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def) | 2200 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def) |
2348 { | 2201 { |
2349 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) | 2202 use_vinfo = loop_vinfo->lookup_stmt (use_stmt); |
2203 if (!use_vinfo) | |
2350 continue; | 2204 continue; |
2351 use_vinfo = vinfo_for_stmt (use_stmt); | 2205 use_vinfo = vect_stmt_to_vectorize (use_vinfo); |
2352 if (STMT_VINFO_IN_PATTERN_P (use_vinfo) | |
2353 && STMT_VINFO_RELATED_STMT (use_vinfo)) | |
2354 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo)); | |
2355 if (!STMT_SLP_TYPE (use_vinfo) | 2206 if (!STMT_SLP_TYPE (use_vinfo) |
2356 && (STMT_VINFO_RELEVANT (use_vinfo) | 2207 && (STMT_VINFO_RELEVANT (use_vinfo) |
2357 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo))) | 2208 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo))) |
2358 && !(gimple_code (use_stmt) == GIMPLE_PHI | 2209 && !(gimple_code (use_stmt) == GIMPLE_PHI |
2359 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def)) | 2210 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def)) |
2360 { | 2211 { |
2361 if (dump_enabled_p ()) | 2212 if (dump_enabled_p ()) |
2362 { | 2213 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP " |
2363 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP " | 2214 "def in non-SLP stmt: %G", use_stmt); |
2364 "def in non-SLP stmt: "); | |
2365 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0); | |
2366 } | |
2367 stype = hybrid; | 2215 stype = hybrid; |
2368 } | 2216 } |
2369 } | 2217 } |
2370 } | 2218 } |
2371 | 2219 |
2372 if (stype == hybrid | 2220 if (stype == hybrid |
2373 && !HYBRID_SLP_STMT (stmt_vinfo)) | 2221 && !HYBRID_SLP_STMT (stmt_vinfo)) |
2374 { | 2222 { |
2375 if (dump_enabled_p ()) | 2223 if (dump_enabled_p ()) |
2376 { | 2224 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G", |
2377 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: "); | 2225 stmt_vinfo->stmt); |
2378 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); | |
2379 } | |
2380 STMT_SLP_TYPE (stmt_vinfo) = hybrid; | 2226 STMT_SLP_TYPE (stmt_vinfo) = hybrid; |
2381 } | 2227 } |
2382 | 2228 |
2383 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) | 2229 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) |
2384 if (SLP_TREE_DEF_TYPE (child) != vect_external_def) | 2230 if (SLP_TREE_DEF_TYPE (child) != vect_external_def) |
2389 | 2235 |
2390 static tree | 2236 static tree |
2391 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data) | 2237 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data) |
2392 { | 2238 { |
2393 walk_stmt_info *wi = (walk_stmt_info *)data; | 2239 walk_stmt_info *wi = (walk_stmt_info *)data; |
2394 struct loop *loopp = (struct loop *)wi->info; | 2240 loop_vec_info loop_vinfo = (loop_vec_info) wi->info; |
2395 | 2241 |
2396 if (wi->is_lhs) | 2242 if (wi->is_lhs) |
2397 return NULL_TREE; | 2243 return NULL_TREE; |
2398 | 2244 |
2399 if (TREE_CODE (*tp) == SSA_NAME | 2245 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp); |
2400 && !SSA_NAME_IS_DEFAULT_DEF (*tp)) | 2246 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info)) |
2401 { | 2247 { |
2402 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp); | 2248 if (dump_enabled_p ()) |
2403 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt)) | 2249 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G", |
2404 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt))) | 2250 def_stmt_info->stmt); |
2405 { | 2251 STMT_SLP_TYPE (def_stmt_info) = hybrid; |
2406 if (dump_enabled_p ()) | |
2407 { | |
2408 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: "); | |
2409 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0); | |
2410 } | |
2411 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid; | |
2412 } | |
2413 } | 2252 } |
2414 | 2253 |
2415 return NULL_TREE; | 2254 return NULL_TREE; |
2416 } | 2255 } |
2417 | 2256 |
2418 static tree | 2257 static tree |
2419 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled, | 2258 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled, |
2420 walk_stmt_info *) | 2259 walk_stmt_info *wi) |
2421 { | 2260 { |
2422 stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi)); | 2261 loop_vec_info loop_vinfo = (loop_vec_info) wi->info; |
2262 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi)); | |
2423 /* If the stmt is in a SLP instance then this isn't a reason | 2263 /* If the stmt is in a SLP instance then this isn't a reason |
2424 to mark use definitions in other SLP instances as hybrid. */ | 2264 to mark use definitions in other SLP instances as hybrid. */ |
2425 if (! STMT_SLP_TYPE (use_vinfo) | 2265 if (! STMT_SLP_TYPE (use_vinfo) |
2426 && (STMT_VINFO_RELEVANT (use_vinfo) | 2266 && (STMT_VINFO_RELEVANT (use_vinfo) |
2427 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo))) | 2267 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo))) |
2440 { | 2280 { |
2441 unsigned int i; | 2281 unsigned int i; |
2442 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo); | 2282 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo); |
2443 slp_instance instance; | 2283 slp_instance instance; |
2444 | 2284 |
2445 if (dump_enabled_p ()) | 2285 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp"); |
2446 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ===" | |
2447 "\n"); | |
2448 | 2286 |
2449 /* First walk all pattern stmt in the loop and mark defs of uses as | 2287 /* First walk all pattern stmt in the loop and mark defs of uses as |
2450 hybrid because immediate uses in them are not recorded. */ | 2288 hybrid because immediate uses in them are not recorded. */ |
2451 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i) | 2289 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i) |
2452 { | 2290 { |
2453 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i]; | 2291 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i]; |
2454 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); | 2292 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); |
2455 gsi_next (&gsi)) | 2293 gsi_next (&gsi)) |
2456 { | 2294 { |
2457 gimple *stmt = gsi_stmt (gsi); | 2295 gimple *stmt = gsi_stmt (gsi); |
2458 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | 2296 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt); |
2459 if (STMT_VINFO_IN_PATTERN_P (stmt_info)) | 2297 if (STMT_VINFO_IN_PATTERN_P (stmt_info)) |
2460 { | 2298 { |
2461 walk_stmt_info wi; | 2299 walk_stmt_info wi; |
2462 memset (&wi, 0, sizeof (wi)); | 2300 memset (&wi, 0, sizeof (wi)); |
2463 wi.info = LOOP_VINFO_LOOP (loop_vinfo); | 2301 wi.info = loop_vinfo; |
2464 gimple_stmt_iterator gsi2 | 2302 gimple_stmt_iterator gsi2 |
2465 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); | 2303 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt); |
2466 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2, | 2304 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2, |
2467 vect_detect_hybrid_slp_1, &wi); | 2305 vect_detect_hybrid_slp_1, &wi); |
2468 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info), | 2306 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info), |
2469 vect_detect_hybrid_slp_2, | 2307 vect_detect_hybrid_slp_2, |
2470 vect_detect_hybrid_slp_1, &wi); | 2308 vect_detect_hybrid_slp_1, &wi); |
2486 | 2324 |
2487 /* Initialize a bb_vec_info struct for the statements between | 2325 /* Initialize a bb_vec_info struct for the statements between |
2488 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */ | 2326 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */ |
2489 | 2327 |
2490 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in, | 2328 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in, |
2491 gimple_stmt_iterator region_end_in) | 2329 gimple_stmt_iterator region_end_in, |
2492 : vec_info (vec_info::bb, init_cost (NULL)), | 2330 vec_info_shared *shared) |
2331 : vec_info (vec_info::bb, init_cost (NULL), shared), | |
2493 bb (gsi_bb (region_begin_in)), | 2332 bb (gsi_bb (region_begin_in)), |
2494 region_begin (region_begin_in), | 2333 region_begin (region_begin_in), |
2495 region_end (region_end_in) | 2334 region_end (region_end_in) |
2496 { | 2335 { |
2497 gimple_stmt_iterator gsi; | 2336 gimple_stmt_iterator gsi; |
2499 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end); | 2338 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end); |
2500 gsi_next (&gsi)) | 2339 gsi_next (&gsi)) |
2501 { | 2340 { |
2502 gimple *stmt = gsi_stmt (gsi); | 2341 gimple *stmt = gsi_stmt (gsi); |
2503 gimple_set_uid (stmt, 0); | 2342 gimple_set_uid (stmt, 0); |
2504 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this)); | 2343 add_stmt (stmt); |
2505 } | 2344 } |
2506 | 2345 |
2507 bb->aux = this; | 2346 bb->aux = this; |
2508 } | 2347 } |
2509 | 2348 |
2513 | 2352 |
2514 _bb_vec_info::~_bb_vec_info () | 2353 _bb_vec_info::~_bb_vec_info () |
2515 { | 2354 { |
2516 for (gimple_stmt_iterator si = region_begin; | 2355 for (gimple_stmt_iterator si = region_begin; |
2517 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si)) | 2356 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si)) |
2518 { | 2357 /* Reset region marker. */ |
2519 gimple *stmt = gsi_stmt (si); | 2358 gimple_set_uid (gsi_stmt (si), -1); |
2520 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
2521 | |
2522 if (stmt_info) | |
2523 /* Free stmt_vec_info. */ | |
2524 free_stmt_vec_info (stmt); | |
2525 | |
2526 /* Reset region marker. */ | |
2527 gimple_set_uid (stmt, -1); | |
2528 } | |
2529 | 2359 |
2530 bb->aux = NULL; | 2360 bb->aux = NULL; |
2531 } | 2361 } |
2532 | 2362 |
2533 | 2363 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE, |
2534 /* Analyze statements contained in SLP tree NODE after recursively analyzing | 2364 given then that child nodes have already been processed, and that |
2535 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE. | 2365 their def types currently match their SLP node's def type. */ |
2536 | |
2537 Return true if the operations are supported. */ | |
2538 | 2366 |
2539 static bool | 2367 static bool |
2540 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, | 2368 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node, |
2541 slp_instance node_instance) | 2369 slp_instance node_instance, |
2542 { | 2370 stmt_vector_for_cost *cost_vec) |
2543 bool dummy; | 2371 { |
2544 int i, j; | 2372 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0]; |
2545 gimple *stmt; | |
2546 slp_tree child; | |
2547 | |
2548 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) | |
2549 return true; | |
2550 | |
2551 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) | |
2552 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance)) | |
2553 return false; | |
2554 | |
2555 stmt = SLP_TREE_SCALAR_STMTS (node)[0]; | |
2556 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
2557 gcc_assert (stmt_info); | |
2558 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect); | 2373 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect); |
2559 | 2374 |
2560 /* For BB vectorization vector types are assigned here. | 2375 /* For BB vectorization vector types are assigned here. |
2561 Memory accesses already got their vector type assigned | 2376 Memory accesses already got their vector type assigned |
2562 in vect_analyze_data_refs. */ | 2377 in vect_analyze_data_refs. */ |
2563 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); | 2378 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); |
2564 if (bb_vinfo | 2379 if (bb_vinfo |
2565 && ! STMT_VINFO_DATA_REF (stmt_info)) | 2380 && ! STMT_VINFO_DATA_REF (stmt_info)) |
2566 { | 2381 { |
2567 gcc_assert (PURE_SLP_STMT (stmt_info)); | 2382 tree vectype, nunits_vectype; |
2568 | 2383 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype, |
2569 tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); | 2384 &nunits_vectype)) |
2570 if (dump_enabled_p ()) | 2385 /* We checked this when building the node. */ |
2571 { | 2386 gcc_unreachable (); |
2572 dump_printf_loc (MSG_NOTE, vect_location, | 2387 if (vectype == boolean_type_node) |
2573 "get vectype for scalar type: "); | 2388 { |
2574 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type); | 2389 vectype = vect_get_mask_type_for_stmt (stmt_info); |
2575 dump_printf (MSG_NOTE, "\n"); | 2390 if (!vectype) |
2576 } | 2391 /* vect_get_mask_type_for_stmt has already explained the |
2577 | 2392 failure. */ |
2578 tree vectype = get_vectype_for_scalar_type (scalar_type); | 2393 return false; |
2579 if (!vectype) | 2394 } |
2580 { | 2395 |
2581 if (dump_enabled_p ()) | 2396 stmt_vec_info sstmt_info; |
2582 { | 2397 unsigned int i; |
2583 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 2398 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info) |
2584 "not SLPed: unsupported data-type "); | 2399 STMT_VINFO_VECTYPE (sstmt_info) = vectype; |
2585 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, | |
2586 scalar_type); | |
2587 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); | |
2588 } | |
2589 return false; | |
2590 } | |
2591 | |
2592 if (dump_enabled_p ()) | |
2593 { | |
2594 dump_printf_loc (MSG_NOTE, vect_location, "vectype: "); | |
2595 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype); | |
2596 dump_printf (MSG_NOTE, "\n"); | |
2597 } | |
2598 | |
2599 gimple *sstmt; | |
2600 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt) | |
2601 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype; | |
2602 } | 2400 } |
2603 | 2401 |
2604 /* Calculate the number of vector statements to be created for the | 2402 /* Calculate the number of vector statements to be created for the |
2605 scalar stmts in this node. For SLP reductions it is equal to the | 2403 scalar stmts in this node. For SLP reductions it is equal to the |
2606 number of vector statements in the children (which has already been | 2404 number of vector statements in the children (which has already been |
2607 calculated by the recursive call). Otherwise it is the number of | 2405 calculated by the recursive call). Otherwise it is the number of |
2608 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by | 2406 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by |
2609 VF divided by the number of elements in a vector. */ | 2407 VF divided by the number of elements in a vector. */ |
2610 if (GROUP_FIRST_ELEMENT (stmt_info) | 2408 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info) |
2611 && !STMT_VINFO_GROUPED_ACCESS (stmt_info)) | 2409 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)) |
2612 SLP_TREE_NUMBER_OF_VEC_STMTS (node) | 2410 SLP_TREE_NUMBER_OF_VEC_STMTS (node) |
2613 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]); | 2411 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]); |
2614 else | 2412 else |
2615 { | 2413 { |
2616 int vf; | 2414 poly_uint64 vf; |
2617 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) | 2415 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) |
2618 vf = loop_vinfo->vectorization_factor; | 2416 vf = loop_vinfo->vectorization_factor; |
2619 else | 2417 else |
2620 vf = 1; | 2418 vf = 1; |
2621 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance); | 2419 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance); |
2622 tree vectype = STMT_VINFO_VECTYPE (stmt_info); | 2420 tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
2623 SLP_TREE_NUMBER_OF_VEC_STMTS (node) | 2421 SLP_TREE_NUMBER_OF_VEC_STMTS (node) |
2624 = vf * group_size / TYPE_VECTOR_SUBPARTS (vectype); | 2422 = vect_get_num_vectors (vf * group_size, vectype); |
2625 } | 2423 } |
2424 | |
2425 bool dummy; | |
2426 return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec); | |
2427 } | |
2428 | |
2429 /* Analyze statements contained in SLP tree NODE after recursively analyzing | |
2430 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE. | |
2431 | |
2432 Return true if the operations are supported. */ | |
2433 | |
2434 static bool | |
2435 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, | |
2436 slp_instance node_instance, | |
2437 scalar_stmts_to_slp_tree_map_t *visited, | |
2438 scalar_stmts_to_slp_tree_map_t *lvisited, | |
2439 stmt_vector_for_cost *cost_vec) | |
2440 { | |
2441 int i, j; | |
2442 slp_tree child; | |
2443 | |
2444 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) | |
2445 return true; | |
2446 | |
2447 /* If we already analyzed the exact same set of scalar stmts we're done. | |
2448 We share the generated vector stmts for those. */ | |
2449 slp_tree *leader; | |
2450 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node))) | |
2451 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node)))) | |
2452 { | |
2453 SLP_TREE_NUMBER_OF_VEC_STMTS (node) | |
2454 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader); | |
2455 return true; | |
2456 } | |
2457 | |
2458 /* The SLP graph is acyclic so not caching whether we failed or succeeded | |
2459 doesn't result in any issue since we throw away the lvisited set | |
2460 when we fail. */ | |
2461 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node); | |
2462 | |
2463 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) | |
2464 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance, | |
2465 visited, lvisited, cost_vec)) | |
2466 return false; | |
2626 | 2467 |
2627 /* Push SLP node def-type to stmt operands. */ | 2468 /* Push SLP node def-type to stmt operands. */ |
2628 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) | 2469 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) |
2629 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) | 2470 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) |
2630 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])) | 2471 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) |
2631 = SLP_TREE_DEF_TYPE (child); | 2472 = SLP_TREE_DEF_TYPE (child); |
2632 bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance); | 2473 bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance, |
2474 cost_vec); | |
2633 /* Restore def-types. */ | 2475 /* Restore def-types. */ |
2634 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) | 2476 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) |
2635 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) | 2477 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) |
2636 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])) | 2478 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0]) |
2637 = vect_internal_def; | 2479 = vect_internal_def; |
2638 if (! res) | 2480 if (! res) |
2639 return false; | 2481 return false; |
2640 | 2482 |
2641 return true; | 2483 return true; |
2649 vect_slp_analyze_operations (vec_info *vinfo) | 2491 vect_slp_analyze_operations (vec_info *vinfo) |
2650 { | 2492 { |
2651 slp_instance instance; | 2493 slp_instance instance; |
2652 int i; | 2494 int i; |
2653 | 2495 |
2654 if (dump_enabled_p ()) | 2496 DUMP_VECT_SCOPE ("vect_slp_analyze_operations"); |
2655 dump_printf_loc (MSG_NOTE, vect_location, | 2497 |
2656 "=== vect_slp_analyze_operations ===\n"); | 2498 scalar_stmts_to_slp_tree_map_t *visited |
2657 | 2499 = new scalar_stmts_to_slp_tree_map_t (); |
2658 for (i = 0; vinfo->slp_instances.iterate (i, &instance); ) | 2500 for (i = 0; vinfo->slp_instances.iterate (i, &instance); ) |
2659 { | 2501 { |
2502 scalar_stmts_to_slp_tree_map_t lvisited; | |
2503 stmt_vector_for_cost cost_vec; | |
2504 cost_vec.create (2); | |
2660 if (!vect_slp_analyze_node_operations (vinfo, | 2505 if (!vect_slp_analyze_node_operations (vinfo, |
2661 SLP_INSTANCE_TREE (instance), | 2506 SLP_INSTANCE_TREE (instance), |
2662 instance)) | 2507 instance, visited, &lvisited, |
2508 &cost_vec)) | |
2663 { | 2509 { |
2510 slp_tree node = SLP_INSTANCE_TREE (instance); | |
2511 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0]; | |
2664 dump_printf_loc (MSG_NOTE, vect_location, | 2512 dump_printf_loc (MSG_NOTE, vect_location, |
2665 "removing SLP instance operations starting from: "); | 2513 "removing SLP instance operations starting from: %G", |
2666 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, | 2514 stmt_info->stmt); |
2667 SLP_TREE_SCALAR_STMTS | 2515 vect_free_slp_instance (instance, false); |
2668 (SLP_INSTANCE_TREE (instance))[0], 0); | |
2669 vect_free_slp_instance (instance); | |
2670 vinfo->slp_instances.ordered_remove (i); | 2516 vinfo->slp_instances.ordered_remove (i); |
2517 cost_vec.release (); | |
2671 } | 2518 } |
2672 else | 2519 else |
2673 { | 2520 { |
2674 /* Compute the costs of the SLP instance. */ | 2521 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin(); |
2675 vect_analyze_slp_cost (instance, vinfo->target_cost_data); | 2522 x != lvisited.end(); ++x) |
2523 visited->put ((*x).first.copy (), (*x).second); | |
2676 i++; | 2524 i++; |
2677 } | 2525 |
2678 } | 2526 add_stmt_costs (vinfo->target_cost_data, &cost_vec); |
2527 cost_vec.release (); | |
2528 } | |
2529 } | |
2530 delete visited; | |
2679 | 2531 |
2680 return !vinfo->slp_instances.is_empty (); | 2532 return !vinfo->slp_instances.is_empty (); |
2681 } | 2533 } |
2682 | 2534 |
2683 | 2535 |
2684 /* Compute the scalar cost of the SLP node NODE and its children | 2536 /* Compute the scalar cost of the SLP node NODE and its children |
2685 and return it. Do not account defs that are marked in LIFE and | 2537 and return it. Do not account defs that are marked in LIFE and |
2686 update LIFE according to uses of NODE. */ | 2538 update LIFE according to uses of NODE. */ |
2687 | 2539 |
2688 static unsigned | 2540 static void |
2689 vect_bb_slp_scalar_cost (basic_block bb, | 2541 vect_bb_slp_scalar_cost (basic_block bb, |
2690 slp_tree node, vec<bool, va_heap> *life) | 2542 slp_tree node, vec<bool, va_heap> *life, |
2691 { | 2543 stmt_vector_for_cost *cost_vec) |
2692 unsigned scalar_cost = 0; | 2544 { |
2693 unsigned i; | 2545 unsigned i; |
2694 gimple *stmt; | 2546 stmt_vec_info stmt_info; |
2695 slp_tree child; | 2547 slp_tree child; |
2696 | 2548 |
2697 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) | 2549 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) |
2698 { | 2550 { |
2699 unsigned stmt_cost; | 2551 gimple *stmt = stmt_info->stmt; |
2552 vec_info *vinfo = stmt_info->vinfo; | |
2700 ssa_op_iter op_iter; | 2553 ssa_op_iter op_iter; |
2701 def_operand_p def_p; | 2554 def_operand_p def_p; |
2702 stmt_vec_info stmt_info; | |
2703 | 2555 |
2704 if ((*life)[i]) | 2556 if ((*life)[i]) |
2705 continue; | 2557 continue; |
2706 | 2558 |
2707 /* If there is a non-vectorized use of the defs then the scalar | 2559 /* If there is a non-vectorized use of the defs then the scalar |
2712 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF) | 2564 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF) |
2713 { | 2565 { |
2714 imm_use_iterator use_iter; | 2566 imm_use_iterator use_iter; |
2715 gimple *use_stmt; | 2567 gimple *use_stmt; |
2716 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p)) | 2568 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p)) |
2717 if (!is_gimple_debug (use_stmt) | 2569 if (!is_gimple_debug (use_stmt)) |
2718 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo, | |
2719 use_stmt) | |
2720 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt)))) | |
2721 { | 2570 { |
2722 (*life)[i] = true; | 2571 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt); |
2723 BREAK_FROM_IMM_USE_STMT (use_iter); | 2572 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info)) |
2573 { | |
2574 (*life)[i] = true; | |
2575 BREAK_FROM_IMM_USE_STMT (use_iter); | |
2576 } | |
2724 } | 2577 } |
2725 } | 2578 } |
2726 if ((*life)[i]) | 2579 if ((*life)[i]) |
2727 continue; | 2580 continue; |
2728 | 2581 |
2729 /* Count scalar stmts only once. */ | 2582 /* Count scalar stmts only once. */ |
2730 if (gimple_visited_p (stmt)) | 2583 if (gimple_visited_p (stmt)) |
2731 continue; | 2584 continue; |
2732 gimple_set_visited (stmt, true); | 2585 gimple_set_visited (stmt, true); |
2733 | 2586 |
2734 stmt_info = vinfo_for_stmt (stmt); | 2587 vect_cost_for_stmt kind; |
2735 if (STMT_VINFO_DATA_REF (stmt_info)) | 2588 if (STMT_VINFO_DATA_REF (stmt_info)) |
2736 { | 2589 { |
2737 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) | 2590 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) |
2738 stmt_cost = vect_get_stmt_cost (scalar_load); | 2591 kind = scalar_load; |
2739 else | 2592 else |
2740 stmt_cost = vect_get_stmt_cost (scalar_store); | 2593 kind = scalar_store; |
2741 } | 2594 } |
2742 else | 2595 else |
2743 stmt_cost = vect_get_stmt_cost (scalar_stmt); | 2596 kind = scalar_stmt; |
2744 | 2597 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body); |
2745 scalar_cost += stmt_cost; | |
2746 } | 2598 } |
2747 | 2599 |
2748 auto_vec<bool, 20> subtree_life; | 2600 auto_vec<bool, 20> subtree_life; |
2749 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) | 2601 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
2750 { | 2602 { |
2751 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) | 2603 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) |
2752 { | 2604 { |
2753 /* Do not directly pass LIFE to the recursive call, copy it to | 2605 /* Do not directly pass LIFE to the recursive call, copy it to |
2754 confine changes in the callee to the current child/subtree. */ | 2606 confine changes in the callee to the current child/subtree. */ |
2755 subtree_life.safe_splice (*life); | 2607 subtree_life.safe_splice (*life); |
2756 scalar_cost += vect_bb_slp_scalar_cost (bb, child, &subtree_life); | 2608 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec); |
2757 subtree_life.truncate (0); | 2609 subtree_life.truncate (0); |
2758 } | 2610 } |
2759 } | 2611 } |
2760 | |
2761 return scalar_cost; | |
2762 } | 2612 } |
2763 | 2613 |
2764 /* Check if vectorization of the basic block is profitable. */ | 2614 /* Check if vectorization of the basic block is profitable. */ |
2765 | 2615 |
2766 static bool | 2616 static bool |
2771 int i; | 2621 int i; |
2772 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0; | 2622 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0; |
2773 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0; | 2623 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0; |
2774 | 2624 |
2775 /* Calculate scalar cost. */ | 2625 /* Calculate scalar cost. */ |
2626 stmt_vector_for_cost scalar_costs; | |
2627 scalar_costs.create (0); | |
2776 FOR_EACH_VEC_ELT (slp_instances, i, instance) | 2628 FOR_EACH_VEC_ELT (slp_instances, i, instance) |
2777 { | 2629 { |
2778 auto_vec<bool, 20> life; | 2630 auto_vec<bool, 20> life; |
2779 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance)); | 2631 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance)); |
2780 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo), | 2632 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo), |
2781 SLP_INSTANCE_TREE (instance), | 2633 SLP_INSTANCE_TREE (instance), |
2782 &life); | 2634 &life, &scalar_costs); |
2783 } | 2635 } |
2636 void *target_cost_data = init_cost (NULL); | |
2637 add_stmt_costs (target_cost_data, &scalar_costs); | |
2638 scalar_costs.release (); | |
2639 unsigned dummy; | |
2640 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy); | |
2641 destroy_cost_data (target_cost_data); | |
2784 | 2642 |
2785 /* Unset visited flag. */ | 2643 /* Unset visited flag. */ |
2786 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin; | 2644 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin; |
2787 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi)) | 2645 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi)) |
2788 gimple_set_visited (gsi_stmt (gsi), false); | 2646 gimple_set_visited (gsi_stmt (gsi), false); |
2820 | 2678 |
2821 static bb_vec_info | 2679 static bb_vec_info |
2822 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin, | 2680 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin, |
2823 gimple_stmt_iterator region_end, | 2681 gimple_stmt_iterator region_end, |
2824 vec<data_reference_p> datarefs, int n_stmts, | 2682 vec<data_reference_p> datarefs, int n_stmts, |
2825 bool &fatal) | 2683 bool &fatal, vec_info_shared *shared) |
2826 { | 2684 { |
2827 bb_vec_info bb_vinfo; | 2685 bb_vec_info bb_vinfo; |
2828 slp_instance instance; | 2686 slp_instance instance; |
2829 int i; | 2687 int i; |
2830 int min_vf = 2; | 2688 poly_uint64 min_vf = 2; |
2831 | 2689 |
2832 /* The first group of checks is independent of the vector size. */ | 2690 /* The first group of checks is independent of the vector size. */ |
2833 fatal = true; | 2691 fatal = true; |
2834 | 2692 |
2835 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) | 2693 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) |
2840 "basic block.\n"); | 2698 "basic block.\n"); |
2841 free_data_refs (datarefs); | 2699 free_data_refs (datarefs); |
2842 return NULL; | 2700 return NULL; |
2843 } | 2701 } |
2844 | 2702 |
2845 bb_vinfo = new _bb_vec_info (region_begin, region_end); | 2703 bb_vinfo = new _bb_vec_info (region_begin, region_end, shared); |
2846 if (!bb_vinfo) | 2704 if (!bb_vinfo) |
2847 return NULL; | 2705 return NULL; |
2848 | 2706 |
2849 BB_VINFO_DATAREFS (bb_vinfo) = datarefs; | 2707 BB_VINFO_DATAREFS (bb_vinfo) = datarefs; |
2708 bb_vinfo->shared->save_datarefs (); | |
2850 | 2709 |
2851 /* Analyze the data references. */ | 2710 /* Analyze the data references. */ |
2852 | 2711 |
2853 if (!vect_analyze_data_refs (bb_vinfo, &min_vf)) | 2712 if (!vect_analyze_data_refs (bb_vinfo, &min_vf)) |
2854 { | 2713 { |
2926 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); ) | 2785 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); ) |
2927 { | 2786 { |
2928 if (! vect_slp_analyze_and_verify_instance_alignment (instance) | 2787 if (! vect_slp_analyze_and_verify_instance_alignment (instance) |
2929 || ! vect_slp_analyze_instance_dependence (instance)) | 2788 || ! vect_slp_analyze_instance_dependence (instance)) |
2930 { | 2789 { |
2790 slp_tree node = SLP_INSTANCE_TREE (instance); | |
2791 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0]; | |
2931 dump_printf_loc (MSG_NOTE, vect_location, | 2792 dump_printf_loc (MSG_NOTE, vect_location, |
2932 "removing SLP instance operations starting from: "); | 2793 "removing SLP instance operations starting from: %G", |
2933 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, | 2794 stmt_info->stmt); |
2934 SLP_TREE_SCALAR_STMTS | 2795 vect_free_slp_instance (instance, false); |
2935 (SLP_INSTANCE_TREE (instance))[0], 0); | |
2936 vect_free_slp_instance (instance); | |
2937 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i); | 2796 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i); |
2938 continue; | 2797 continue; |
2939 } | 2798 } |
2940 | 2799 |
2941 /* Mark all the statements that we want to vectorize as pure SLP and | 2800 /* Mark all the statements that we want to vectorize as pure SLP and |
2988 bool | 2847 bool |
2989 vect_slp_bb (basic_block bb) | 2848 vect_slp_bb (basic_block bb) |
2990 { | 2849 { |
2991 bb_vec_info bb_vinfo; | 2850 bb_vec_info bb_vinfo; |
2992 gimple_stmt_iterator gsi; | 2851 gimple_stmt_iterator gsi; |
2993 unsigned int vector_sizes; | |
2994 bool any_vectorized = false; | 2852 bool any_vectorized = false; |
2995 | 2853 auto_vector_sizes vector_sizes; |
2996 if (dump_enabled_p ()) | 2854 |
2997 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n"); | 2855 DUMP_VECT_SCOPE ("vect_slp_analyze_bb"); |
2998 | 2856 |
2999 /* Autodetect first vector size we try. */ | 2857 /* Autodetect first vector size we try. */ |
3000 current_vector_size = 0; | 2858 current_vector_size = 0; |
3001 vector_sizes = targetm.vectorize.autovectorize_vector_sizes (); | 2859 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes); |
2860 unsigned int next_size = 0; | |
3002 | 2861 |
3003 gsi = gsi_start_bb (bb); | 2862 gsi = gsi_start_bb (bb); |
3004 | 2863 |
2864 poly_uint64 autodetected_vector_size = 0; | |
3005 while (1) | 2865 while (1) |
3006 { | 2866 { |
3007 if (gsi_end_p (gsi)) | 2867 if (gsi_end_p (gsi)) |
3008 break; | 2868 break; |
3009 | 2869 |
3017 if (is_gimple_debug (stmt)) | 2877 if (is_gimple_debug (stmt)) |
3018 continue; | 2878 continue; |
3019 insns++; | 2879 insns++; |
3020 | 2880 |
3021 if (gimple_location (stmt) != UNKNOWN_LOCATION) | 2881 if (gimple_location (stmt) != UNKNOWN_LOCATION) |
3022 vect_location = gimple_location (stmt); | 2882 vect_location = stmt; |
3023 | 2883 |
3024 if (!find_data_references_in_stmt (NULL, stmt, &datarefs)) | 2884 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs)) |
3025 break; | 2885 break; |
3026 } | 2886 } |
3027 | 2887 |
3028 /* Skip leading unhandled stmts. */ | 2888 /* Skip leading unhandled stmts. */ |
3029 if (gsi_stmt (region_begin) == gsi_stmt (gsi)) | 2889 if (gsi_stmt (region_begin) == gsi_stmt (gsi)) |
3034 | 2894 |
3035 gimple_stmt_iterator region_end = gsi; | 2895 gimple_stmt_iterator region_end = gsi; |
3036 | 2896 |
3037 bool vectorized = false; | 2897 bool vectorized = false; |
3038 bool fatal = false; | 2898 bool fatal = false; |
2899 vec_info_shared shared; | |
3039 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end, | 2900 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end, |
3040 datarefs, insns, fatal); | 2901 datarefs, insns, fatal, &shared); |
3041 if (bb_vinfo | 2902 if (bb_vinfo |
3042 && dbg_cnt (vect_slp)) | 2903 && dbg_cnt (vect_slp)) |
3043 { | 2904 { |
3044 if (dump_enabled_p ()) | 2905 if (dump_enabled_p ()) |
3045 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n"); | 2906 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n"); |
3046 | 2907 |
2908 bb_vinfo->shared->check_datarefs (); | |
3047 vect_schedule_slp (bb_vinfo); | 2909 vect_schedule_slp (bb_vinfo); |
3048 | 2910 |
3049 if (dump_enabled_p ()) | 2911 unsigned HOST_WIDE_INT bytes; |
3050 dump_printf_loc (MSG_NOTE, vect_location, | 2912 if (current_vector_size.is_constant (&bytes)) |
3051 "basic block part vectorized\n"); | 2913 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, |
2914 "basic block part vectorized using %wu byte " | |
2915 "vectors\n", bytes); | |
2916 else | |
2917 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, | |
2918 "basic block part vectorized using variable " | |
2919 "length vectors\n"); | |
3052 | 2920 |
3053 vectorized = true; | 2921 vectorized = true; |
3054 } | 2922 } |
3055 delete bb_vinfo; | 2923 delete bb_vinfo; |
3056 | 2924 |
3057 any_vectorized |= vectorized; | 2925 any_vectorized |= vectorized; |
3058 | 2926 |
3059 vector_sizes &= ~current_vector_size; | 2927 if (next_size == 0) |
2928 autodetected_vector_size = current_vector_size; | |
2929 | |
2930 if (next_size < vector_sizes.length () | |
2931 && known_eq (vector_sizes[next_size], autodetected_vector_size)) | |
2932 next_size += 1; | |
2933 | |
3060 if (vectorized | 2934 if (vectorized |
3061 || vector_sizes == 0 | 2935 || next_size == vector_sizes.length () |
3062 || current_vector_size == 0 | 2936 || known_eq (current_vector_size, 0U) |
3063 /* If vect_slp_analyze_bb_1 signaled that analysis for all | 2937 /* If vect_slp_analyze_bb_1 signaled that analysis for all |
3064 vector sizes will fail do not bother iterating. */ | 2938 vector sizes will fail do not bother iterating. */ |
3065 || fatal) | 2939 || fatal) |
3066 { | 2940 { |
3067 if (gsi_end_p (region_end)) | 2941 if (gsi_end_p (region_end)) |
3070 /* Skip the unhandled stmt. */ | 2944 /* Skip the unhandled stmt. */ |
3071 gsi_next (&gsi); | 2945 gsi_next (&gsi); |
3072 | 2946 |
3073 /* And reset vector sizes. */ | 2947 /* And reset vector sizes. */ |
3074 current_vector_size = 0; | 2948 current_vector_size = 0; |
3075 vector_sizes = targetm.vectorize.autovectorize_vector_sizes (); | 2949 next_size = 0; |
3076 } | 2950 } |
3077 else | 2951 else |
3078 { | 2952 { |
3079 /* Try the next biggest vector size. */ | 2953 /* Try the next biggest vector size. */ |
3080 current_vector_size = 1 << floor_log2 (vector_sizes); | 2954 current_vector_size = vector_sizes[next_size++]; |
3081 if (dump_enabled_p ()) | 2955 if (dump_enabled_p ()) |
3082 dump_printf_loc (MSG_NOTE, vect_location, | 2956 { |
3083 "***** Re-trying analysis with " | 2957 dump_printf_loc (MSG_NOTE, vect_location, |
3084 "vector size %d\n", current_vector_size); | 2958 "***** Re-trying analysis with " |
2959 "vector size "); | |
2960 dump_dec (MSG_NOTE, current_vector_size); | |
2961 dump_printf (MSG_NOTE, "\n"); | |
2962 } | |
3085 | 2963 |
3086 /* Start over. */ | 2964 /* Start over. */ |
3087 gsi = region_begin; | 2965 gsi = region_begin; |
3088 } | 2966 } |
3089 } | 2967 } |
3091 return any_vectorized; | 2969 return any_vectorized; |
3092 } | 2970 } |
3093 | 2971 |
3094 | 2972 |
3095 /* Return 1 if vector type of boolean constant which is OPNUM | 2973 /* Return 1 if vector type of boolean constant which is OPNUM |
3096 operand in statement STMT is a boolean vector. */ | 2974 operand in statement STMT_VINFO is a boolean vector. */ |
3097 | 2975 |
3098 static bool | 2976 static bool |
3099 vect_mask_constant_operand_p (gimple *stmt, int opnum) | 2977 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo, int opnum) |
3100 { | 2978 { |
3101 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); | 2979 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt); |
3102 enum tree_code code = gimple_expr_code (stmt); | |
3103 tree op, vectype; | 2980 tree op, vectype; |
3104 gimple *def_stmt; | |
3105 enum vect_def_type dt; | 2981 enum vect_def_type dt; |
3106 | 2982 |
3107 /* For comparison and COND_EXPR type is chosen depending | 2983 /* For comparison and COND_EXPR type is chosen depending |
3108 on the other comparison operand. */ | 2984 on the other comparison operand. */ |
3109 if (TREE_CODE_CLASS (code) == tcc_comparison) | 2985 if (TREE_CODE_CLASS (code) == tcc_comparison) |
3110 { | 2986 { |
2987 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt); | |
3111 if (opnum) | 2988 if (opnum) |
3112 op = gimple_assign_rhs1 (stmt); | 2989 op = gimple_assign_rhs1 (stmt); |
3113 else | 2990 else |
3114 op = gimple_assign_rhs2 (stmt); | 2991 op = gimple_assign_rhs2 (stmt); |
3115 | 2992 |
3116 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt, | 2993 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype)) |
3117 &dt, &vectype)) | |
3118 gcc_unreachable (); | 2994 gcc_unreachable (); |
3119 | 2995 |
3120 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype); | 2996 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype); |
3121 } | 2997 } |
3122 | 2998 |
3123 if (code == COND_EXPR) | 2999 if (code == COND_EXPR) |
3124 { | 3000 { |
3001 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt); | |
3125 tree cond = gimple_assign_rhs1 (stmt); | 3002 tree cond = gimple_assign_rhs1 (stmt); |
3126 | 3003 |
3127 if (TREE_CODE (cond) == SSA_NAME) | 3004 if (TREE_CODE (cond) == SSA_NAME) |
3128 op = cond; | 3005 op = cond; |
3129 else if (opnum) | 3006 else if (opnum) |
3130 op = TREE_OPERAND (cond, 1); | 3007 op = TREE_OPERAND (cond, 1); |
3131 else | 3008 else |
3132 op = TREE_OPERAND (cond, 0); | 3009 op = TREE_OPERAND (cond, 0); |
3133 | 3010 |
3134 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt, | 3011 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype)) |
3135 &dt, &vectype)) | |
3136 gcc_unreachable (); | 3012 gcc_unreachable (); |
3137 | 3013 |
3138 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype); | 3014 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype); |
3139 } | 3015 } |
3140 | 3016 |
3141 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo)); | 3017 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo)); |
3018 } | |
3019 | |
3020 /* Build a variable-length vector in which the elements in ELTS are repeated | |
3021 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in | |
3022 RESULTS and add any new instructions to SEQ. | |
3023 | |
3024 The approach we use is: | |
3025 | |
3026 (1) Find a vector mode VM with integer elements of mode IM. | |
3027 | |
3028 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of | |
3029 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs | |
3030 from small vectors to IM. | |
3031 | |
3032 (3) Duplicate each ELTS'[I] into a vector of mode VM. | |
3033 | |
3034 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the | |
3035 correct byte contents. | |
3036 | |
3037 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type. | |
3038 | |
3039 We try to find the largest IM for which this sequence works, in order | |
3040 to cut down on the number of interleaves. */ | |
3041 | |
3042 void | |
3043 duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts, | |
3044 unsigned int nresults, vec<tree> &results) | |
3045 { | |
3046 unsigned int nelts = elts.length (); | |
3047 tree element_type = TREE_TYPE (vector_type); | |
3048 | |
3049 /* (1) Find a vector mode VM with integer elements of mode IM. */ | |
3050 unsigned int nvectors = 1; | |
3051 tree new_vector_type; | |
3052 tree permutes[2]; | |
3053 if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type), | |
3054 &nvectors, &new_vector_type, | |
3055 permutes)) | |
3056 gcc_unreachable (); | |
3057 | |
3058 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */ | |
3059 unsigned int partial_nelts = nelts / nvectors; | |
3060 tree partial_vector_type = build_vector_type (element_type, partial_nelts); | |
3061 | |
3062 tree_vector_builder partial_elts; | |
3063 auto_vec<tree, 32> pieces (nvectors * 2); | |
3064 pieces.quick_grow (nvectors * 2); | |
3065 for (unsigned int i = 0; i < nvectors; ++i) | |
3066 { | |
3067 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of | |
3068 ELTS' has mode IM. */ | |
3069 partial_elts.new_vector (partial_vector_type, partial_nelts, 1); | |
3070 for (unsigned int j = 0; j < partial_nelts; ++j) | |
3071 partial_elts.quick_push (elts[i * partial_nelts + j]); | |
3072 tree t = gimple_build_vector (seq, &partial_elts); | |
3073 t = gimple_build (seq, VIEW_CONVERT_EXPR, | |
3074 TREE_TYPE (new_vector_type), t); | |
3075 | |
3076 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */ | |
3077 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t); | |
3078 } | |
3079 | |
3080 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the | |
3081 correct byte contents. | |
3082 | |
3083 We need to repeat the following operation log2(nvectors) times: | |
3084 | |
3085 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute); | |
3086 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute); | |
3087 | |
3088 However, if each input repeats every N elements and the VF is | |
3089 a multiple of N * 2, the HI result is the same as the LO. */ | |
3090 unsigned int in_start = 0; | |
3091 unsigned int out_start = nvectors; | |
3092 unsigned int hi_start = nvectors / 2; | |
3093 /* A bound on the number of outputs needed to produce NRESULTS results | |
3094 in the final iteration. */ | |
3095 unsigned int noutputs_bound = nvectors * nresults; | |
3096 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2) | |
3097 { | |
3098 noutputs_bound /= 2; | |
3099 unsigned int limit = MIN (noutputs_bound, nvectors); | |
3100 for (unsigned int i = 0; i < limit; ++i) | |
3101 { | |
3102 if ((i & 1) != 0 | |
3103 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type), | |
3104 2 * in_repeat)) | |
3105 { | |
3106 pieces[out_start + i] = pieces[out_start + i - 1]; | |
3107 continue; | |
3108 } | |
3109 | |
3110 tree output = make_ssa_name (new_vector_type); | |
3111 tree input1 = pieces[in_start + (i / 2)]; | |
3112 tree input2 = pieces[in_start + (i / 2) + hi_start]; | |
3113 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR, | |
3114 input1, input2, | |
3115 permutes[i & 1]); | |
3116 gimple_seq_add_stmt (seq, stmt); | |
3117 pieces[out_start + i] = output; | |
3118 } | |
3119 std::swap (in_start, out_start); | |
3120 } | |
3121 | |
3122 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */ | |
3123 results.reserve (nresults); | |
3124 for (unsigned int i = 0; i < nresults; ++i) | |
3125 if (i < nvectors) | |
3126 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type, | |
3127 pieces[in_start + i])); | |
3128 else | |
3129 results.quick_push (results[i - nvectors]); | |
3142 } | 3130 } |
3143 | 3131 |
3144 | 3132 |
3145 /* For constant and loop invariant defs of SLP_NODE this function returns | 3133 /* For constant and loop invariant defs of SLP_NODE this function returns |
3146 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts. | 3134 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts. |
3152 static void | 3140 static void |
3153 vect_get_constant_vectors (tree op, slp_tree slp_node, | 3141 vect_get_constant_vectors (tree op, slp_tree slp_node, |
3154 vec<tree> *vec_oprnds, | 3142 vec<tree> *vec_oprnds, |
3155 unsigned int op_num, unsigned int number_of_vectors) | 3143 unsigned int op_num, unsigned int number_of_vectors) |
3156 { | 3144 { |
3157 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node); | 3145 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node); |
3158 gimple *stmt = stmts[0]; | 3146 stmt_vec_info stmt_vinfo = stmts[0]; |
3159 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); | 3147 gimple *stmt = stmt_vinfo->stmt; |
3160 unsigned nunits; | 3148 unsigned HOST_WIDE_INT nunits; |
3161 tree vec_cst; | 3149 tree vec_cst; |
3162 unsigned j, number_of_places_left_in_vector; | 3150 unsigned j, number_of_places_left_in_vector; |
3163 tree vector_type; | 3151 tree vector_type; |
3164 tree vop; | 3152 tree vop; |
3165 int group_size = stmts.length (); | 3153 int group_size = stmts.length (); |
3169 voprnds.create (number_of_vectors); | 3157 voprnds.create (number_of_vectors); |
3170 bool constant_p, is_store; | 3158 bool constant_p, is_store; |
3171 tree neutral_op = NULL; | 3159 tree neutral_op = NULL; |
3172 enum tree_code code = gimple_expr_code (stmt); | 3160 enum tree_code code = gimple_expr_code (stmt); |
3173 gimple_seq ctor_seq = NULL; | 3161 gimple_seq ctor_seq = NULL; |
3162 auto_vec<tree, 16> permute_results; | |
3174 | 3163 |
3175 /* Check if vector type is a boolean vector. */ | 3164 /* Check if vector type is a boolean vector. */ |
3176 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op)) | 3165 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op)) |
3177 && vect_mask_constant_operand_p (stmt, op_num)) | 3166 && vect_mask_constant_operand_p (stmt_vinfo, op_num)) |
3178 vector_type | 3167 vector_type |
3179 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo)); | 3168 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo)); |
3180 else | 3169 else |
3181 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op)); | 3170 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op)); |
3182 nunits = TYPE_VECTOR_SUBPARTS (vector_type); | |
3183 | 3171 |
3184 if (STMT_VINFO_DATA_REF (stmt_vinfo)) | 3172 if (STMT_VINFO_DATA_REF (stmt_vinfo)) |
3185 { | 3173 { |
3186 is_store = true; | 3174 is_store = true; |
3187 op = gimple_assign_rhs1 (stmt); | 3175 op = gimple_assign_rhs1 (stmt); |
3205 | 3193 |
3206 For example, NUNITS is four as before, and the group size is 8 | 3194 For example, NUNITS is four as before, and the group size is 8 |
3207 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and | 3195 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and |
3208 {s5, s6, s7, s8}. */ | 3196 {s5, s6, s7, s8}. */ |
3209 | 3197 |
3198 /* When using duplicate_and_interleave, we just need one element for | |
3199 each scalar statement. */ | |
3200 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits)) | |
3201 nunits = group_size; | |
3202 | |
3210 number_of_copies = nunits * number_of_vectors / group_size; | 3203 number_of_copies = nunits * number_of_vectors / group_size; |
3211 | 3204 |
3212 number_of_places_left_in_vector = nunits; | 3205 number_of_places_left_in_vector = nunits; |
3213 constant_p = true; | 3206 constant_p = true; |
3214 auto_vec<tree, 32> elts (nunits); | 3207 tree_vector_builder elts (vector_type, nunits, 1); |
3215 elts.quick_grow (nunits); | 3208 elts.quick_grow (nunits); |
3216 bool place_after_defs = false; | 3209 bool place_after_defs = false; |
3217 for (j = 0; j < number_of_copies; j++) | 3210 for (j = 0; j < number_of_copies; j++) |
3218 { | 3211 { |
3219 for (i = group_size - 1; stmts.iterate (i, &stmt); i--) | 3212 for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--) |
3220 { | 3213 { |
3214 stmt = stmt_vinfo->stmt; | |
3221 if (is_store) | 3215 if (is_store) |
3222 op = gimple_assign_rhs1 (stmt); | 3216 op = gimple_assign_rhs1 (stmt); |
3223 else | 3217 else |
3224 { | 3218 { |
3225 switch (code) | 3219 switch (code) |
3326 == gimple_bb (SSA_NAME_DEF_STMT (orig_op)))) | 3320 == gimple_bb (SSA_NAME_DEF_STMT (orig_op)))) |
3327 place_after_defs = true; | 3321 place_after_defs = true; |
3328 | 3322 |
3329 if (number_of_places_left_in_vector == 0) | 3323 if (number_of_places_left_in_vector == 0) |
3330 { | 3324 { |
3331 if (constant_p) | 3325 if (constant_p |
3332 vec_cst = build_vector (vector_type, elts); | 3326 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits) |
3327 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits)) | |
3328 vec_cst = gimple_build_vector (&ctor_seq, &elts); | |
3333 else | 3329 else |
3334 { | 3330 { |
3335 vec<constructor_elt, va_gc> *v; | 3331 if (vec_oprnds->is_empty ()) |
3336 unsigned k; | 3332 duplicate_and_interleave (&ctor_seq, vector_type, elts, |
3337 vec_alloc (v, nunits); | 3333 number_of_vectors, |
3338 for (k = 0; k < nunits; ++k) | 3334 permute_results); |
3339 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]); | 3335 vec_cst = permute_results[number_of_vectors - j - 1]; |
3340 vec_cst = build_constructor (vector_type, v); | |
3341 } | 3336 } |
3342 tree init; | 3337 tree init; |
3343 gimple_stmt_iterator gsi; | 3338 gimple_stmt_iterator gsi; |
3344 if (place_after_defs) | 3339 if (place_after_defs) |
3345 { | 3340 { |
3346 gsi = gsi_for_stmt | 3341 stmt_vec_info last_stmt_info |
3347 (vect_find_last_scalar_stmt_in_slp (slp_node)); | 3342 = vect_find_last_scalar_stmt_in_slp (slp_node); |
3348 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi); | 3343 gsi = gsi_for_stmt (last_stmt_info->stmt); |
3344 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type, | |
3345 &gsi); | |
3349 } | 3346 } |
3350 else | 3347 else |
3351 init = vect_init_vector (stmt, vec_cst, vector_type, NULL); | 3348 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type, |
3349 NULL); | |
3352 if (ctor_seq != NULL) | 3350 if (ctor_seq != NULL) |
3353 { | 3351 { |
3354 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init)); | 3352 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init)); |
3355 gsi_insert_seq_before_without_update (&gsi, ctor_seq, | 3353 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT); |
3356 GSI_SAME_STMT); | |
3357 ctor_seq = NULL; | 3354 ctor_seq = NULL; |
3358 } | 3355 } |
3359 voprnds.quick_push (init); | 3356 voprnds.quick_push (init); |
3360 place_after_defs = false; | 3357 place_after_defs = false; |
3361 number_of_places_left_in_vector = nunits; | 3358 number_of_places_left_in_vector = nunits; |
3362 constant_p = true; | 3359 constant_p = true; |
3360 elts.new_vector (vector_type, nunits, 1); | |
3361 elts.quick_grow (nunits); | |
3363 } | 3362 } |
3364 } | 3363 } |
3365 } | 3364 } |
3366 | 3365 |
3367 /* Since the vectors are created in the reverse order, we should invert | 3366 /* Since the vectors are created in the reverse order, we should invert |
3404 | 3403 |
3405 static void | 3404 static void |
3406 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds) | 3405 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds) |
3407 { | 3406 { |
3408 tree vec_oprnd; | 3407 tree vec_oprnd; |
3409 gimple *vec_def_stmt; | 3408 stmt_vec_info vec_def_stmt_info; |
3410 unsigned int i; | 3409 unsigned int i; |
3411 | 3410 |
3412 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ()); | 3411 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ()); |
3413 | 3412 |
3414 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt) | 3413 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info) |
3415 { | 3414 { |
3416 gcc_assert (vec_def_stmt); | 3415 gcc_assert (vec_def_stmt_info); |
3417 if (gimple_code (vec_def_stmt) == GIMPLE_PHI) | 3416 if (gphi *vec_def_phi = dyn_cast <gphi *> (vec_def_stmt_info->stmt)) |
3418 vec_oprnd = gimple_phi_result (vec_def_stmt); | 3417 vec_oprnd = gimple_phi_result (vec_def_phi); |
3419 else | 3418 else |
3420 vec_oprnd = gimple_get_lhs (vec_def_stmt); | 3419 vec_oprnd = gimple_get_lhs (vec_def_stmt_info->stmt); |
3421 vec_oprnds->quick_push (vec_oprnd); | 3420 vec_oprnds->quick_push (vec_oprnd); |
3422 } | 3421 } |
3423 } | 3422 } |
3424 | 3423 |
3425 | 3424 |
3432 | 3431 |
3433 void | 3432 void |
3434 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node, | 3433 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node, |
3435 vec<vec<tree> > *vec_oprnds) | 3434 vec<vec<tree> > *vec_oprnds) |
3436 { | 3435 { |
3437 gimple *first_stmt; | |
3438 int number_of_vects = 0, i; | 3436 int number_of_vects = 0, i; |
3439 unsigned int child_index = 0; | 3437 unsigned int child_index = 0; |
3440 HOST_WIDE_INT lhs_size_unit, rhs_size_unit; | 3438 HOST_WIDE_INT lhs_size_unit, rhs_size_unit; |
3441 slp_tree child = NULL; | 3439 slp_tree child = NULL; |
3442 vec<tree> vec_defs; | 3440 vec<tree> vec_defs; |
3443 tree oprnd; | 3441 tree oprnd; |
3444 bool vectorized_defs; | 3442 bool vectorized_defs; |
3445 | 3443 |
3446 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0]; | 3444 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0]; |
3447 FOR_EACH_VEC_ELT (ops, i, oprnd) | 3445 FOR_EACH_VEC_ELT (ops, i, oprnd) |
3448 { | 3446 { |
3449 /* For each operand we check if it has vectorized definitions in a child | 3447 /* For each operand we check if it has vectorized definitions in a child |
3450 node or we need to create them (for invariants and constants). We | 3448 node or we need to create them (for invariants and constants). We |
3451 check if the LHS of the first stmt of the next child matches OPRND. | 3449 check if the LHS of the first stmt of the next child matches OPRND. |
3458 child = SLP_TREE_CHILDREN (slp_node)[child_index]; | 3456 child = SLP_TREE_CHILDREN (slp_node)[child_index]; |
3459 | 3457 |
3460 /* We have to check both pattern and original def, if available. */ | 3458 /* We have to check both pattern and original def, if available. */ |
3461 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) | 3459 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) |
3462 { | 3460 { |
3463 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0]; | 3461 stmt_vec_info first_def_info = SLP_TREE_SCALAR_STMTS (child)[0]; |
3464 gimple *related | 3462 stmt_vec_info related = STMT_VINFO_RELATED_STMT (first_def_info); |
3465 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def)); | |
3466 tree first_def_op; | 3463 tree first_def_op; |
3467 | 3464 |
3468 if (gimple_code (first_def) == GIMPLE_PHI) | 3465 if (gphi *first_def = dyn_cast <gphi *> (first_def_info->stmt)) |
3469 first_def_op = gimple_phi_result (first_def); | 3466 first_def_op = gimple_phi_result (first_def); |
3470 else | 3467 else |
3471 first_def_op = gimple_get_lhs (first_def); | 3468 first_def_op = gimple_get_lhs (first_def_info->stmt); |
3472 if (operand_equal_p (oprnd, first_def_op, 0) | 3469 if (operand_equal_p (oprnd, first_def_op, 0) |
3473 || (related | 3470 || (related |
3474 && operand_equal_p (oprnd, gimple_get_lhs (related), 0))) | 3471 && operand_equal_p (oprnd, |
3472 gimple_get_lhs (related->stmt), 0))) | |
3475 { | 3473 { |
3476 /* The number of vector defs is determined by the number of | 3474 /* The number of vector defs is determined by the number of |
3477 vector statements in the node from which we get those | 3475 vector statements in the node from which we get those |
3478 statements. */ | 3476 statements. */ |
3479 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child); | 3477 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child); |
3492 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); | 3490 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
3493 /* Number of vector stmts was calculated according to LHS in | 3491 /* Number of vector stmts was calculated according to LHS in |
3494 vect_schedule_slp_instance (), fix it by replacing LHS with | 3492 vect_schedule_slp_instance (), fix it by replacing LHS with |
3495 RHS, if necessary. See vect_get_smallest_scalar_type () for | 3493 RHS, if necessary. See vect_get_smallest_scalar_type () for |
3496 details. */ | 3494 details. */ |
3497 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit, | 3495 vect_get_smallest_scalar_type (first_stmt_info, &lhs_size_unit, |
3498 &rhs_size_unit); | 3496 &rhs_size_unit); |
3499 if (rhs_size_unit != lhs_size_unit) | 3497 if (rhs_size_unit != lhs_size_unit) |
3500 { | 3498 { |
3501 number_of_vects *= rhs_size_unit; | 3499 number_of_vects *= rhs_size_unit; |
3502 number_of_vects /= lhs_size_unit; | 3500 number_of_vects /= lhs_size_unit; |
3503 } | 3501 } |
3527 permute statements for the SLP node NODE of the SLP instance | 3525 permute statements for the SLP node NODE of the SLP instance |
3528 SLP_NODE_INSTANCE. */ | 3526 SLP_NODE_INSTANCE. */ |
3529 | 3527 |
3530 bool | 3528 bool |
3531 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain, | 3529 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain, |
3532 gimple_stmt_iterator *gsi, int vf, | 3530 gimple_stmt_iterator *gsi, poly_uint64 vf, |
3533 slp_instance slp_node_instance, bool analyze_only, | 3531 slp_instance slp_node_instance, bool analyze_only, |
3534 unsigned *n_perms) | 3532 unsigned *n_perms) |
3535 { | 3533 { |
3536 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]; | 3534 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0]; |
3537 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | 3535 vec_info *vinfo = stmt_info->vinfo; |
3538 tree mask_element_type = NULL_TREE, mask_type; | 3536 int vec_index = 0; |
3539 int nunits, vec_index = 0; | |
3540 tree vectype = STMT_VINFO_VECTYPE (stmt_info); | 3537 tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
3541 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance); | 3538 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance); |
3542 int mask_element; | 3539 unsigned int mask_element; |
3543 machine_mode mode; | 3540 machine_mode mode; |
3544 | 3541 |
3545 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)) | 3542 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)) |
3546 return false; | 3543 return false; |
3547 | 3544 |
3548 stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)); | 3545 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info); |
3549 | 3546 |
3550 mode = TYPE_MODE (vectype); | 3547 mode = TYPE_MODE (vectype); |
3551 | 3548 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); |
3552 /* The generic VEC_PERM_EXPR code always uses an integral type of the | |
3553 same size as the vector element being permuted. */ | |
3554 mask_element_type = lang_hooks.types.type_for_mode | |
3555 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1); | |
3556 mask_type = get_vectype_for_scalar_type (mask_element_type); | |
3557 nunits = TYPE_VECTOR_SUBPARTS (vectype); | |
3558 auto_vec_perm_indices mask (nunits); | |
3559 mask.quick_grow (nunits); | |
3560 | 3549 |
3561 /* Initialize the vect stmts of NODE to properly insert the generated | 3550 /* Initialize the vect stmts of NODE to properly insert the generated |
3562 stmts later. */ | 3551 stmts later. */ |
3563 if (! analyze_only) | 3552 if (! analyze_only) |
3564 for (unsigned i = SLP_TREE_VEC_STMTS (node).length (); | 3553 for (unsigned i = SLP_TREE_VEC_STMTS (node).length (); |
3582 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation | 3571 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation |
3583 we need the second and the third vectors: {b1,c1,a2,b2} and | 3572 we need the second and the third vectors: {b1,c1,a2,b2} and |
3584 {c2,a3,b3,c3}. */ | 3573 {c2,a3,b3,c3}. */ |
3585 | 3574 |
3586 int vect_stmts_counter = 0; | 3575 int vect_stmts_counter = 0; |
3587 int index = 0; | 3576 unsigned int index = 0; |
3588 int first_vec_index = -1; | 3577 int first_vec_index = -1; |
3589 int second_vec_index = -1; | 3578 int second_vec_index = -1; |
3590 bool noop_p = true; | 3579 bool noop_p = true; |
3591 *n_perms = 0; | 3580 *n_perms = 0; |
3592 | 3581 |
3593 for (int j = 0; j < vf; j++) | 3582 vec_perm_builder mask; |
3594 { | 3583 unsigned int nelts_to_build; |
3595 for (int k = 0; k < group_size; k++) | 3584 unsigned int nvectors_per_build; |
3596 { | 3585 bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info) |
3597 int i = (SLP_TREE_LOAD_PERMUTATION (node)[k] | 3586 && multiple_p (nunits, group_size)); |
3598 + j * STMT_VINFO_GROUP_SIZE (stmt_info)); | 3587 if (repeating_p) |
3599 vec_index = i / nunits; | 3588 { |
3600 mask_element = i % nunits; | 3589 /* A single vector contains a whole number of copies of the node, so: |
3590 (a) all permutes can use the same mask; and | |
3591 (b) the permutes only need a single vector input. */ | |
3592 mask.new_vector (nunits, group_size, 3); | |
3593 nelts_to_build = mask.encoded_nelts (); | |
3594 nvectors_per_build = SLP_TREE_VEC_STMTS (node).length (); | |
3595 } | |
3596 else | |
3597 { | |
3598 /* We need to construct a separate mask for each vector statement. */ | |
3599 unsigned HOST_WIDE_INT const_nunits, const_vf; | |
3600 if (!nunits.is_constant (&const_nunits) | |
3601 || !vf.is_constant (&const_vf)) | |
3602 return false; | |
3603 mask.new_vector (const_nunits, const_nunits, 1); | |
3604 nelts_to_build = const_vf * group_size; | |
3605 nvectors_per_build = 1; | |
3606 } | |
3607 | |
3608 unsigned int count = mask.encoded_nelts (); | |
3609 mask.quick_grow (count); | |
3610 vec_perm_indices indices; | |
3611 | |
3612 for (unsigned int j = 0; j < nelts_to_build; j++) | |
3613 { | |
3614 unsigned int iter_num = j / group_size; | |
3615 unsigned int stmt_num = j % group_size; | |
3616 unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info) | |
3617 + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]); | |
3618 if (repeating_p) | |
3619 { | |
3620 first_vec_index = 0; | |
3621 mask_element = i; | |
3622 } | |
3623 else | |
3624 { | |
3625 /* Enforced before the loop when !repeating_p. */ | |
3626 unsigned int const_nunits = nunits.to_constant (); | |
3627 vec_index = i / const_nunits; | |
3628 mask_element = i % const_nunits; | |
3601 if (vec_index == first_vec_index | 3629 if (vec_index == first_vec_index |
3602 || first_vec_index == -1) | 3630 || first_vec_index == -1) |
3603 { | 3631 { |
3604 first_vec_index = vec_index; | 3632 first_vec_index = vec_index; |
3605 } | 3633 } |
3606 else if (vec_index == second_vec_index | 3634 else if (vec_index == second_vec_index |
3607 || second_vec_index == -1) | 3635 || second_vec_index == -1) |
3608 { | 3636 { |
3609 second_vec_index = vec_index; | 3637 second_vec_index = vec_index; |
3610 mask_element += nunits; | 3638 mask_element += const_nunits; |
3611 } | 3639 } |
3612 else | 3640 else |
3613 { | 3641 { |
3614 if (dump_enabled_p ()) | 3642 if (dump_enabled_p ()) |
3643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
3644 "permutation requires at " | |
3645 "least three vectors %G", | |
3646 stmt_info->stmt); | |
3647 gcc_assert (analyze_only); | |
3648 return false; | |
3649 } | |
3650 | |
3651 gcc_assert (mask_element < 2 * const_nunits); | |
3652 } | |
3653 | |
3654 if (mask_element != index) | |
3655 noop_p = false; | |
3656 mask[index++] = mask_element; | |
3657 | |
3658 if (index == count && !noop_p) | |
3659 { | |
3660 indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits); | |
3661 if (!can_vec_perm_const_p (mode, indices)) | |
3662 { | |
3663 if (dump_enabled_p ()) | |
3615 { | 3664 { |
3616 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 3665 dump_printf_loc (MSG_MISSED_OPTIMIZATION, |
3617 "permutation requires at " | 3666 vect_location, |
3618 "least three vectors "); | 3667 "unsupported vect permute { "); |
3619 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, | 3668 for (i = 0; i < count; ++i) |
3620 stmt, 0); | 3669 { |
3670 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]); | |
3671 dump_printf (MSG_MISSED_OPTIMIZATION, " "); | |
3672 } | |
3673 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n"); | |
3621 } | 3674 } |
3622 gcc_assert (analyze_only); | 3675 gcc_assert (analyze_only); |
3623 return false; | 3676 return false; |
3624 } | 3677 } |
3625 | 3678 |
3626 gcc_assert (mask_element >= 0 | 3679 ++*n_perms; |
3627 && mask_element < 2 * nunits); | 3680 } |
3628 if (mask_element != index) | 3681 |
3629 noop_p = false; | 3682 if (index == count) |
3630 mask[index++] = mask_element; | 3683 { |
3631 | 3684 if (!analyze_only) |
3632 if (index == nunits) | |
3633 { | 3685 { |
3634 if (! noop_p | 3686 tree mask_vec = NULL_TREE; |
3635 && ! can_vec_perm_p (mode, false, &mask)) | 3687 |
3688 if (! noop_p) | |
3689 mask_vec = vect_gen_perm_mask_checked (vectype, indices); | |
3690 | |
3691 if (second_vec_index == -1) | |
3692 second_vec_index = first_vec_index; | |
3693 | |
3694 for (unsigned int ri = 0; ri < nvectors_per_build; ++ri) | |
3636 { | 3695 { |
3637 if (dump_enabled_p ()) | 3696 /* Generate the permute statement if necessary. */ |
3638 { | 3697 tree first_vec = dr_chain[first_vec_index + ri]; |
3639 dump_printf_loc (MSG_MISSED_OPTIMIZATION, | 3698 tree second_vec = dr_chain[second_vec_index + ri]; |
3640 vect_location, | 3699 stmt_vec_info perm_stmt_info; |
3641 "unsupported vect permute { "); | |
3642 for (i = 0; i < nunits; ++i) | |
3643 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]); | |
3644 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n"); | |
3645 } | |
3646 gcc_assert (analyze_only); | |
3647 return false; | |
3648 } | |
3649 | |
3650 if (! noop_p) | |
3651 ++*n_perms; | |
3652 | |
3653 if (!analyze_only) | |
3654 { | |
3655 tree mask_vec = NULL_TREE; | |
3656 | |
3657 if (! noop_p) | 3700 if (! noop_p) |
3658 { | 3701 { |
3659 auto_vec<tree, 32> mask_elts (nunits); | 3702 gassign *stmt = as_a <gassign *> (stmt_info->stmt); |
3660 for (int l = 0; l < nunits; ++l) | |
3661 mask_elts.quick_push (build_int_cst (mask_element_type, | |
3662 mask[l])); | |
3663 mask_vec = build_vector (mask_type, mask_elts); | |
3664 } | |
3665 | |
3666 if (second_vec_index == -1) | |
3667 second_vec_index = first_vec_index; | |
3668 | |
3669 /* Generate the permute statement if necessary. */ | |
3670 tree first_vec = dr_chain[first_vec_index]; | |
3671 tree second_vec = dr_chain[second_vec_index]; | |
3672 gimple *perm_stmt; | |
3673 if (! noop_p) | |
3674 { | |
3675 tree perm_dest | 3703 tree perm_dest |
3676 = vect_create_destination_var (gimple_assign_lhs (stmt), | 3704 = vect_create_destination_var (gimple_assign_lhs (stmt), |
3677 vectype); | 3705 vectype); |
3678 perm_dest = make_ssa_name (perm_dest); | 3706 perm_dest = make_ssa_name (perm_dest); |
3679 perm_stmt = gimple_build_assign (perm_dest, | 3707 gassign *perm_stmt |
3680 VEC_PERM_EXPR, | 3708 = gimple_build_assign (perm_dest, VEC_PERM_EXPR, |
3681 first_vec, second_vec, | 3709 first_vec, second_vec, |
3682 mask_vec); | 3710 mask_vec); |
3683 vect_finish_stmt_generation (stmt, perm_stmt, gsi); | 3711 perm_stmt_info |
3712 = vect_finish_stmt_generation (stmt_info, perm_stmt, | |
3713 gsi); | |
3684 } | 3714 } |
3685 else | 3715 else |
3686 /* If mask was NULL_TREE generate the requested | 3716 /* If mask was NULL_TREE generate the requested |
3687 identity transform. */ | 3717 identity transform. */ |
3688 perm_stmt = SSA_NAME_DEF_STMT (first_vec); | 3718 perm_stmt_info = vinfo->lookup_def (first_vec); |
3689 | 3719 |
3690 /* Store the vector statement in NODE. */ | 3720 /* Store the vector statement in NODE. */ |
3691 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt; | 3721 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] |
3722 = perm_stmt_info; | |
3692 } | 3723 } |
3693 | |
3694 index = 0; | |
3695 first_vec_index = -1; | |
3696 second_vec_index = -1; | |
3697 noop_p = true; | |
3698 } | 3724 } |
3725 | |
3726 index = 0; | |
3727 first_vec_index = -1; | |
3728 second_vec_index = -1; | |
3729 noop_p = true; | |
3699 } | 3730 } |
3700 } | 3731 } |
3701 | 3732 |
3702 return true; | 3733 return true; |
3703 } | 3734 } |
3704 | 3735 |
3705 | |
3706 | |
3707 /* Vectorize SLP instance tree in postorder. */ | 3736 /* Vectorize SLP instance tree in postorder. */ |
3708 | 3737 |
3709 static bool | 3738 static void |
3710 vect_schedule_slp_instance (slp_tree node, slp_instance instance) | 3739 vect_schedule_slp_instance (slp_tree node, slp_instance instance, |
3711 { | 3740 scalar_stmts_to_slp_tree_map_t *bst_map) |
3712 gimple *stmt; | 3741 { |
3713 bool grouped_store, is_store; | |
3714 gimple_stmt_iterator si; | 3742 gimple_stmt_iterator si; |
3715 stmt_vec_info stmt_info; | 3743 stmt_vec_info stmt_info; |
3716 unsigned int group_size; | 3744 unsigned int group_size; |
3717 tree vectype; | 3745 tree vectype; |
3718 int i, j; | 3746 int i, j; |
3719 slp_tree child; | 3747 slp_tree child; |
3720 | 3748 |
3721 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) | 3749 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) |
3722 return false; | 3750 return; |
3723 | 3751 |
3752 /* See if we have already vectorized the same set of stmts and reuse their | |
3753 vectorized stmts. */ | |
3754 if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node))) | |
3755 { | |
3756 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader)); | |
3757 return; | |
3758 } | |
3759 | |
3760 bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node); | |
3724 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) | 3761 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
3725 vect_schedule_slp_instance (child, instance); | 3762 vect_schedule_slp_instance (child, instance, bst_map); |
3726 | 3763 |
3727 /* Push SLP node def-type to stmts. */ | 3764 /* Push SLP node def-type to stmts. */ |
3728 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) | 3765 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
3729 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) | 3766 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) |
3730 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt) | 3767 { |
3731 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child); | 3768 stmt_vec_info child_stmt_info; |
3732 | 3769 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info) |
3733 stmt = SLP_TREE_SCALAR_STMTS (node)[0]; | 3770 STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child); |
3734 stmt_info = vinfo_for_stmt (stmt); | 3771 } |
3772 | |
3773 stmt_info = SLP_TREE_SCALAR_STMTS (node)[0]; | |
3735 | 3774 |
3736 /* VECTYPE is the type of the destination. */ | 3775 /* VECTYPE is the type of the destination. */ |
3737 vectype = STMT_VINFO_VECTYPE (stmt_info); | 3776 vectype = STMT_VINFO_VECTYPE (stmt_info); |
3777 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); | |
3738 group_size = SLP_INSTANCE_GROUP_SIZE (instance); | 3778 group_size = SLP_INSTANCE_GROUP_SIZE (instance); |
3739 | 3779 |
3780 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0); | |
3740 if (!SLP_TREE_VEC_STMTS (node).exists ()) | 3781 if (!SLP_TREE_VEC_STMTS (node).exists ()) |
3741 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node)); | 3782 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node)); |
3742 | 3783 |
3743 if (dump_enabled_p ()) | 3784 if (dump_enabled_p ()) |
3744 { | 3785 dump_printf_loc (MSG_NOTE, vect_location, |
3745 dump_printf_loc (MSG_NOTE,vect_location, | 3786 "------>vectorizing SLP node starting from: %G", |
3746 "------>vectorizing SLP node starting from: "); | 3787 stmt_info->stmt); |
3747 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); | |
3748 } | |
3749 | 3788 |
3750 /* Vectorized stmts go before the last scalar stmt which is where | 3789 /* Vectorized stmts go before the last scalar stmt which is where |
3751 all uses are ready. */ | 3790 all uses are ready. */ |
3752 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node)); | 3791 stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node); |
3792 si = gsi_for_stmt (last_stmt_info->stmt); | |
3753 | 3793 |
3754 /* Mark the first element of the reduction chain as reduction to properly | 3794 /* Mark the first element of the reduction chain as reduction to properly |
3755 transform the node. In the analysis phase only the last element of the | 3795 transform the node. In the analysis phase only the last element of the |
3756 chain is marked as reduction. */ | 3796 chain is marked as reduction. */ |
3757 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info) | 3797 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info) |
3758 && GROUP_FIRST_ELEMENT (stmt_info) == stmt) | 3798 && REDUC_GROUP_FIRST_ELEMENT (stmt_info) |
3799 && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info) | |
3759 { | 3800 { |
3760 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def; | 3801 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def; |
3761 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; | 3802 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; |
3762 } | 3803 } |
3763 | 3804 |
3764 /* Handle two-operation SLP nodes by vectorizing the group with | 3805 /* Handle two-operation SLP nodes by vectorizing the group with |
3765 both operations and then performing a merge. */ | 3806 both operations and then performing a merge. */ |
3766 if (SLP_TREE_TWO_OPERATORS (node)) | 3807 if (SLP_TREE_TWO_OPERATORS (node)) |
3767 { | 3808 { |
3809 gassign *stmt = as_a <gassign *> (stmt_info->stmt); | |
3768 enum tree_code code0 = gimple_assign_rhs_code (stmt); | 3810 enum tree_code code0 = gimple_assign_rhs_code (stmt); |
3769 enum tree_code ocode = ERROR_MARK; | 3811 enum tree_code ocode = ERROR_MARK; |
3770 gimple *ostmt; | 3812 stmt_vec_info ostmt_info; |
3771 auto_vec_perm_indices mask (group_size); | 3813 vec_perm_builder mask (group_size, group_size, 1); |
3772 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt) | 3814 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info) |
3773 if (gimple_assign_rhs_code (ostmt) != code0) | 3815 { |
3774 { | 3816 gassign *ostmt = as_a <gassign *> (ostmt_info->stmt); |
3775 mask.quick_push (1); | 3817 if (gimple_assign_rhs_code (ostmt) != code0) |
3776 ocode = gimple_assign_rhs_code (ostmt); | 3818 { |
3777 } | 3819 mask.quick_push (1); |
3778 else | 3820 ocode = gimple_assign_rhs_code (ostmt); |
3779 mask.quick_push (0); | 3821 } |
3822 else | |
3823 mask.quick_push (0); | |
3824 } | |
3780 if (ocode != ERROR_MARK) | 3825 if (ocode != ERROR_MARK) |
3781 { | 3826 { |
3782 vec<gimple *> v0; | 3827 vec<stmt_vec_info> v0; |
3783 vec<gimple *> v1; | 3828 vec<stmt_vec_info> v1; |
3784 unsigned j; | 3829 unsigned j; |
3785 tree tmask = NULL_TREE; | 3830 tree tmask = NULL_TREE; |
3786 vect_transform_stmt (stmt, &si, &grouped_store, node, instance); | 3831 vect_transform_stmt (stmt_info, &si, node, instance); |
3787 v0 = SLP_TREE_VEC_STMTS (node).copy (); | 3832 v0 = SLP_TREE_VEC_STMTS (node).copy (); |
3788 SLP_TREE_VEC_STMTS (node).truncate (0); | 3833 SLP_TREE_VEC_STMTS (node).truncate (0); |
3789 gimple_assign_set_rhs_code (stmt, ocode); | 3834 gimple_assign_set_rhs_code (stmt, ocode); |
3790 vect_transform_stmt (stmt, &si, &grouped_store, node, instance); | 3835 vect_transform_stmt (stmt_info, &si, node, instance); |
3791 gimple_assign_set_rhs_code (stmt, code0); | 3836 gimple_assign_set_rhs_code (stmt, code0); |
3792 v1 = SLP_TREE_VEC_STMTS (node).copy (); | 3837 v1 = SLP_TREE_VEC_STMTS (node).copy (); |
3793 SLP_TREE_VEC_STMTS (node).truncate (0); | 3838 SLP_TREE_VEC_STMTS (node).truncate (0); |
3794 tree meltype = build_nonstandard_integer_type | 3839 tree meltype = build_nonstandard_integer_type |
3795 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1); | 3840 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1); |
3796 tree mvectype = get_same_sized_vectype (meltype, vectype); | 3841 tree mvectype = get_same_sized_vectype (meltype, vectype); |
3797 unsigned k = 0, l; | 3842 unsigned k = 0, l; |
3798 for (j = 0; j < v0.length (); ++j) | 3843 for (j = 0; j < v0.length (); ++j) |
3799 { | 3844 { |
3800 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype); | 3845 /* Enforced by vect_build_slp_tree, which rejects variable-length |
3801 auto_vec<tree, 32> melts (nunits); | 3846 vectors for SLP_TREE_TWO_OPERATORS. */ |
3802 for (l = 0; l < nunits; ++l) | 3847 unsigned int const_nunits = nunits.to_constant (); |
3848 tree_vector_builder melts (mvectype, const_nunits, 1); | |
3849 for (l = 0; l < const_nunits; ++l) | |
3803 { | 3850 { |
3804 if (k >= group_size) | 3851 if (k >= group_size) |
3805 k = 0; | 3852 k = 0; |
3806 tree t = build_int_cst (meltype, mask[k++] * nunits + l); | 3853 tree t = build_int_cst (meltype, |
3854 mask[k++] * const_nunits + l); | |
3807 melts.quick_push (t); | 3855 melts.quick_push (t); |
3808 } | 3856 } |
3809 tmask = build_vector (mvectype, melts); | 3857 tmask = melts.build (); |
3810 | 3858 |
3811 /* ??? Not all targets support a VEC_PERM_EXPR with a | 3859 /* ??? Not all targets support a VEC_PERM_EXPR with a |
3812 constant mask that would translate to a vec_merge RTX | 3860 constant mask that would translate to a vec_merge RTX |
3813 (with their vec_perm_const_ok). We can either not | 3861 (with their vec_perm_const_ok). We can either not |
3814 vectorize in that case or let veclower do its job. | 3862 vectorize in that case or let veclower do its job. |
3816 plus/minus we'd eventually like to match targets | 3864 plus/minus we'd eventually like to match targets |
3817 vector addsub instructions. */ | 3865 vector addsub instructions. */ |
3818 gimple *vstmt; | 3866 gimple *vstmt; |
3819 vstmt = gimple_build_assign (make_ssa_name (vectype), | 3867 vstmt = gimple_build_assign (make_ssa_name (vectype), |
3820 VEC_PERM_EXPR, | 3868 VEC_PERM_EXPR, |
3821 gimple_assign_lhs (v0[j]), | 3869 gimple_assign_lhs (v0[j]->stmt), |
3822 gimple_assign_lhs (v1[j]), tmask); | 3870 gimple_assign_lhs (v1[j]->stmt), |
3823 vect_finish_stmt_generation (stmt, vstmt, &si); | 3871 tmask); |
3824 SLP_TREE_VEC_STMTS (node).quick_push (vstmt); | 3872 SLP_TREE_VEC_STMTS (node).quick_push |
3873 (vect_finish_stmt_generation (stmt_info, vstmt, &si)); | |
3825 } | 3874 } |
3826 v0.release (); | 3875 v0.release (); |
3827 v1.release (); | 3876 v1.release (); |
3828 return false; | 3877 return; |
3829 } | 3878 } |
3830 } | 3879 } |
3831 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance); | 3880 vect_transform_stmt (stmt_info, &si, node, instance); |
3832 | 3881 |
3833 /* Restore stmt def-types. */ | 3882 /* Restore stmt def-types. */ |
3834 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) | 3883 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
3835 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) | 3884 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) |
3836 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt) | 3885 { |
3837 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def; | 3886 stmt_vec_info child_stmt_info; |
3838 | 3887 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info) |
3839 return is_store; | 3888 STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def; |
3889 } | |
3840 } | 3890 } |
3841 | 3891 |
3842 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero. | 3892 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero. |
3843 For loop vectorization this is done in vectorizable_call, but for SLP | 3893 For loop vectorization this is done in vectorizable_call, but for SLP |
3844 it needs to be deferred until end of vect_schedule_slp, because multiple | 3894 it needs to be deferred until end of vect_schedule_slp, because multiple |
3845 SLP instances may refer to the same scalar stmt. */ | 3895 SLP instances may refer to the same scalar stmt. */ |
3846 | 3896 |
3847 static void | 3897 static void |
3848 vect_remove_slp_scalar_calls (slp_tree node) | 3898 vect_remove_slp_scalar_calls (slp_tree node) |
3849 { | 3899 { |
3850 gimple *stmt, *new_stmt; | 3900 gimple *new_stmt; |
3851 gimple_stmt_iterator gsi; | 3901 gimple_stmt_iterator gsi; |
3852 int i; | 3902 int i; |
3853 slp_tree child; | 3903 slp_tree child; |
3854 tree lhs; | 3904 tree lhs; |
3855 stmt_vec_info stmt_info; | 3905 stmt_vec_info stmt_info; |
3858 return; | 3908 return; |
3859 | 3909 |
3860 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) | 3910 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) |
3861 vect_remove_slp_scalar_calls (child); | 3911 vect_remove_slp_scalar_calls (child); |
3862 | 3912 |
3863 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) | 3913 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info) |
3864 { | 3914 { |
3865 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL) | 3915 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt); |
3916 if (!stmt || gimple_bb (stmt) == NULL) | |
3866 continue; | 3917 continue; |
3867 stmt_info = vinfo_for_stmt (stmt); | 3918 if (is_pattern_stmt_p (stmt_info) |
3868 if (stmt_info == NULL | |
3869 || is_pattern_stmt_p (stmt_info) | |
3870 || !PURE_SLP_STMT (stmt_info)) | 3919 || !PURE_SLP_STMT (stmt_info)) |
3871 continue; | 3920 continue; |
3872 lhs = gimple_call_lhs (stmt); | 3921 lhs = gimple_call_lhs (stmt); |
3873 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); | 3922 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); |
3874 set_vinfo_for_stmt (new_stmt, stmt_info); | |
3875 set_vinfo_for_stmt (stmt, NULL); | |
3876 STMT_VINFO_STMT (stmt_info) = new_stmt; | |
3877 gsi = gsi_for_stmt (stmt); | 3923 gsi = gsi_for_stmt (stmt); |
3878 gsi_replace (&gsi, new_stmt, false); | 3924 stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt); |
3879 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt; | 3925 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt; |
3880 } | 3926 } |
3881 } | 3927 } |
3882 | 3928 |
3883 /* Generate vector code for all SLP instances in the loop/basic block. */ | 3929 /* Generate vector code for all SLP instances in the loop/basic block. */ |
3884 | 3930 |
3885 bool | 3931 void |
3886 vect_schedule_slp (vec_info *vinfo) | 3932 vect_schedule_slp (vec_info *vinfo) |
3887 { | 3933 { |
3888 vec<slp_instance> slp_instances; | 3934 vec<slp_instance> slp_instances; |
3889 slp_instance instance; | 3935 slp_instance instance; |
3890 unsigned int i; | 3936 unsigned int i; |
3891 bool is_store = false; | 3937 |
3892 | 3938 scalar_stmts_to_slp_tree_map_t *bst_map |
3939 = new scalar_stmts_to_slp_tree_map_t (); | |
3893 slp_instances = vinfo->slp_instances; | 3940 slp_instances = vinfo->slp_instances; |
3894 FOR_EACH_VEC_ELT (slp_instances, i, instance) | 3941 FOR_EACH_VEC_ELT (slp_instances, i, instance) |
3895 { | 3942 { |
3896 /* Schedule the tree of INSTANCE. */ | 3943 /* Schedule the tree of INSTANCE. */ |
3897 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance), | 3944 vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance), |
3898 instance); | 3945 instance, bst_map); |
3899 if (dump_enabled_p ()) | 3946 if (dump_enabled_p ()) |
3900 dump_printf_loc (MSG_NOTE, vect_location, | 3947 dump_printf_loc (MSG_NOTE, vect_location, |
3901 "vectorizing stmts using SLP.\n"); | 3948 "vectorizing stmts using SLP.\n"); |
3902 } | 3949 } |
3950 delete bst_map; | |
3903 | 3951 |
3904 FOR_EACH_VEC_ELT (slp_instances, i, instance) | 3952 FOR_EACH_VEC_ELT (slp_instances, i, instance) |
3905 { | 3953 { |
3906 slp_tree root = SLP_INSTANCE_TREE (instance); | 3954 slp_tree root = SLP_INSTANCE_TREE (instance); |
3907 gimple *store; | 3955 stmt_vec_info store_info; |
3908 unsigned int j; | 3956 unsigned int j; |
3909 gimple_stmt_iterator gsi; | |
3910 | 3957 |
3911 /* Remove scalar call stmts. Do not do this for basic-block | 3958 /* Remove scalar call stmts. Do not do this for basic-block |
3912 vectorization as not all uses may be vectorized. | 3959 vectorization as not all uses may be vectorized. |
3913 ??? Why should this be necessary? DCE should be able to | 3960 ??? Why should this be necessary? DCE should be able to |
3914 remove the stmts itself. | 3961 remove the stmts itself. |
3916 stmts starting from the SLP tree root if they have no | 3963 stmts starting from the SLP tree root if they have no |
3917 uses. */ | 3964 uses. */ |
3918 if (is_a <loop_vec_info> (vinfo)) | 3965 if (is_a <loop_vec_info> (vinfo)) |
3919 vect_remove_slp_scalar_calls (root); | 3966 vect_remove_slp_scalar_calls (root); |
3920 | 3967 |
3921 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store) | 3968 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info) |
3922 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++) | 3969 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++) |
3923 { | 3970 { |
3924 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store))) | 3971 if (!STMT_VINFO_DATA_REF (store_info)) |
3925 break; | 3972 break; |
3926 | 3973 |
3927 if (is_pattern_stmt_p (vinfo_for_stmt (store))) | 3974 store_info = vect_orig_stmt (store_info); |
3928 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store)); | 3975 /* Free the attached stmt_vec_info and remove the stmt. */ |
3929 /* Free the attached stmt_vec_info and remove the stmt. */ | 3976 vinfo->remove_stmt (store_info); |
3930 gsi = gsi_for_stmt (store); | |
3931 unlink_stmt_vdef (store); | |
3932 gsi_remove (&gsi, true); | |
3933 release_defs (store); | |
3934 free_stmt_vec_info (store); | |
3935 } | 3977 } |
3936 } | 3978 } |
3937 | 3979 } |
3938 return is_store; | |
3939 } |