comparison gcc/tree-vect-slp.c @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
comparison
equal deleted inserted replaced
111:04ced10e8804 131:84e7813d76e9
1 /* SLP - Basic Block Vectorization 1 /* SLP - Basic Block Vectorization
2 Copyright (C) 2007-2017 Free Software Foundation, Inc. 2 Copyright (C) 2007-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com> 3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
4 and Ira Rosen <irar@il.ibm.com> 4 and Ira Rosen <irar@il.ibm.com>
5 5
6 This file is part of GCC. 6 This file is part of GCC.
7 7
39 #include "cfgloop.h" 39 #include "cfgloop.h"
40 #include "tree-vectorizer.h" 40 #include "tree-vectorizer.h"
41 #include "langhooks.h" 41 #include "langhooks.h"
42 #include "gimple-walk.h" 42 #include "gimple-walk.h"
43 #include "dbgcnt.h" 43 #include "dbgcnt.h"
44 44 #include "tree-vector-builder.h"
45 45 #include "vec-perm-indices.h"
46 /* Recursively free the memory allocated for the SLP tree rooted at NODE. */ 46 #include "gimple-fold.h"
47 #include "internal-fn.h"
48
49
50 /* Recursively free the memory allocated for the SLP tree rooted at NODE.
51 FINAL_P is true if we have vectorized the instance or if we have
52 made a final decision not to vectorize the statements in any way. */
47 53
48 static void 54 static void
49 vect_free_slp_tree (slp_tree node) 55 vect_free_slp_tree (slp_tree node, bool final_p)
50 { 56 {
51 int i; 57 int i;
52 slp_tree child; 58 slp_tree child;
53 59
54 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) 60 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
55 vect_free_slp_tree (child); 61 vect_free_slp_tree (child, final_p);
56 62
57 gimple *stmt; 63 /* Don't update STMT_VINFO_NUM_SLP_USES if it isn't relevant.
58 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) 64 Some statements might no longer exist, after having been
59 /* After transform some stmts are removed and thus their vinfo is gone. */ 65 removed by vect_transform_stmt. Updating the remaining
60 if (vinfo_for_stmt (stmt)) 66 statements would be redundant. */
61 { 67 if (!final_p)
62 gcc_assert (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) > 0); 68 {
63 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))--; 69 stmt_vec_info stmt_info;
64 } 70 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
71 {
72 gcc_assert (STMT_VINFO_NUM_SLP_USES (stmt_info) > 0);
73 STMT_VINFO_NUM_SLP_USES (stmt_info)--;
74 }
75 }
65 76
66 SLP_TREE_CHILDREN (node).release (); 77 SLP_TREE_CHILDREN (node).release ();
67 SLP_TREE_SCALAR_STMTS (node).release (); 78 SLP_TREE_SCALAR_STMTS (node).release ();
68 SLP_TREE_VEC_STMTS (node).release (); 79 SLP_TREE_VEC_STMTS (node).release ();
69 SLP_TREE_LOAD_PERMUTATION (node).release (); 80 SLP_TREE_LOAD_PERMUTATION (node).release ();
70 81
71 free (node); 82 free (node);
72 } 83 }
73 84
74 85
75 /* Free the memory allocated for the SLP instance. */ 86 /* Free the memory allocated for the SLP instance. FINAL_P is true if we
87 have vectorized the instance or if we have made a final decision not
88 to vectorize the statements in any way. */
76 89
77 void 90 void
78 vect_free_slp_instance (slp_instance instance) 91 vect_free_slp_instance (slp_instance instance, bool final_p)
79 { 92 {
80 vect_free_slp_tree (SLP_INSTANCE_TREE (instance)); 93 vect_free_slp_tree (SLP_INSTANCE_TREE (instance), final_p);
81 SLP_INSTANCE_LOADS (instance).release (); 94 SLP_INSTANCE_LOADS (instance).release ();
82 free (instance); 95 free (instance);
83 } 96 }
84 97
85 98
86 /* Create an SLP node for SCALAR_STMTS. */ 99 /* Create an SLP node for SCALAR_STMTS. */
87 100
88 static slp_tree 101 static slp_tree
89 vect_create_new_slp_node (vec<gimple *> scalar_stmts) 102 vect_create_new_slp_node (vec<stmt_vec_info> scalar_stmts)
90 { 103 {
91 slp_tree node; 104 slp_tree node;
92 gimple *stmt = scalar_stmts[0]; 105 stmt_vec_info stmt_info = scalar_stmts[0];
93 unsigned int nops; 106 unsigned int nops;
94 107
95 if (is_gimple_call (stmt)) 108 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
96 nops = gimple_call_num_args (stmt); 109 nops = gimple_call_num_args (stmt);
97 else if (is_gimple_assign (stmt)) 110 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
98 { 111 {
99 nops = gimple_num_ops (stmt) - 1; 112 nops = gimple_num_ops (stmt) - 1;
100 if (gimple_assign_rhs_code (stmt) == COND_EXPR) 113 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
101 nops++; 114 nops++;
102 } 115 }
103 else if (gimple_code (stmt) == GIMPLE_PHI) 116 else if (is_a <gphi *> (stmt_info->stmt))
104 nops = 0; 117 nops = 0;
105 else 118 else
106 return NULL; 119 return NULL;
107 120
108 node = XNEW (struct _slp_tree); 121 node = XNEW (struct _slp_tree);
109 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts; 122 SLP_TREE_SCALAR_STMTS (node) = scalar_stmts;
110 SLP_TREE_VEC_STMTS (node).create (0); 123 SLP_TREE_VEC_STMTS (node).create (0);
124 SLP_TREE_NUMBER_OF_VEC_STMTS (node) = 0;
111 SLP_TREE_CHILDREN (node).create (nops); 125 SLP_TREE_CHILDREN (node).create (nops);
112 SLP_TREE_LOAD_PERMUTATION (node) = vNULL; 126 SLP_TREE_LOAD_PERMUTATION (node) = vNULL;
113 SLP_TREE_TWO_OPERATORS (node) = false; 127 SLP_TREE_TWO_OPERATORS (node) = false;
114 SLP_TREE_DEF_TYPE (node) = vect_internal_def; 128 SLP_TREE_DEF_TYPE (node) = vect_internal_def;
115 129
116 unsigned i; 130 unsigned i;
117 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt) 131 FOR_EACH_VEC_ELT (scalar_stmts, i, stmt_info)
118 STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt))++; 132 STMT_VINFO_NUM_SLP_USES (stmt_info)++;
119 133
120 return node; 134 return node;
121 } 135 }
122 136
123 137
125 corresponds to the same operand in a group of scalar stmts in an SLP 139 corresponds to the same operand in a group of scalar stmts in an SLP
126 node. */ 140 node. */
127 typedef struct _slp_oprnd_info 141 typedef struct _slp_oprnd_info
128 { 142 {
129 /* Def-stmts for the operands. */ 143 /* Def-stmts for the operands. */
130 vec<gimple *> def_stmts; 144 vec<stmt_vec_info> def_stmts;
131 /* Information about the first statement, its vector def-type, type, the 145 /* Information about the first statement, its vector def-type, type, the
132 operand itself in case it's constant, and an indication if it's a pattern 146 operand itself in case it's constant, and an indication if it's a pattern
133 stmt. */ 147 stmt. */
134 tree first_op_type; 148 tree first_op_type;
135 enum vect_def_type first_dt; 149 enum vect_def_type first_dt;
179 193
180 oprnds_info.release (); 194 oprnds_info.release ();
181 } 195 }
182 196
183 197
184 /* Find the place of the data-ref in STMT in the interleaving chain that starts 198 /* Find the place of the data-ref in STMT_INFO in the interleaving chain
185 from FIRST_STMT. Return -1 if the data-ref is not a part of the chain. */ 199 that starts from FIRST_STMT_INFO. Return -1 if the data-ref is not a part
186 200 of the chain. */
187 static int 201
188 vect_get_place_in_interleaving_chain (gimple *stmt, gimple *first_stmt) 202 int
189 { 203 vect_get_place_in_interleaving_chain (stmt_vec_info stmt_info,
190 gimple *next_stmt = first_stmt; 204 stmt_vec_info first_stmt_info)
205 {
206 stmt_vec_info next_stmt_info = first_stmt_info;
191 int result = 0; 207 int result = 0;
192 208
193 if (first_stmt != GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) 209 if (first_stmt_info != DR_GROUP_FIRST_ELEMENT (stmt_info))
194 return -1; 210 return -1;
195 211
196 do 212 do
197 { 213 {
198 if (next_stmt == stmt) 214 if (next_stmt_info == stmt_info)
199 return result; 215 return result;
200 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); 216 next_stmt_info = DR_GROUP_NEXT_ELEMENT (next_stmt_info);
201 if (next_stmt) 217 if (next_stmt_info)
202 result += GROUP_GAP (vinfo_for_stmt (next_stmt)); 218 result += DR_GROUP_GAP (next_stmt_info);
203 } 219 }
204 while (next_stmt); 220 while (next_stmt_info);
205 221
206 return -1; 222 return -1;
207 } 223 }
208 224
225 /* Check whether it is possible to load COUNT elements of type ELT_MODE
226 using the method implemented by duplicate_and_interleave. Return true
227 if so, returning the number of intermediate vectors in *NVECTORS_OUT
228 (if nonnull) and the type of each intermediate vector in *VECTOR_TYPE_OUT
229 (if nonnull). */
230
231 bool
232 can_duplicate_and_interleave_p (unsigned int count, machine_mode elt_mode,
233 unsigned int *nvectors_out,
234 tree *vector_type_out,
235 tree *permutes)
236 {
237 poly_int64 elt_bytes = count * GET_MODE_SIZE (elt_mode);
238 poly_int64 nelts;
239 unsigned int nvectors = 1;
240 for (;;)
241 {
242 scalar_int_mode int_mode;
243 poly_int64 elt_bits = elt_bytes * BITS_PER_UNIT;
244 if (multiple_p (current_vector_size, elt_bytes, &nelts)
245 && int_mode_for_size (elt_bits, 0).exists (&int_mode))
246 {
247 tree int_type = build_nonstandard_integer_type
248 (GET_MODE_BITSIZE (int_mode), 1);
249 tree vector_type = build_vector_type (int_type, nelts);
250 if (VECTOR_MODE_P (TYPE_MODE (vector_type)))
251 {
252 vec_perm_builder sel1 (nelts, 2, 3);
253 vec_perm_builder sel2 (nelts, 2, 3);
254 poly_int64 half_nelts = exact_div (nelts, 2);
255 for (unsigned int i = 0; i < 3; ++i)
256 {
257 sel1.quick_push (i);
258 sel1.quick_push (i + nelts);
259 sel2.quick_push (half_nelts + i);
260 sel2.quick_push (half_nelts + i + nelts);
261 }
262 vec_perm_indices indices1 (sel1, 2, nelts);
263 vec_perm_indices indices2 (sel2, 2, nelts);
264 if (can_vec_perm_const_p (TYPE_MODE (vector_type), indices1)
265 && can_vec_perm_const_p (TYPE_MODE (vector_type), indices2))
266 {
267 if (nvectors_out)
268 *nvectors_out = nvectors;
269 if (vector_type_out)
270 *vector_type_out = vector_type;
271 if (permutes)
272 {
273 permutes[0] = vect_gen_perm_mask_checked (vector_type,
274 indices1);
275 permutes[1] = vect_gen_perm_mask_checked (vector_type,
276 indices2);
277 }
278 return true;
279 }
280 }
281 }
282 if (!multiple_p (elt_bytes, 2, &elt_bytes))
283 return false;
284 nvectors *= 2;
285 }
286 }
209 287
210 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that 288 /* Get the defs for the rhs of STMT (collect them in OPRNDS_INFO), check that
211 they are of a valid type and that they match the defs of the first stmt of 289 they are of a valid type and that they match the defs of the first stmt of
212 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts 290 the SLP group (stored in OPRNDS_INFO). This function tries to match stmts
213 by swapping operands of STMT when possible. Non-zero *SWAP indicates swap 291 by swapping operands of STMTS[STMT_NUM] when possible. Non-zero *SWAP
214 is required for cond_expr stmts. Specifically, *SWAP is 1 if STMT is cond 292 indicates swap is required for cond_expr stmts. Specifically, *SWAP
215 and operands of comparison need to be swapped; *SWAP is 2 if STMT is cond 293 is 1 if STMT is cond and operands of comparison need to be swapped;
216 and code of comparison needs to be inverted. If there is any operand swap 294 *SWAP is 2 if STMT is cond and code of comparison needs to be inverted.
217 in this function, *SWAP is set to non-zero value. 295 If there is any operand swap in this function, *SWAP is set to non-zero
296 value.
218 If there was a fatal error return -1; if the error could be corrected by 297 If there was a fatal error return -1; if the error could be corrected by
219 swapping operands of father node of this one, return 1; if everything is 298 swapping operands of father node of this one, return 1; if everything is
220 ok return 0. */ 299 ok return 0. */
221
222 static int 300 static int
223 vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap, 301 vect_get_and_check_slp_defs (vec_info *vinfo, unsigned char *swap,
224 gimple *stmt, unsigned stmt_num, 302 vec<stmt_vec_info> stmts, unsigned stmt_num,
225 vec<slp_oprnd_info> *oprnds_info) 303 vec<slp_oprnd_info> *oprnds_info)
226 { 304 {
305 stmt_vec_info stmt_info = stmts[stmt_num];
227 tree oprnd; 306 tree oprnd;
228 unsigned int i, number_of_oprnds; 307 unsigned int i, number_of_oprnds;
229 gimple *def_stmt;
230 enum vect_def_type dt = vect_uninitialized_def; 308 enum vect_def_type dt = vect_uninitialized_def;
231 bool pattern = false; 309 bool pattern = false;
232 slp_oprnd_info oprnd_info; 310 slp_oprnd_info oprnd_info;
233 int first_op_idx = 1; 311 int first_op_idx = 1;
234 bool commutative = false; 312 unsigned int commutative_op = -1U;
235 bool first_op_cond = false; 313 bool first_op_cond = false;
236 bool first = stmt_num == 0; 314 bool first = stmt_num == 0;
237 bool second = stmt_num == 1; 315 bool second = stmt_num == 1;
238 316
239 if (is_gimple_call (stmt)) 317 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
240 { 318 {
241 number_of_oprnds = gimple_call_num_args (stmt); 319 number_of_oprnds = gimple_call_num_args (stmt);
242 first_op_idx = 3; 320 first_op_idx = 3;
243 } 321 if (gimple_call_internal_p (stmt))
244 else if (is_gimple_assign (stmt)) 322 {
323 internal_fn ifn = gimple_call_internal_fn (stmt);
324 commutative_op = first_commutative_argument (ifn);
325 }
326 }
327 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
245 { 328 {
246 enum tree_code code = gimple_assign_rhs_code (stmt); 329 enum tree_code code = gimple_assign_rhs_code (stmt);
247 number_of_oprnds = gimple_num_ops (stmt) - 1; 330 number_of_oprnds = gimple_num_ops (stmt) - 1;
248 /* Swap can only be done for cond_expr if asked to, otherwise we 331 /* Swap can only be done for cond_expr if asked to, otherwise we
249 could result in different comparison code to the first stmt. */ 332 could result in different comparison code to the first stmt. */
252 { 335 {
253 first_op_cond = true; 336 first_op_cond = true;
254 number_of_oprnds++; 337 number_of_oprnds++;
255 } 338 }
256 else 339 else
257 commutative = commutative_tree_code (code); 340 commutative_op = commutative_tree_code (code) ? 0U : -1U;
258 } 341 }
259 else 342 else
260 return -1; 343 return -1;
261 344
262 bool swapped = (*swap != 0); 345 bool swapped = (*swap != 0);
269 /* Map indicating how operands of cond_expr should be swapped. */ 352 /* Map indicating how operands of cond_expr should be swapped. */
270 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}}; 353 int maps[3][4] = {{0, 1, 2, 3}, {1, 0, 2, 3}, {0, 1, 3, 2}};
271 int *map = maps[*swap]; 354 int *map = maps[*swap];
272 355
273 if (i < 2) 356 if (i < 2)
274 oprnd = TREE_OPERAND (gimple_op (stmt, first_op_idx), map[i]); 357 oprnd = TREE_OPERAND (gimple_op (stmt_info->stmt,
358 first_op_idx), map[i]);
275 else 359 else
276 oprnd = gimple_op (stmt, map[i]); 360 oprnd = gimple_op (stmt_info->stmt, map[i]);
277 } 361 }
278 else 362 else
279 oprnd = gimple_op (stmt, first_op_idx + (swapped ? !i : i)); 363 oprnd = gimple_op (stmt_info->stmt, first_op_idx + (swapped ? !i : i));
280 364
281 oprnd_info = (*oprnds_info)[i]; 365 oprnd_info = (*oprnds_info)[i];
282 366
283 if (!vect_is_simple_use (oprnd, vinfo, &def_stmt, &dt)) 367 stmt_vec_info def_stmt_info;
368 if (!vect_is_simple_use (oprnd, vinfo, &dt, &def_stmt_info))
284 { 369 {
285 if (dump_enabled_p ()) 370 if (dump_enabled_p ())
286 { 371 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
287 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 372 "Build SLP failed: can't analyze def for %T\n",
288 "Build SLP failed: can't analyze def for "); 373 oprnd);
289 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
290 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
291 }
292 374
293 return -1; 375 return -1;
294 } 376 }
295
296 /* Check if DEF_STMT is a part of a pattern in LOOP and get the def stmt
297 from the pattern. Check that all the stmts of the node are in the
298 pattern. */
299 if (def_stmt && gimple_bb (def_stmt)
300 && vect_stmt_in_region_p (vinfo, def_stmt)
301 && vinfo_for_stmt (def_stmt)
302 && STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (def_stmt))
303 && !STMT_VINFO_RELEVANT (vinfo_for_stmt (def_stmt))
304 && !STMT_VINFO_LIVE_P (vinfo_for_stmt (def_stmt)))
305 {
306 pattern = true;
307 if (!first && !oprnd_info->first_pattern
308 /* Allow different pattern state for the defs of the
309 first stmt in reduction chains. */
310 && (oprnd_info->first_dt != vect_reduction_def
311 || (!second && !oprnd_info->second_pattern)))
312 {
313 if (i == 0
314 && !swapped
315 && commutative)
316 {
317 swapped = true;
318 goto again;
319 }
320
321 if (dump_enabled_p ())
322 {
323 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
324 "Build SLP failed: some of the stmts"
325 " are in a pattern, and others are not ");
326 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
327 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
328 }
329
330 return 1;
331 }
332
333 def_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (def_stmt));
334 dt = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt));
335
336 if (dt == vect_unknown_def_type)
337 {
338 if (dump_enabled_p ())
339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
340 "Unsupported pattern.\n");
341 return -1;
342 }
343
344 switch (gimple_code (def_stmt))
345 {
346 case GIMPLE_PHI:
347 case GIMPLE_ASSIGN:
348 break;
349
350 default:
351 if (dump_enabled_p ())
352 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
353 "unsupported defining stmt:\n");
354 return -1;
355 }
356 }
357 377
358 if (second) 378 if (second)
359 oprnd_info->second_pattern = pattern; 379 oprnd_info->second_pattern = pattern;
360 380
361 if (first) 381 if (first)
369 /* Not first stmt of the group, check that the def-stmt/s match 389 /* Not first stmt of the group, check that the def-stmt/s match
370 the def-stmt/s of the first stmt. Allow different definition 390 the def-stmt/s of the first stmt. Allow different definition
371 types for reduction chains: the first stmt must be a 391 types for reduction chains: the first stmt must be a
372 vect_reduction_def (a phi node), and the rest 392 vect_reduction_def (a phi node), and the rest
373 vect_internal_def. */ 393 vect_internal_def. */
374 if (((oprnd_info->first_dt != dt 394 tree type = TREE_TYPE (oprnd);
375 && !(oprnd_info->first_dt == vect_reduction_def 395 if ((oprnd_info->first_dt != dt
376 && dt == vect_internal_def) 396 && !(oprnd_info->first_dt == vect_reduction_def
377 && !((oprnd_info->first_dt == vect_external_def 397 && dt == vect_internal_def)
378 || oprnd_info->first_dt == vect_constant_def) 398 && !((oprnd_info->first_dt == vect_external_def
379 && (dt == vect_external_def 399 || oprnd_info->first_dt == vect_constant_def)
380 || dt == vect_constant_def))) 400 && (dt == vect_external_def
381 || !types_compatible_p (oprnd_info->first_op_type, 401 || dt == vect_constant_def)))
382 TREE_TYPE (oprnd)))) 402 || !types_compatible_p (oprnd_info->first_op_type, type))
383 { 403 {
384 /* Try swapping operands if we got a mismatch. */ 404 /* Try swapping operands if we got a mismatch. */
385 if (i == 0 405 if (i == commutative_op && !swapped)
386 && !swapped
387 && commutative)
388 { 406 {
389 swapped = true; 407 swapped = true;
390 goto again; 408 goto again;
391 } 409 }
392 410
394 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 412 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
395 "Build SLP failed: different types\n"); 413 "Build SLP failed: different types\n");
396 414
397 return 1; 415 return 1;
398 } 416 }
417 if ((dt == vect_constant_def
418 || dt == vect_external_def)
419 && !current_vector_size.is_constant ()
420 && (TREE_CODE (type) == BOOLEAN_TYPE
421 || !can_duplicate_and_interleave_p (stmts.length (),
422 TYPE_MODE (type))))
423 {
424 if (dump_enabled_p ())
425 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
426 "Build SLP failed: invalid type of def "
427 "for variable-length SLP %T\n", oprnd);
428 return -1;
429 }
399 } 430 }
400 431
401 /* Check the types of the definitions. */ 432 /* Check the types of the definitions. */
402 switch (dt) 433 switch (dt)
403 { 434 {
406 break; 437 break;
407 438
408 case vect_reduction_def: 439 case vect_reduction_def:
409 case vect_induction_def: 440 case vect_induction_def:
410 case vect_internal_def: 441 case vect_internal_def:
411 oprnd_info->def_stmts.quick_push (def_stmt); 442 oprnd_info->def_stmts.quick_push (def_stmt_info);
412 break; 443 break;
413 444
414 default: 445 default:
415 /* FORNOW: Not supported. */ 446 /* FORNOW: Not supported. */
416 if (dump_enabled_p ()) 447 if (dump_enabled_p ())
417 { 448 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
418 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 449 "Build SLP failed: illegal type of def %T\n",
419 "Build SLP failed: illegal type of def "); 450 oprnd);
420 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, oprnd);
421 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
422 }
423 451
424 return -1; 452 return -1;
425 } 453 }
426 } 454 }
427 455
428 /* Swap operands. */ 456 /* Swap operands. */
429 if (swapped) 457 if (swapped)
430 { 458 {
431 /* If there are already uses of this stmt in a SLP instance then 459 /* If there are already uses of this stmt in a SLP instance then
432 we've committed to the operand order and can't swap it. */ 460 we've committed to the operand order and can't swap it. */
433 if (STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmt)) != 0) 461 if (STMT_VINFO_NUM_SLP_USES (stmt_info) != 0)
434 { 462 {
435 if (dump_enabled_p ()) 463 if (dump_enabled_p ())
436 { 464 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
437 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 465 "Build SLP failed: cannot swap operands of "
438 "Build SLP failed: cannot swap operands of " 466 "shared stmt %G", stmt_info->stmt);
439 "shared stmt ");
440 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
441 }
442 return -1; 467 return -1;
443 } 468 }
444 469
445 if (first_op_cond) 470 if (first_op_cond)
446 { 471 {
472 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
447 tree cond = gimple_assign_rhs1 (stmt); 473 tree cond = gimple_assign_rhs1 (stmt);
448 enum tree_code code = TREE_CODE (cond); 474 enum tree_code code = TREE_CODE (cond);
449 475
450 /* Swap. */ 476 /* Swap. */
451 if (*swap == 1) 477 if (*swap == 1)
464 gcc_assert (code != ERROR_MARK); 490 gcc_assert (code != ERROR_MARK);
465 TREE_SET_CODE (cond, code); 491 TREE_SET_CODE (cond, code);
466 } 492 }
467 } 493 }
468 else 494 else
469 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt), 495 {
470 gimple_assign_rhs2_ptr (stmt)); 496 unsigned int op = commutative_op + first_op_idx;
497 swap_ssa_operands (stmt_info->stmt,
498 gimple_op_ptr (stmt_info->stmt, op),
499 gimple_op_ptr (stmt_info->stmt, op + 1));
500 }
471 if (dump_enabled_p ()) 501 if (dump_enabled_p ())
472 { 502 dump_printf_loc (MSG_NOTE, vect_location,
473 dump_printf_loc (MSG_NOTE, vect_location, 503 "swapped operands to match def types in %G",
474 "swapped operands to match def types in "); 504 stmt_info->stmt);
475 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
476 }
477 } 505 }
478 506
479 *swap = swapped; 507 *swap = swapped;
480 return 0; 508 return 0;
481 } 509 }
482 510
511 /* Return true if call statements CALL1 and CALL2 are similar enough
512 to be combined into the same SLP group. */
513
514 static bool
515 compatible_calls_p (gcall *call1, gcall *call2)
516 {
517 unsigned int nargs = gimple_call_num_args (call1);
518 if (nargs != gimple_call_num_args (call2))
519 return false;
520
521 if (gimple_call_combined_fn (call1) != gimple_call_combined_fn (call2))
522 return false;
523
524 if (gimple_call_internal_p (call1))
525 {
526 if (!types_compatible_p (TREE_TYPE (gimple_call_lhs (call1)),
527 TREE_TYPE (gimple_call_lhs (call2))))
528 return false;
529 for (unsigned int i = 0; i < nargs; ++i)
530 if (!types_compatible_p (TREE_TYPE (gimple_call_arg (call1, i)),
531 TREE_TYPE (gimple_call_arg (call2, i))))
532 return false;
533 }
534 else
535 {
536 if (!operand_equal_p (gimple_call_fn (call1),
537 gimple_call_fn (call2), 0))
538 return false;
539
540 if (gimple_call_fntype (call1) != gimple_call_fntype (call2))
541 return false;
542 }
543 return true;
544 }
545
483 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the 546 /* A subroutine of vect_build_slp_tree for checking VECTYPE, which is the
484 caller's attempt to find the vector type in STMT with the narrowest 547 caller's attempt to find the vector type in STMT_INFO with the narrowest
485 element type. Return true if VECTYPE is nonnull and if it is valid 548 element type. Return true if VECTYPE is nonnull and if it is valid
486 for VINFO. When returning true, update MAX_NUNITS to reflect the 549 for STMT_INFO. When returning true, update MAX_NUNITS to reflect the
487 number of units in VECTYPE. VINFO, GORUP_SIZE and MAX_NUNITS are 550 number of units in VECTYPE. GROUP_SIZE and MAX_NUNITS are as for
488 as for vect_build_slp_tree. */ 551 vect_build_slp_tree. */
489 552
490 static bool 553 static bool
491 vect_record_max_nunits (vec_info *vinfo, gimple *stmt, unsigned int group_size, 554 vect_record_max_nunits (stmt_vec_info stmt_info, unsigned int group_size,
492 tree vectype, unsigned int *max_nunits) 555 tree vectype, poly_uint64 *max_nunits)
493 { 556 {
494 if (!vectype) 557 if (!vectype)
495 { 558 {
496 if (dump_enabled_p ()) 559 if (dump_enabled_p ())
497 { 560 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
498 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 561 "Build SLP failed: unsupported data-type in %G\n",
499 "Build SLP failed: unsupported data-type in "); 562 stmt_info->stmt);
500 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
501 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
502 }
503 /* Fatal mismatch. */ 563 /* Fatal mismatch. */
504 return false; 564 return false;
505 } 565 }
506 566
507 /* If populating the vector type requires unrolling then fail 567 /* If populating the vector type requires unrolling then fail
508 before adjusting *max_nunits for basic-block vectorization. */ 568 before adjusting *max_nunits for basic-block vectorization. */
509 if (is_a <bb_vec_info> (vinfo) 569 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
510 && TYPE_VECTOR_SUBPARTS (vectype) > group_size) 570 unsigned HOST_WIDE_INT const_nunits;
571 if (STMT_VINFO_BB_VINFO (stmt_info)
572 && (!nunits.is_constant (&const_nunits)
573 || const_nunits > group_size))
511 { 574 {
512 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
513 "Build SLP failed: unrolling required " 576 "Build SLP failed: unrolling required "
514 "in basic block SLP\n"); 577 "in basic block SLP\n");
515 /* Fatal mismatch. */ 578 /* Fatal mismatch. */
516 return false; 579 return false;
517 } 580 }
518 581
519 /* In case of multiple types we need to detect the smallest type. */ 582 /* In case of multiple types we need to detect the smallest type. */
520 if (*max_nunits < TYPE_VECTOR_SUBPARTS (vectype)) 583 vect_update_max_nunits (max_nunits, vectype);
521 *max_nunits = TYPE_VECTOR_SUBPARTS (vectype);
522
523 return true; 584 return true;
585 }
586
587 /* STMTS is a group of GROUP_SIZE SLP statements in which some
588 statements do the same operation as the first statement and in which
589 the others do ALT_STMT_CODE. Return true if we can take one vector
590 of the first operation and one vector of the second and permute them
591 to get the required result. VECTYPE is the type of the vector that
592 would be permuted. */
593
594 static bool
595 vect_two_operations_perm_ok_p (vec<stmt_vec_info> stmts,
596 unsigned int group_size, tree vectype,
597 tree_code alt_stmt_code)
598 {
599 unsigned HOST_WIDE_INT count;
600 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&count))
601 return false;
602
603 vec_perm_builder sel (count, count, 1);
604 for (unsigned int i = 0; i < count; ++i)
605 {
606 unsigned int elt = i;
607 gassign *stmt = as_a <gassign *> (stmts[i % group_size]->stmt);
608 if (gimple_assign_rhs_code (stmt) == alt_stmt_code)
609 elt += count;
610 sel.quick_push (elt);
611 }
612 vec_perm_indices indices (sel, 2, count);
613 return can_vec_perm_const_p (TYPE_MODE (vectype), indices);
524 } 614 }
525 615
526 /* Verify if the scalar stmts STMTS are isomorphic, require data 616 /* Verify if the scalar stmts STMTS are isomorphic, require data
527 permutation or are of unsupported types of operation. Return 617 permutation or are of unsupported types of operation. Return
528 true if they are, otherwise return false and indicate in *MATCHES 618 true if they are, otherwise return false and indicate in *MATCHES
536 to 2 if stmt I is isormorphic to the first stmt by inverting the code 626 to 2 if stmt I is isormorphic to the first stmt by inverting the code
537 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped 627 of comparison. Take A1 >= B1 ? X1 : Y1 as an exmple, it can be swapped
538 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */ 628 to (B1 <= A1 ? X1 : Y1); or be inverted to (A1 < B1) ? Y1 : X1. */
539 629
540 static bool 630 static bool
541 vect_build_slp_tree_1 (vec_info *vinfo, unsigned char *swap, 631 vect_build_slp_tree_1 (unsigned char *swap,
542 vec<gimple *> stmts, unsigned int group_size, 632 vec<stmt_vec_info> stmts, unsigned int group_size,
543 unsigned nops, unsigned int *max_nunits, 633 poly_uint64 *max_nunits, bool *matches,
544 bool *matches, bool *two_operators) 634 bool *two_operators)
545 { 635 {
546 unsigned int i; 636 unsigned int i;
547 gimple *first_stmt = stmts[0], *stmt = stmts[0]; 637 stmt_vec_info first_stmt_info = stmts[0];
548 enum tree_code first_stmt_code = ERROR_MARK; 638 enum tree_code first_stmt_code = ERROR_MARK;
549 enum tree_code alt_stmt_code = ERROR_MARK; 639 enum tree_code alt_stmt_code = ERROR_MARK;
550 enum tree_code rhs_code = ERROR_MARK; 640 enum tree_code rhs_code = ERROR_MARK;
551 enum tree_code first_cond_code = ERROR_MARK; 641 enum tree_code first_cond_code = ERROR_MARK;
552 tree lhs; 642 tree lhs;
553 bool need_same_oprnds = false; 643 bool need_same_oprnds = false;
554 tree vectype = NULL_TREE, scalar_type, first_op1 = NULL_TREE; 644 tree vectype = NULL_TREE, first_op1 = NULL_TREE;
555 optab optab; 645 optab optab;
556 int icode; 646 int icode;
557 machine_mode optab_op2_mode; 647 machine_mode optab_op2_mode;
558 machine_mode vec_mode; 648 machine_mode vec_mode;
559 HOST_WIDE_INT dummy; 649 stmt_vec_info first_load = NULL, prev_first_load = NULL;
560 gimple *first_load = NULL, *prev_first_load = NULL;
561 650
562 /* For every stmt in NODE find its def stmt/s. */ 651 /* For every stmt in NODE find its def stmt/s. */
563 FOR_EACH_VEC_ELT (stmts, i, stmt) 652 stmt_vec_info stmt_info;
564 { 653 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
654 {
655 gimple *stmt = stmt_info->stmt;
565 swap[i] = 0; 656 swap[i] = 0;
566 matches[i] = false; 657 matches[i] = false;
567 658
568 if (dump_enabled_p ()) 659 if (dump_enabled_p ())
569 { 660 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for %G", stmt);
570 dump_printf_loc (MSG_NOTE, vect_location, "Build SLP for ");
571 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
572 }
573 661
574 /* Fail to vectorize statements marked as unvectorizable. */ 662 /* Fail to vectorize statements marked as unvectorizable. */
575 if (!STMT_VINFO_VECTORIZABLE (vinfo_for_stmt (stmt))) 663 if (!STMT_VINFO_VECTORIZABLE (stmt_info))
576 { 664 {
577 if (dump_enabled_p ()) 665 if (dump_enabled_p ())
578 { 666 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
579 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 667 "Build SLP failed: unvectorizable statement %G",
580 "Build SLP failed: unvectorizable statement "); 668 stmt);
581 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
582 }
583 /* Fatal mismatch. */ 669 /* Fatal mismatch. */
584 matches[0] = false; 670 matches[0] = false;
585 return false; 671 return false;
586 } 672 }
587 673
588 lhs = gimple_get_lhs (stmt); 674 lhs = gimple_get_lhs (stmt);
589 if (lhs == NULL_TREE) 675 if (lhs == NULL_TREE)
590 { 676 {
591 if (dump_enabled_p ()) 677 if (dump_enabled_p ())
592 { 678 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
593 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 679 "Build SLP failed: not GIMPLE_ASSIGN nor "
594 "Build SLP failed: not GIMPLE_ASSIGN nor " 680 "GIMPLE_CALL %G", stmt);
595 "GIMPLE_CALL ");
596 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
597 }
598 /* Fatal mismatch. */ 681 /* Fatal mismatch. */
599 matches[0] = false; 682 matches[0] = false;
600 return false; 683 return false;
601 } 684 }
602 685
603 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, &dummy); 686 tree nunits_vectype;
604 vectype = get_vectype_for_scalar_type (scalar_type); 687 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
605 if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype, 688 &nunits_vectype)
606 max_nunits)) 689 || (nunits_vectype
690 && !vect_record_max_nunits (stmt_info, group_size,
691 nunits_vectype, max_nunits)))
607 { 692 {
608 /* Fatal mismatch. */ 693 /* Fatal mismatch. */
609 matches[0] = false; 694 matches[0] = false;
610 return false; 695 return false;
611 } 696 }
697
698 gcc_assert (vectype);
612 699
613 if (gcall *call_stmt = dyn_cast <gcall *> (stmt)) 700 if (gcall *call_stmt = dyn_cast <gcall *> (stmt))
614 { 701 {
615 rhs_code = CALL_EXPR; 702 rhs_code = CALL_EXPR;
616 if (gimple_call_internal_p (call_stmt) 703 if ((gimple_call_internal_p (call_stmt)
704 && (!vectorizable_internal_fn_p
705 (gimple_call_internal_fn (call_stmt))))
617 || gimple_call_tail_p (call_stmt) 706 || gimple_call_tail_p (call_stmt)
618 || gimple_call_noreturn_p (call_stmt) 707 || gimple_call_noreturn_p (call_stmt)
619 || !gimple_call_nothrow_p (call_stmt) 708 || !gimple_call_nothrow_p (call_stmt)
620 || gimple_call_chain (call_stmt)) 709 || gimple_call_chain (call_stmt))
621 { 710 {
622 if (dump_enabled_p ()) 711 if (dump_enabled_p ())
623 { 712 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
624 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 713 "Build SLP failed: unsupported call type %G",
625 "Build SLP failed: unsupported call type "); 714 call_stmt);
626 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
627 call_stmt, 0);
628 }
629 /* Fatal mismatch. */ 715 /* Fatal mismatch. */
630 matches[0] = false; 716 matches[0] = false;
631 return false; 717 return false;
632 } 718 }
633 } 719 }
643 vector shift with scalar shift operand. */ 729 vector shift with scalar shift operand. */
644 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR 730 if (rhs_code == LSHIFT_EXPR || rhs_code == RSHIFT_EXPR
645 || rhs_code == LROTATE_EXPR 731 || rhs_code == LROTATE_EXPR
646 || rhs_code == RROTATE_EXPR) 732 || rhs_code == RROTATE_EXPR)
647 { 733 {
734 if (vectype == boolean_type_node)
735 {
736 if (dump_enabled_p ())
737 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
738 "Build SLP failed: shift of a"
739 " boolean.\n");
740 /* Fatal mismatch. */
741 matches[0] = false;
742 return false;
743 }
744
648 vec_mode = TYPE_MODE (vectype); 745 vec_mode = TYPE_MODE (vectype);
649 746
650 /* First see if we have a vector/vector shift. */ 747 /* First see if we have a vector/vector shift. */
651 optab = optab_for_tree_code (rhs_code, vectype, 748 optab = optab_for_tree_code (rhs_code, vectype,
652 optab_vector); 749 optab_vector);
707 && !((first_stmt_code == PLUS_EXPR 804 && !((first_stmt_code == PLUS_EXPR
708 || first_stmt_code == MINUS_EXPR) 805 || first_stmt_code == MINUS_EXPR)
709 && (alt_stmt_code == PLUS_EXPR 806 && (alt_stmt_code == PLUS_EXPR
710 || alt_stmt_code == MINUS_EXPR) 807 || alt_stmt_code == MINUS_EXPR)
711 && rhs_code == alt_stmt_code) 808 && rhs_code == alt_stmt_code)
712 && !(STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)) 809 && !(STMT_VINFO_GROUPED_ACCESS (stmt_info)
713 && (first_stmt_code == ARRAY_REF 810 && (first_stmt_code == ARRAY_REF
714 || first_stmt_code == BIT_FIELD_REF 811 || first_stmt_code == BIT_FIELD_REF
715 || first_stmt_code == INDIRECT_REF 812 || first_stmt_code == INDIRECT_REF
716 || first_stmt_code == COMPONENT_REF 813 || first_stmt_code == COMPONENT_REF
717 || first_stmt_code == MEM_REF))) 814 || first_stmt_code == MEM_REF)))
718 { 815 {
719 if (dump_enabled_p ()) 816 if (dump_enabled_p ())
720 { 817 {
721 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 818 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
722 "Build SLP failed: different operation " 819 "Build SLP failed: different operation "
723 "in stmt "); 820 "in stmt %G", stmt);
724 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
725 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 821 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
726 "original stmt "); 822 "original stmt %G", first_stmt_info->stmt);
727 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
728 first_stmt, 0);
729 } 823 }
730 /* Mismatch. */ 824 /* Mismatch. */
731 continue; 825 continue;
732 } 826 }
733 827
734 if (need_same_oprnds 828 if (need_same_oprnds
735 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0)) 829 && !operand_equal_p (first_op1, gimple_assign_rhs2 (stmt), 0))
736 { 830 {
737 if (dump_enabled_p ()) 831 if (dump_enabled_p ())
738 { 832 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
739 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 833 "Build SLP failed: different shift "
740 "Build SLP failed: different shift " 834 "arguments in %G", stmt);
741 "arguments in ");
742 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
743 }
744 /* Mismatch. */ 835 /* Mismatch. */
745 continue; 836 continue;
746 } 837 }
747 838
748 if (rhs_code == CALL_EXPR) 839 if (rhs_code == CALL_EXPR)
749 { 840 {
750 gimple *first_stmt = stmts[0]; 841 if (!compatible_calls_p (as_a <gcall *> (stmts[0]->stmt),
751 if (gimple_call_num_args (stmt) != nops 842 as_a <gcall *> (stmt)))
752 || !operand_equal_p (gimple_call_fn (first_stmt),
753 gimple_call_fn (stmt), 0)
754 || gimple_call_fntype (first_stmt)
755 != gimple_call_fntype (stmt))
756 { 843 {
757 if (dump_enabled_p ()) 844 if (dump_enabled_p ())
758 { 845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
759 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 846 "Build SLP failed: different calls in %G",
760 "Build SLP failed: different calls in "); 847 stmt);
761 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
762 stmt, 0);
763 }
764 /* Mismatch. */ 848 /* Mismatch. */
765 continue; 849 continue;
766 } 850 }
767 } 851 }
768 } 852 }
769 853
770 /* Grouped store or load. */ 854 /* Grouped store or load. */
771 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))) 855 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
772 { 856 {
773 if (REFERENCE_CLASS_P (lhs)) 857 if (REFERENCE_CLASS_P (lhs))
774 { 858 {
775 /* Store. */ 859 /* Store. */
776 ; 860 ;
777 } 861 }
778 else 862 else
779 { 863 {
780 /* Load. */ 864 /* Load. */
781 first_load = GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)); 865 first_load = DR_GROUP_FIRST_ELEMENT (stmt_info);
782 if (prev_first_load) 866 if (prev_first_load)
783 { 867 {
784 /* Check that there are no loads from different interleaving 868 /* Check that there are no loads from different interleaving
785 chains in the same node. */ 869 chains in the same node. */
786 if (prev_first_load != first_load) 870 if (prev_first_load != first_load)
787 { 871 {
788 if (dump_enabled_p ()) 872 if (dump_enabled_p ())
789 { 873 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, 874 vect_location,
791 vect_location, 875 "Build SLP failed: different "
792 "Build SLP failed: different " 876 "interleaving chains in one node %G",
793 "interleaving chains in one node "); 877 stmt);
794 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
795 stmt, 0);
796 }
797 /* Mismatch. */ 878 /* Mismatch. */
798 continue; 879 continue;
799 } 880 }
800 } 881 }
801 else 882 else
806 { 887 {
807 if (TREE_CODE_CLASS (rhs_code) == tcc_reference) 888 if (TREE_CODE_CLASS (rhs_code) == tcc_reference)
808 { 889 {
809 /* Not grouped load. */ 890 /* Not grouped load. */
810 if (dump_enabled_p ()) 891 if (dump_enabled_p ())
811 { 892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
812 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 893 "Build SLP failed: not grouped load %G", stmt);
813 "Build SLP failed: not grouped load ");
814 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
815 }
816 894
817 /* FORNOW: Not grouped loads are not supported. */ 895 /* FORNOW: Not grouped loads are not supported. */
818 /* Fatal mismatch. */ 896 /* Fatal mismatch. */
819 matches[0] = false; 897 matches[0] = false;
820 return false; 898 return false;
826 && TREE_CODE_CLASS (rhs_code) != tcc_expression 904 && TREE_CODE_CLASS (rhs_code) != tcc_expression
827 && TREE_CODE_CLASS (rhs_code) != tcc_comparison 905 && TREE_CODE_CLASS (rhs_code) != tcc_comparison
828 && rhs_code != CALL_EXPR) 906 && rhs_code != CALL_EXPR)
829 { 907 {
830 if (dump_enabled_p ()) 908 if (dump_enabled_p ())
831 { 909 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
832 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 910 "Build SLP failed: operation unsupported %G",
833 "Build SLP failed: operation"); 911 stmt);
834 dump_printf (MSG_MISSED_OPTIMIZATION, " unsupported ");
835 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
836 }
837 /* Fatal mismatch. */ 912 /* Fatal mismatch. */
838 matches[0] = false; 913 matches[0] = false;
839 return false; 914 return false;
840 } 915 }
841 916
864 else if (first_cond_code == invert_code) 939 else if (first_cond_code == invert_code)
865 swap[i] = 2; 940 swap[i] = 2;
866 else 941 else
867 { 942 {
868 if (dump_enabled_p ()) 943 if (dump_enabled_p ())
869 { 944 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
870 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 945 "Build SLP failed: different"
871 "Build SLP failed: different" 946 " operation %G", stmt);
872 " operation");
873 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
874 stmt, 0);
875 }
876 /* Mismatch. */ 947 /* Mismatch. */
877 continue; 948 continue;
878 } 949 }
879 } 950 }
880 } 951 }
889 /* If we allowed a two-operation SLP node verify the target can cope 960 /* If we allowed a two-operation SLP node verify the target can cope
890 with the permute we are going to use. */ 961 with the permute we are going to use. */
891 if (alt_stmt_code != ERROR_MARK 962 if (alt_stmt_code != ERROR_MARK
892 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference) 963 && TREE_CODE_CLASS (alt_stmt_code) != tcc_reference)
893 { 964 {
894 unsigned int count = TYPE_VECTOR_SUBPARTS (vectype); 965 if (vectype == boolean_type_node
895 auto_vec_perm_indices sel (count); 966 || !vect_two_operations_perm_ok_p (stmts, group_size,
896 for (i = 0; i < count; ++i) 967 vectype, alt_stmt_code))
897 {
898 unsigned int elt = i;
899 if (gimple_assign_rhs_code (stmts[i % group_size]) == alt_stmt_code)
900 elt += count;
901 sel.quick_push (elt);
902 }
903 if (!can_vec_perm_p (TYPE_MODE (vectype), false, &sel))
904 { 968 {
905 for (i = 0; i < group_size; ++i) 969 for (i = 0; i < group_size; ++i)
906 if (gimple_assign_rhs_code (stmts[i]) == alt_stmt_code) 970 if (gimple_assign_rhs_code (stmts[i]->stmt) == alt_stmt_code)
907 { 971 {
908 matches[i] = false; 972 matches[i] = false;
909 if (dump_enabled_p ()) 973 if (dump_enabled_p ())
910 { 974 {
911 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 975 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
912 "Build SLP failed: different operation " 976 "Build SLP failed: different operation "
913 "in stmt "); 977 "in stmt %G", stmts[i]->stmt);
914 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
915 stmts[i], 0);
916 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 978 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
917 "original stmt "); 979 "original stmt %G", first_stmt_info->stmt);
918 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
919 first_stmt, 0);
920 } 980 }
921 } 981 }
922 return false; 982 return false;
923 } 983 }
924 *two_operators = true; 984 *two_operators = true;
930 /* Traits for the hash_set to record failed SLP builds for a stmt set. 990 /* Traits for the hash_set to record failed SLP builds for a stmt set.
931 Note we never remove apart from at destruction time so we do not 991 Note we never remove apart from at destruction time so we do not
932 need a special value for deleted that differs from empty. */ 992 need a special value for deleted that differs from empty. */
933 struct bst_traits 993 struct bst_traits
934 { 994 {
935 typedef vec <gimple *> value_type; 995 typedef vec <stmt_vec_info> value_type;
936 typedef vec <gimple *> compare_type; 996 typedef vec <stmt_vec_info> compare_type;
937 static inline hashval_t hash (value_type); 997 static inline hashval_t hash (value_type);
938 static inline bool equal (value_type existing, value_type candidate); 998 static inline bool equal (value_type existing, value_type candidate);
939 static inline bool is_empty (value_type x) { return !x.exists (); } 999 static inline bool is_empty (value_type x) { return !x.exists (); }
940 static inline bool is_deleted (value_type x) { return !x.exists (); } 1000 static inline bool is_deleted (value_type x) { return !x.exists (); }
941 static inline void mark_empty (value_type &x) { x.release (); } 1001 static inline void mark_empty (value_type &x) { x.release (); }
945 inline hashval_t 1005 inline hashval_t
946 bst_traits::hash (value_type x) 1006 bst_traits::hash (value_type x)
947 { 1007 {
948 inchash::hash h; 1008 inchash::hash h;
949 for (unsigned i = 0; i < x.length (); ++i) 1009 for (unsigned i = 0; i < x.length (); ++i)
950 h.add_int (gimple_uid (x[i])); 1010 h.add_int (gimple_uid (x[i]->stmt));
951 return h.end (); 1011 return h.end ();
952 } 1012 }
953 inline bool 1013 inline bool
954 bst_traits::equal (value_type existing, value_type candidate) 1014 bst_traits::equal (value_type existing, value_type candidate)
955 { 1015 {
959 if (existing[i] != candidate[i]) 1019 if (existing[i] != candidate[i])
960 return false; 1020 return false;
961 return true; 1021 return true;
962 } 1022 }
963 1023
964 static hash_set <vec <gimple *>, bst_traits> *bst_fail; 1024 typedef hash_set <vec <gimple *>, bst_traits> scalar_stmts_set_t;
1025 static scalar_stmts_set_t *bst_fail;
1026
1027 typedef hash_map <vec <gimple *>, slp_tree,
1028 simple_hashmap_traits <bst_traits, slp_tree> >
1029 scalar_stmts_to_slp_tree_map_t;
965 1030
966 static slp_tree 1031 static slp_tree
967 vect_build_slp_tree_2 (vec_info *vinfo, 1032 vect_build_slp_tree_2 (vec_info *vinfo,
968 vec<gimple *> stmts, unsigned int group_size, 1033 vec<stmt_vec_info> stmts, unsigned int group_size,
969 unsigned int *max_nunits, 1034 poly_uint64 *max_nunits,
970 vec<slp_tree> *loads, 1035 vec<slp_tree> *loads,
971 bool *matches, unsigned *npermutes, unsigned *tree_size, 1036 bool *matches, unsigned *npermutes, unsigned *tree_size,
972 unsigned max_tree_size); 1037 unsigned max_tree_size);
973 1038
974 static slp_tree 1039 static slp_tree
975 vect_build_slp_tree (vec_info *vinfo, 1040 vect_build_slp_tree (vec_info *vinfo,
976 vec<gimple *> stmts, unsigned int group_size, 1041 vec<stmt_vec_info> stmts, unsigned int group_size,
977 unsigned int *max_nunits, 1042 poly_uint64 *max_nunits, vec<slp_tree> *loads,
978 vec<slp_tree> *loads,
979 bool *matches, unsigned *npermutes, unsigned *tree_size, 1043 bool *matches, unsigned *npermutes, unsigned *tree_size,
980 unsigned max_tree_size) 1044 unsigned max_tree_size)
981 { 1045 {
982 if (bst_fail->contains (stmts)) 1046 if (bst_fail->contains (stmts))
983 return NULL; 1047 return NULL;
987 /* When SLP build fails for stmts record this, otherwise SLP build 1051 /* When SLP build fails for stmts record this, otherwise SLP build
988 can be exponential in time when we allow to construct parts from 1052 can be exponential in time when we allow to construct parts from
989 scalars, see PR81723. */ 1053 scalars, see PR81723. */
990 if (! res) 1054 if (! res)
991 { 1055 {
992 vec <gimple *> x; 1056 vec <stmt_vec_info> x;
993 x.create (stmts.length ()); 1057 x.create (stmts.length ());
994 x.splice (stmts); 1058 x.splice (stmts);
995 bst_fail->add (x); 1059 bst_fail->add (x);
996 } 1060 }
997 return res; 1061 return res;
1004 The value returned is the depth in the SLP tree where a mismatch 1068 The value returned is the depth in the SLP tree where a mismatch
1005 was found. */ 1069 was found. */
1006 1070
1007 static slp_tree 1071 static slp_tree
1008 vect_build_slp_tree_2 (vec_info *vinfo, 1072 vect_build_slp_tree_2 (vec_info *vinfo,
1009 vec<gimple *> stmts, unsigned int group_size, 1073 vec<stmt_vec_info> stmts, unsigned int group_size,
1010 unsigned int *max_nunits, 1074 poly_uint64 *max_nunits,
1011 vec<slp_tree> *loads, 1075 vec<slp_tree> *loads,
1012 bool *matches, unsigned *npermutes, unsigned *tree_size, 1076 bool *matches, unsigned *npermutes, unsigned *tree_size,
1013 unsigned max_tree_size) 1077 unsigned max_tree_size)
1014 { 1078 {
1015 unsigned nops, i, this_tree_size = 0, this_max_nunits = *max_nunits; 1079 unsigned nops, i, this_tree_size = 0;
1016 gimple *stmt; 1080 poly_uint64 this_max_nunits = *max_nunits;
1017 slp_tree node; 1081 slp_tree node;
1018 1082
1019 matches[0] = false; 1083 matches[0] = false;
1020 1084
1021 stmt = stmts[0]; 1085 stmt_vec_info stmt_info = stmts[0];
1022 if (is_gimple_call (stmt)) 1086 if (gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt))
1023 nops = gimple_call_num_args (stmt); 1087 nops = gimple_call_num_args (stmt);
1024 else if (is_gimple_assign (stmt)) 1088 else if (gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt))
1025 { 1089 {
1026 nops = gimple_num_ops (stmt) - 1; 1090 nops = gimple_num_ops (stmt) - 1;
1027 if (gimple_assign_rhs_code (stmt) == COND_EXPR) 1091 if (gimple_assign_rhs_code (stmt) == COND_EXPR)
1028 nops++; 1092 nops++;
1029 } 1093 }
1030 else if (gimple_code (stmt) == GIMPLE_PHI) 1094 else if (is_a <gphi *> (stmt_info->stmt))
1031 nops = 0; 1095 nops = 0;
1032 else 1096 else
1033 return NULL; 1097 return NULL;
1034 1098
1035 /* If the SLP node is a PHI (induction or reduction), terminate 1099 /* If the SLP node is a PHI (induction or reduction), terminate
1036 the recursion. */ 1100 the recursion. */
1037 if (gimple_code (stmt) == GIMPLE_PHI) 1101 if (gphi *stmt = dyn_cast <gphi *> (stmt_info->stmt))
1038 { 1102 {
1039 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt)); 1103 tree scalar_type = TREE_TYPE (PHI_RESULT (stmt));
1040 tree vectype = get_vectype_for_scalar_type (scalar_type); 1104 tree vectype = get_vectype_for_scalar_type (scalar_type);
1041 if (!vect_record_max_nunits (vinfo, stmt, group_size, vectype, 1105 if (!vect_record_max_nunits (stmt_info, group_size, vectype, max_nunits))
1042 max_nunits))
1043 return NULL; 1106 return NULL;
1044 1107
1045 vect_def_type def_type = STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)); 1108 vect_def_type def_type = STMT_VINFO_DEF_TYPE (stmt_info);
1046 /* Induction from different IVs is not supported. */ 1109 /* Induction from different IVs is not supported. */
1047 if (def_type == vect_induction_def) 1110 if (def_type == vect_induction_def)
1048 { 1111 {
1049 FOR_EACH_VEC_ELT (stmts, i, stmt) 1112 stmt_vec_info other_info;
1050 if (stmt != stmts[0]) 1113 FOR_EACH_VEC_ELT (stmts, i, other_info)
1114 if (stmt_info != other_info)
1051 return NULL; 1115 return NULL;
1052 } 1116 }
1053 else 1117 else
1054 { 1118 {
1055 /* Else def types have to match. */ 1119 /* Else def types have to match. */
1056 FOR_EACH_VEC_ELT (stmts, i, stmt) 1120 stmt_vec_info other_info;
1121 FOR_EACH_VEC_ELT (stmts, i, other_info)
1057 { 1122 {
1058 /* But for reduction chains only check on the first stmt. */ 1123 /* But for reduction chains only check on the first stmt. */
1059 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) 1124 if (REDUC_GROUP_FIRST_ELEMENT (other_info)
1060 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) != stmt) 1125 && REDUC_GROUP_FIRST_ELEMENT (other_info) != stmt_info)
1061 continue; 1126 continue;
1062 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != def_type) 1127 if (STMT_VINFO_DEF_TYPE (other_info) != def_type)
1063 return NULL; 1128 return NULL;
1064 } 1129 }
1065 } 1130 }
1066 node = vect_create_new_slp_node (stmts); 1131 node = vect_create_new_slp_node (stmts);
1067 return node; 1132 return node;
1068 } 1133 }
1069 1134
1070 1135
1071 bool two_operators = false; 1136 bool two_operators = false;
1072 unsigned char *swap = XALLOCAVEC (unsigned char, group_size); 1137 unsigned char *swap = XALLOCAVEC (unsigned char, group_size);
1073 if (!vect_build_slp_tree_1 (vinfo, swap, 1138 if (!vect_build_slp_tree_1 (swap, stmts, group_size,
1074 stmts, group_size, nops,
1075 &this_max_nunits, matches, &two_operators)) 1139 &this_max_nunits, matches, &two_operators))
1076 return NULL; 1140 return NULL;
1077 1141
1078 /* If the SLP node is a load, terminate the recursion. */ 1142 /* If the SLP node is a load, terminate the recursion. */
1079 if (STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt)) 1143 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)
1080 && DR_IS_READ (STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)))) 1144 && DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
1081 { 1145 {
1082 *max_nunits = this_max_nunits; 1146 *max_nunits = this_max_nunits;
1083 node = vect_create_new_slp_node (stmts); 1147 node = vect_create_new_slp_node (stmts);
1084 loads->safe_push (node); 1148 loads->safe_push (node);
1085 return node; 1149 return node;
1086 } 1150 }
1087 1151
1088 /* Get at the operands, verifying they are compatible. */ 1152 /* Get at the operands, verifying they are compatible. */
1089 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size); 1153 vec<slp_oprnd_info> oprnds_info = vect_create_oprnd_info (nops, group_size);
1090 slp_oprnd_info oprnd_info; 1154 slp_oprnd_info oprnd_info;
1091 FOR_EACH_VEC_ELT (stmts, i, stmt) 1155 FOR_EACH_VEC_ELT (stmts, i, stmt_info)
1092 { 1156 {
1093 int res = vect_get_and_check_slp_defs (vinfo, &swap[i], 1157 int res = vect_get_and_check_slp_defs (vinfo, &swap[i],
1094 stmt, i, &oprnds_info); 1158 stmts, i, &oprnds_info);
1095 if (res != 0) 1159 if (res != 0)
1096 matches[(res == -1) ? 0 : i] = false; 1160 matches[(res == -1) ? 0 : i] = false;
1097 if (!matches[0]) 1161 if (!matches[0])
1098 break; 1162 break;
1099 } 1163 }
1105 } 1169 }
1106 1170
1107 auto_vec<slp_tree, 4> children; 1171 auto_vec<slp_tree, 4> children;
1108 auto_vec<slp_tree> this_loads; 1172 auto_vec<slp_tree> this_loads;
1109 1173
1110 stmt = stmts[0]; 1174 stmt_info = stmts[0];
1111 1175
1112 if (tree_size) 1176 if (tree_size)
1113 max_tree_size -= *tree_size; 1177 max_tree_size -= *tree_size;
1114 1178
1115 /* Create SLP_TREE nodes for the definition node/s. */ 1179 /* Create SLP_TREE nodes for the definition node/s. */
1125 && oprnd_info->first_dt != vect_induction_def) 1189 && oprnd_info->first_dt != vect_induction_def)
1126 continue; 1190 continue;
1127 1191
1128 if (++this_tree_size > max_tree_size) 1192 if (++this_tree_size > max_tree_size)
1129 { 1193 {
1194 if (dump_enabled_p ())
1195 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1196 vect_location,
1197 "Build SLP failed: SLP tree too large\n");
1130 FOR_EACH_VEC_ELT (children, j, child) 1198 FOR_EACH_VEC_ELT (children, j, child)
1131 vect_free_slp_tree (child); 1199 vect_free_slp_tree (child, false);
1132 vect_free_oprnd_info (oprnds_info); 1200 vect_free_oprnd_info (oprnds_info);
1133 return NULL; 1201 return NULL;
1134 } 1202 }
1135 1203
1136 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts, 1204 if ((child = vect_build_slp_tree (vinfo, oprnd_info->def_stmts,
1143 throw that away and build it up this node from scalars. */ 1211 throw that away and build it up this node from scalars. */
1144 if (!SLP_TREE_CHILDREN (child).is_empty () 1212 if (!SLP_TREE_CHILDREN (child).is_empty ()
1145 /* ??? Rejecting patterns this way doesn't work. We'd have to 1213 /* ??? Rejecting patterns this way doesn't work. We'd have to
1146 do extra work to cancel the pattern so the uses see the 1214 do extra work to cancel the pattern so the uses see the
1147 scalar version. */ 1215 scalar version. */
1148 && !is_pattern_stmt_p 1216 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
1149 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
1150 { 1217 {
1151 slp_tree grandchild; 1218 slp_tree grandchild;
1152 1219
1153 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) 1220 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1154 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def) 1221 if (SLP_TREE_DEF_TYPE (grandchild) == vect_internal_def)
1157 { 1224 {
1158 /* Roll back. */ 1225 /* Roll back. */
1159 this_loads.truncate (old_nloads); 1226 this_loads.truncate (old_nloads);
1160 this_tree_size = old_tree_size; 1227 this_tree_size = old_tree_size;
1161 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) 1228 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1162 vect_free_slp_tree (grandchild); 1229 vect_free_slp_tree (grandchild, false);
1163 SLP_TREE_CHILDREN (child).truncate (0); 1230 SLP_TREE_CHILDREN (child).truncate (0);
1164 1231
1165 dump_printf_loc (MSG_NOTE, vect_location, 1232 dump_printf_loc (MSG_NOTE, vect_location,
1166 "Building parent vector operands from " 1233 "Building parent vector operands from "
1167 "scalars instead\n"); 1234 "scalars instead\n");
1188 if (is_a <bb_vec_info> (vinfo) 1255 if (is_a <bb_vec_info> (vinfo)
1189 && !matches[0] 1256 && !matches[0]
1190 /* ??? Rejecting patterns this way doesn't work. We'd have to 1257 /* ??? Rejecting patterns this way doesn't work. We'd have to
1191 do extra work to cancel the pattern so the uses see the 1258 do extra work to cancel the pattern so the uses see the
1192 scalar version. */ 1259 scalar version. */
1193 && !is_pattern_stmt_p (vinfo_for_stmt (stmt))) 1260 && !is_pattern_stmt_p (stmt_info))
1194 { 1261 {
1195 dump_printf_loc (MSG_NOTE, vect_location, 1262 dump_printf_loc (MSG_NOTE, vect_location,
1196 "Building vector operands from scalars\n"); 1263 "Building vector operands from scalars\n");
1197 child = vect_create_new_slp_node (oprnd_info->def_stmts); 1264 child = vect_create_new_slp_node (oprnd_info->def_stmts);
1198 SLP_TREE_DEF_TYPE (child) = vect_external_def; 1265 SLP_TREE_DEF_TYPE (child) = vect_external_def;
1209 && matches[0] 1276 && matches[0]
1210 /* ??? For COND_EXPRs we can swap the comparison operands 1277 /* ??? For COND_EXPRs we can swap the comparison operands
1211 as well as the arms under some constraints. */ 1278 as well as the arms under some constraints. */
1212 && nops == 2 1279 && nops == 2
1213 && oprnds_info[1]->first_dt == vect_internal_def 1280 && oprnds_info[1]->first_dt == vect_internal_def
1214 && is_gimple_assign (stmt) 1281 && is_gimple_assign (stmt_info->stmt)
1215 && commutative_tree_code (gimple_assign_rhs_code (stmt))
1216 && ! two_operators
1217 /* Do so only if the number of not successful permutes was nor more 1282 /* Do so only if the number of not successful permutes was nor more
1218 than a cut-ff as re-trying the recursive match on 1283 than a cut-ff as re-trying the recursive match on
1219 possibly each level of the tree would expose exponential 1284 possibly each level of the tree would expose exponential
1220 behavior. */ 1285 behavior. */
1221 && *npermutes < 4) 1286 && *npermutes < 4)
1222 { 1287 {
1223 /* Verify if we can safely swap or if we committed to a specific 1288 /* See whether we can swap the matching or the non-matching
1224 operand order already. */ 1289 stmt operands. */
1225 for (j = 0; j < group_size; ++j) 1290 bool swap_not_matching = true;
1226 if (!matches[j] 1291 do
1227 && (swap[j] != 0 1292 {
1228 || STMT_VINFO_NUM_SLP_USES (vinfo_for_stmt (stmts[j])))) 1293 for (j = 0; j < group_size; ++j)
1229 { 1294 {
1230 if (dump_enabled_p ()) 1295 if (matches[j] != !swap_not_matching)
1231 { 1296 continue;
1232 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1297 stmt_vec_info stmt_info = stmts[j];
1233 "Build SLP failed: cannot swap operands " 1298 /* Verify if we can swap operands of this stmt. */
1234 "of shared stmt "); 1299 gassign *stmt = dyn_cast <gassign *> (stmt_info->stmt);
1235 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, 1300 if (!stmt
1236 stmts[j], 0); 1301 || !commutative_tree_code (gimple_assign_rhs_code (stmt)))
1237 } 1302 {
1238 goto fail; 1303 if (!swap_not_matching)
1239 } 1304 goto fail;
1305 swap_not_matching = false;
1306 break;
1307 }
1308 /* Verify if we can safely swap or if we committed to a
1309 specific operand order already.
1310 ??? Instead of modifying GIMPLE stmts here we could
1311 record whether we want to swap operands in the SLP
1312 node and temporarily do that when processing it
1313 (or wrap operand accessors in a helper). */
1314 else if (swap[j] != 0
1315 || STMT_VINFO_NUM_SLP_USES (stmt_info))
1316 {
1317 if (!swap_not_matching)
1318 {
1319 if (dump_enabled_p ())
1320 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
1321 vect_location,
1322 "Build SLP failed: cannot swap "
1323 "operands of shared stmt %G",
1324 stmts[j]->stmt);
1325 goto fail;
1326 }
1327 swap_not_matching = false;
1328 break;
1329 }
1330 }
1331 }
1332 while (j != group_size);
1240 1333
1241 /* Swap mismatched definition stmts. */ 1334 /* Swap mismatched definition stmts. */
1242 dump_printf_loc (MSG_NOTE, vect_location, 1335 dump_printf_loc (MSG_NOTE, vect_location,
1243 "Re-trying with swapped operands of stmts "); 1336 "Re-trying with swapped operands of stmts ");
1244 for (j = 0; j < group_size; ++j) 1337 for (j = 0; j < group_size; ++j)
1245 if (!matches[j]) 1338 if (matches[j] == !swap_not_matching)
1246 { 1339 {
1247 std::swap (oprnds_info[0]->def_stmts[j], 1340 std::swap (oprnds_info[0]->def_stmts[j],
1248 oprnds_info[1]->def_stmts[j]); 1341 oprnds_info[1]->def_stmts[j]);
1249 dump_printf (MSG_NOTE, "%d ", j); 1342 dump_printf (MSG_NOTE, "%d ", j);
1250 } 1343 }
1262 vect_get_slp_defs uses operand indexes and thus expects 1355 vect_get_slp_defs uses operand indexes and thus expects
1263 canonical operand order. This is also necessary even 1356 canonical operand order. This is also necessary even
1264 if we end up building the operand from scalars as 1357 if we end up building the operand from scalars as
1265 we'll continue to process swapped operand two. */ 1358 we'll continue to process swapped operand two. */
1266 for (j = 0; j < group_size; ++j) 1359 for (j = 0; j < group_size; ++j)
1267 { 1360 gimple_set_plf (stmts[j]->stmt, GF_PLF_1, false);
1268 gimple *stmt = stmts[j];
1269 gimple_set_plf (stmt, GF_PLF_1, false);
1270 }
1271 for (j = 0; j < group_size; ++j) 1361 for (j = 0; j < group_size; ++j)
1272 { 1362 if (matches[j] == !swap_not_matching)
1273 gimple *stmt = stmts[j]; 1363 {
1274 if (!matches[j]) 1364 gassign *stmt = as_a <gassign *> (stmts[j]->stmt);
1275 { 1365 /* Avoid swapping operands twice. */
1276 /* Avoid swapping operands twice. */ 1366 if (gimple_plf (stmt, GF_PLF_1))
1277 if (gimple_plf (stmt, GF_PLF_1)) 1367 continue;
1278 continue; 1368 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt),
1279 swap_ssa_operands (stmt, gimple_assign_rhs1_ptr (stmt), 1369 gimple_assign_rhs2_ptr (stmt));
1280 gimple_assign_rhs2_ptr (stmt)); 1370 gimple_set_plf (stmt, GF_PLF_1, true);
1281 gimple_set_plf (stmt, GF_PLF_1, true); 1371 }
1282 }
1283 }
1284 /* Verify we swap all duplicates or none. */ 1372 /* Verify we swap all duplicates or none. */
1285 if (flag_checking) 1373 if (flag_checking)
1286 for (j = 0; j < group_size; ++j) 1374 for (j = 0; j < group_size; ++j)
1287 { 1375 gcc_assert (gimple_plf (stmts[j]->stmt, GF_PLF_1)
1288 gimple *stmt = stmts[j]; 1376 == (matches[j] == !swap_not_matching));
1289 gcc_assert (gimple_plf (stmt, GF_PLF_1) == ! matches[j]);
1290 }
1291 1377
1292 /* If we have all children of child built up from scalars then 1378 /* If we have all children of child built up from scalars then
1293 just throw that away and build it up this node from scalars. */ 1379 just throw that away and build it up this node from scalars. */
1294 if (!SLP_TREE_CHILDREN (child).is_empty () 1380 if (!SLP_TREE_CHILDREN (child).is_empty ()
1295 /* ??? Rejecting patterns this way doesn't work. We'd have 1381 /* ??? Rejecting patterns this way doesn't work. We'd have
1296 to do extra work to cancel the pattern so the uses see the 1382 to do extra work to cancel the pattern so the uses see the
1297 scalar version. */ 1383 scalar version. */
1298 && !is_pattern_stmt_p 1384 && !is_pattern_stmt_p (SLP_TREE_SCALAR_STMTS (child)[0]))
1299 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])))
1300 { 1385 {
1301 unsigned int j; 1386 unsigned int j;
1302 slp_tree grandchild; 1387 slp_tree grandchild;
1303 1388
1304 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) 1389 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1308 { 1393 {
1309 /* Roll back. */ 1394 /* Roll back. */
1310 this_loads.truncate (old_nloads); 1395 this_loads.truncate (old_nloads);
1311 this_tree_size = old_tree_size; 1396 this_tree_size = old_tree_size;
1312 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild) 1397 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (child), j, grandchild)
1313 vect_free_slp_tree (grandchild); 1398 vect_free_slp_tree (grandchild, false);
1314 SLP_TREE_CHILDREN (child).truncate (0); 1399 SLP_TREE_CHILDREN (child).truncate (0);
1315 1400
1316 dump_printf_loc (MSG_NOTE, vect_location, 1401 dump_printf_loc (MSG_NOTE, vect_location,
1317 "Building parent vector operands from " 1402 "Building parent vector operands from "
1318 "scalars instead\n"); 1403 "scalars instead\n");
1332 } 1417 }
1333 1418
1334 fail: 1419 fail:
1335 gcc_assert (child == NULL); 1420 gcc_assert (child == NULL);
1336 FOR_EACH_VEC_ELT (children, j, child) 1421 FOR_EACH_VEC_ELT (children, j, child)
1337 vect_free_slp_tree (child); 1422 vect_free_slp_tree (child, false);
1338 vect_free_oprnd_info (oprnds_info); 1423 vect_free_oprnd_info (oprnds_info);
1339 return NULL; 1424 return NULL;
1340 } 1425 }
1341 1426
1342 vect_free_oprnd_info (oprnds_info); 1427 vect_free_oprnd_info (oprnds_info);
1353 } 1438 }
1354 1439
1355 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */ 1440 /* Dump a slp tree NODE using flags specified in DUMP_KIND. */
1356 1441
1357 static void 1442 static void
1358 vect_print_slp_tree (dump_flags_t dump_kind, location_t loc, slp_tree node) 1443 vect_print_slp_tree (dump_flags_t dump_kind, dump_location_t loc,
1444 slp_tree node)
1359 { 1445 {
1360 int i; 1446 int i;
1361 gimple *stmt; 1447 stmt_vec_info stmt_info;
1362 slp_tree child; 1448 slp_tree child;
1363 1449
1364 dump_printf_loc (dump_kind, loc, "node%s\n", 1450 dump_printf_loc (dump_kind, loc, "node%s\n",
1365 SLP_TREE_DEF_TYPE (node) != vect_internal_def 1451 SLP_TREE_DEF_TYPE (node) != vect_internal_def
1366 ? " (external)" : ""); 1452 ? " (external)" : "");
1367 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) 1453 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1368 { 1454 dump_printf_loc (dump_kind, loc, "\tstmt %d %G", i, stmt_info->stmt);
1369 dump_printf_loc (dump_kind, loc, "\tstmt %d ", i);
1370 dump_gimple_stmt (dump_kind, TDF_SLIM, stmt, 0);
1371 }
1372 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) 1455 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1373 vect_print_slp_tree (dump_kind, loc, child); 1456 vect_print_slp_tree (dump_kind, loc, child);
1374 } 1457 }
1375 1458
1376 1459
1381 1464
1382 static void 1465 static void
1383 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j) 1466 vect_mark_slp_stmts (slp_tree node, enum slp_vect_type mark, int j)
1384 { 1467 {
1385 int i; 1468 int i;
1386 gimple *stmt; 1469 stmt_vec_info stmt_info;
1387 slp_tree child; 1470 slp_tree child;
1388 1471
1389 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) 1472 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1390 return; 1473 return;
1391 1474
1392 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) 1475 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1393 if (j < 0 || i == j) 1476 if (j < 0 || i == j)
1394 STMT_SLP_TYPE (vinfo_for_stmt (stmt)) = mark; 1477 STMT_SLP_TYPE (stmt_info) = mark;
1395 1478
1396 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) 1479 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1397 vect_mark_slp_stmts (child, mark, j); 1480 vect_mark_slp_stmts (child, mark, j);
1398 } 1481 }
1399 1482
1402 1485
1403 static void 1486 static void
1404 vect_mark_slp_stmts_relevant (slp_tree node) 1487 vect_mark_slp_stmts_relevant (slp_tree node)
1405 { 1488 {
1406 int i; 1489 int i;
1407 gimple *stmt;
1408 stmt_vec_info stmt_info; 1490 stmt_vec_info stmt_info;
1409 slp_tree child; 1491 slp_tree child;
1410 1492
1411 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) 1493 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
1412 return; 1494 return;
1413 1495
1414 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) 1496 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1415 { 1497 {
1416 stmt_info = vinfo_for_stmt (stmt);
1417 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info) 1498 gcc_assert (!STMT_VINFO_RELEVANT (stmt_info)
1418 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope); 1499 || STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope);
1419 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope; 1500 STMT_VINFO_RELEVANT (stmt_info) = vect_used_in_scope;
1420 } 1501 }
1421 1502
1428 1509
1429 static void 1510 static void
1430 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size, 1511 vect_slp_rearrange_stmts (slp_tree node, unsigned int group_size,
1431 vec<unsigned> permutation) 1512 vec<unsigned> permutation)
1432 { 1513 {
1433 gimple *stmt; 1514 stmt_vec_info stmt_info;
1434 vec<gimple *> tmp_stmts; 1515 vec<stmt_vec_info> tmp_stmts;
1435 unsigned int i; 1516 unsigned int i;
1436 slp_tree child; 1517 slp_tree child;
1437 1518
1438 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) 1519 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1439 vect_slp_rearrange_stmts (child, group_size, permutation); 1520 vect_slp_rearrange_stmts (child, group_size, permutation);
1440 1521
1441 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ()); 1522 gcc_assert (group_size == SLP_TREE_SCALAR_STMTS (node).length ());
1442 tmp_stmts.create (group_size); 1523 tmp_stmts.create (group_size);
1443 tmp_stmts.quick_grow_cleared (group_size); 1524 tmp_stmts.quick_grow_cleared (group_size);
1444 1525
1445 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) 1526 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
1446 tmp_stmts[permutation[i]] = stmt; 1527 tmp_stmts[permutation[i]] = stmt_info;
1447 1528
1448 SLP_TREE_SCALAR_STMTS (node).release (); 1529 SLP_TREE_SCALAR_STMTS (node).release ();
1449 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts; 1530 SLP_TREE_SCALAR_STMTS (node) = tmp_stmts;
1450 } 1531 }
1451 1532
1499 according to the order of the loads. */ 1580 according to the order of the loads. */
1500 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size, 1581 vect_slp_rearrange_stmts (SLP_INSTANCE_TREE (slp_instn), group_size,
1501 node->load_permutation); 1582 node->load_permutation);
1502 1583
1503 /* We are done, no actual permutations need to be generated. */ 1584 /* We are done, no actual permutations need to be generated. */
1504 unsigned int unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn); 1585 poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
1505 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) 1586 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1506 { 1587 {
1507 gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0]; 1588 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1508 first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt)); 1589 first_stmt_info = DR_GROUP_FIRST_ELEMENT (first_stmt_info);
1509 /* But we have to keep those permutations that are required because 1590 /* But we have to keep those permutations that are required because
1510 of handling of gaps. */ 1591 of handling of gaps. */
1511 if (unrolling_factor == 1 1592 if (known_eq (unrolling_factor, 1U)
1512 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt)) 1593 || (group_size == DR_GROUP_SIZE (first_stmt_info)
1513 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0)) 1594 && DR_GROUP_GAP (first_stmt_info) == 0))
1514 SLP_TREE_LOAD_PERMUTATION (node).release (); 1595 SLP_TREE_LOAD_PERMUTATION (node).release ();
1515 else 1596 else
1516 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j) 1597 for (j = 0; j < SLP_TREE_LOAD_PERMUTATION (node).length (); ++j)
1517 SLP_TREE_LOAD_PERMUTATION (node)[j] = j; 1598 SLP_TREE_LOAD_PERMUTATION (node)[j] = j;
1518 } 1599 }
1527 vect_supported_load_permutation_p (slp_instance slp_instn) 1608 vect_supported_load_permutation_p (slp_instance slp_instn)
1528 { 1609 {
1529 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn); 1610 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_instn);
1530 unsigned int i, j, k, next; 1611 unsigned int i, j, k, next;
1531 slp_tree node; 1612 slp_tree node;
1532 gimple *stmt, *load, *next_load;
1533 1613
1534 if (dump_enabled_p ()) 1614 if (dump_enabled_p ())
1535 { 1615 {
1536 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation "); 1616 dump_printf_loc (MSG_NOTE, vect_location, "Load permutation ");
1537 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) 1617 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1556 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) 1636 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1557 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size) 1637 if (SLP_TREE_SCALAR_STMTS (node).length () != (unsigned) group_size)
1558 return false; 1638 return false;
1559 1639
1560 node = SLP_INSTANCE_TREE (slp_instn); 1640 node = SLP_INSTANCE_TREE (slp_instn);
1561 stmt = SLP_TREE_SCALAR_STMTS (node)[0]; 1641 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
1562 1642
1563 /* Reduction (there are no data-refs in the root). 1643 /* Reduction (there are no data-refs in the root).
1564 In reduction chain the order of the loads is not important. */ 1644 In reduction chain the order of the loads is not important. */
1565 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)) 1645 if (!STMT_VINFO_DATA_REF (stmt_info)
1566 && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) 1646 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1567 vect_attempt_slp_rearrange_stmts (slp_instn); 1647 vect_attempt_slp_rearrange_stmts (slp_instn);
1568 1648
1569 /* In basic block vectorization we allow any subchain of an interleaving 1649 /* In basic block vectorization we allow any subchain of an interleaving
1570 chain. 1650 chain.
1571 FORNOW: not supported in loop SLP because of realignment compications. */ 1651 FORNOW: not supported in loop SLP because of realignment compications. */
1572 if (STMT_VINFO_BB_VINFO (vinfo_for_stmt (stmt))) 1652 if (STMT_VINFO_BB_VINFO (stmt_info))
1573 { 1653 {
1574 /* Check whether the loads in an instance form a subchain and thus 1654 /* Check whether the loads in an instance form a subchain and thus
1575 no permutation is necessary. */ 1655 no permutation is necessary. */
1576 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) 1656 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1577 { 1657 {
1578 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ()) 1658 if (!SLP_TREE_LOAD_PERMUTATION (node).exists ())
1579 continue; 1659 continue;
1580 bool subchain_p = true; 1660 bool subchain_p = true;
1581 next_load = NULL; 1661 stmt_vec_info next_load_info = NULL;
1582 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load) 1662 stmt_vec_info load_info;
1583 { 1663 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), j, load_info)
1584 if (j != 0 1664 {
1585 && (next_load != load 1665 if (j != 0
1586 || GROUP_GAP (vinfo_for_stmt (load)) != 1)) 1666 && (next_load_info != load_info
1667 || DR_GROUP_GAP (load_info) != 1))
1587 { 1668 {
1588 subchain_p = false; 1669 subchain_p = false;
1589 break; 1670 break;
1590 } 1671 }
1591 next_load = GROUP_NEXT_ELEMENT (vinfo_for_stmt (load)); 1672 next_load_info = DR_GROUP_NEXT_ELEMENT (load_info);
1592 } 1673 }
1593 if (subchain_p) 1674 if (subchain_p)
1594 SLP_TREE_LOAD_PERMUTATION (node).release (); 1675 SLP_TREE_LOAD_PERMUTATION (node).release ();
1595 else 1676 else
1596 { 1677 {
1597 stmt_vec_info group_info 1678 stmt_vec_info group_info = SLP_TREE_SCALAR_STMTS (node)[0];
1598 = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]); 1679 group_info = DR_GROUP_FIRST_ELEMENT (group_info);
1599 group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info)); 1680 unsigned HOST_WIDE_INT nunits;
1600 unsigned nunits
1601 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info));
1602 unsigned k, maxk = 0; 1681 unsigned k, maxk = 0;
1603 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k) 1682 FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
1604 if (k > maxk) 1683 if (k > maxk)
1605 maxk = k; 1684 maxk = k;
1606 /* In BB vectorization we may not actually use a loaded vector 1685 /* In BB vectorization we may not actually use a loaded vector
1607 accessing elements in excess of GROUP_SIZE. */ 1686 accessing elements in excess of DR_GROUP_SIZE. */
1608 if (maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1))) 1687 tree vectype = STMT_VINFO_VECTYPE (group_info);
1688 if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
1689 || maxk >= (DR_GROUP_SIZE (group_info) & ~(nunits - 1)))
1609 { 1690 {
1610 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1691 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1611 "BB vectorization with gaps at the end of " 1692 "BB vectorization with gaps at the end of "
1612 "a load is not supported\n"); 1693 "a load is not supported\n");
1613 return false; 1694 return false;
1633 conservative about the vectorization factor, there are permutations 1714 conservative about the vectorization factor, there are permutations
1634 that will use three vector inputs only starting from a specific factor 1715 that will use three vector inputs only starting from a specific factor
1635 and the vectorization factor is not yet final. 1716 and the vectorization factor is not yet final.
1636 ??? The SLP instance unrolling factor might not be the maximum one. */ 1717 ??? The SLP instance unrolling factor might not be the maximum one. */
1637 unsigned n_perms; 1718 unsigned n_perms;
1638 unsigned test_vf 1719 poly_uint64 test_vf
1639 = least_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn), 1720 = force_common_multiple (SLP_INSTANCE_UNROLLING_FACTOR (slp_instn),
1640 LOOP_VINFO_VECT_FACTOR 1721 LOOP_VINFO_VECT_FACTOR
1641 (STMT_VINFO_LOOP_VINFO (vinfo_for_stmt (stmt)))); 1722 (STMT_VINFO_LOOP_VINFO (stmt_info)));
1642 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node) 1723 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
1643 if (node->load_permutation.exists () 1724 if (node->load_permutation.exists ()
1644 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf, 1725 && !vect_transform_slp_perm_load (node, vNULL, NULL, test_vf,
1645 slp_instn, true, &n_perms)) 1726 slp_instn, true, &n_perms))
1646 return false; 1727 return false;
1649 } 1730 }
1650 1731
1651 1732
1652 /* Find the last store in SLP INSTANCE. */ 1733 /* Find the last store in SLP INSTANCE. */
1653 1734
1654 gimple * 1735 stmt_vec_info
1655 vect_find_last_scalar_stmt_in_slp (slp_tree node) 1736 vect_find_last_scalar_stmt_in_slp (slp_tree node)
1656 { 1737 {
1657 gimple *last = NULL, *stmt; 1738 stmt_vec_info last = NULL;
1658 1739 stmt_vec_info stmt_vinfo;
1659 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt); i++) 1740
1660 { 1741 for (int i = 0; SLP_TREE_SCALAR_STMTS (node).iterate (i, &stmt_vinfo); i++)
1661 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); 1742 {
1662 if (is_pattern_stmt_p (stmt_vinfo)) 1743 stmt_vinfo = vect_orig_stmt (stmt_vinfo);
1663 last = get_later_stmt (STMT_VINFO_RELATED_STMT (stmt_vinfo), last); 1744 last = last ? get_later_stmt (stmt_vinfo, last) : stmt_vinfo;
1664 else
1665 last = get_later_stmt (stmt, last);
1666 } 1745 }
1667 1746
1668 return last; 1747 return last;
1669 } 1748 }
1670 1749
1671 /* Compute the cost for the SLP node NODE in the SLP instance INSTANCE. */ 1750 /* Splits a group of stores, currently beginning at FIRST_VINFO, into
1672 1751 two groups: one (still beginning at FIRST_VINFO) of size GROUP1_SIZE
1673 static void 1752 (also containing the first GROUP1_SIZE stmts, since stores are
1674 vect_analyze_slp_cost_1 (slp_instance instance, slp_tree node, 1753 consecutive), the second containing the remainder.
1675 stmt_vector_for_cost *prologue_cost_vec,
1676 stmt_vector_for_cost *body_cost_vec,
1677 unsigned ncopies_for_cost)
1678 {
1679 unsigned i, j;
1680 slp_tree child;
1681 gimple *stmt;
1682 stmt_vec_info stmt_info;
1683 tree lhs;
1684
1685 /* Recurse down the SLP tree. */
1686 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1687 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
1688 vect_analyze_slp_cost_1 (instance, child, prologue_cost_vec,
1689 body_cost_vec, ncopies_for_cost);
1690
1691 /* Look at the first scalar stmt to determine the cost. */
1692 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1693 stmt_info = vinfo_for_stmt (stmt);
1694 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1695 {
1696 vect_memory_access_type memory_access_type
1697 = (STMT_VINFO_STRIDED_P (stmt_info)
1698 ? VMAT_STRIDED_SLP
1699 : VMAT_CONTIGUOUS);
1700 if (DR_IS_WRITE (STMT_VINFO_DATA_REF (stmt_info)))
1701 vect_model_store_cost (stmt_info, ncopies_for_cost,
1702 memory_access_type, vect_uninitialized_def,
1703 node, prologue_cost_vec, body_cost_vec);
1704 else
1705 {
1706 gcc_checking_assert (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)));
1707 if (SLP_TREE_LOAD_PERMUTATION (node).exists ())
1708 {
1709 /* If the load is permuted then the alignment is determined by
1710 the first group element not by the first scalar stmt DR. */
1711 stmt = GROUP_FIRST_ELEMENT (stmt_info);
1712 stmt_info = vinfo_for_stmt (stmt);
1713 /* Record the cost for the permutation. */
1714 unsigned n_perms;
1715 vect_transform_slp_perm_load (node, vNULL, NULL,
1716 ncopies_for_cost, instance, true,
1717 &n_perms);
1718 record_stmt_cost (body_cost_vec, n_perms, vec_perm,
1719 stmt_info, 0, vect_body);
1720 unsigned nunits
1721 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1722 /* And adjust the number of loads performed. This handles
1723 redundancies as well as loads that are later dead. */
1724 auto_sbitmap perm (GROUP_SIZE (stmt_info));
1725 bitmap_clear (perm);
1726 for (i = 0; i < SLP_TREE_LOAD_PERMUTATION (node).length (); ++i)
1727 bitmap_set_bit (perm, SLP_TREE_LOAD_PERMUTATION (node)[i]);
1728 ncopies_for_cost = 0;
1729 bool load_seen = false;
1730 for (i = 0; i < GROUP_SIZE (stmt_info); ++i)
1731 {
1732 if (i % nunits == 0)
1733 {
1734 if (load_seen)
1735 ncopies_for_cost++;
1736 load_seen = false;
1737 }
1738 if (bitmap_bit_p (perm, i))
1739 load_seen = true;
1740 }
1741 if (load_seen)
1742 ncopies_for_cost++;
1743 gcc_assert (ncopies_for_cost
1744 <= (GROUP_SIZE (stmt_info) - GROUP_GAP (stmt_info)
1745 + nunits - 1) / nunits);
1746 ncopies_for_cost *= SLP_INSTANCE_UNROLLING_FACTOR (instance);
1747 }
1748 /* Record the cost for the vector loads. */
1749 vect_model_load_cost (stmt_info, ncopies_for_cost,
1750 memory_access_type, node, prologue_cost_vec,
1751 body_cost_vec);
1752 return;
1753 }
1754 }
1755 else if (STMT_VINFO_TYPE (stmt_info) == induc_vec_info_type)
1756 {
1757 /* ncopies_for_cost is the number of IVs we generate. */
1758 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1759 stmt_info, 0, vect_body);
1760
1761 /* Prologue cost for the initial values and step vector. */
1762 record_stmt_cost (prologue_cost_vec, ncopies_for_cost,
1763 CONSTANT_CLASS_P
1764 (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED
1765 (stmt_info))
1766 ? vector_load : vec_construct,
1767 stmt_info, 0, vect_prologue);
1768 record_stmt_cost (prologue_cost_vec, 1,
1769 CONSTANT_CLASS_P
1770 (STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_info))
1771 ? vector_load : vec_construct,
1772 stmt_info, 0, vect_prologue);
1773
1774 /* ??? No easy way to get at the actual number of vector stmts
1775 to be geneated and thus the derived IVs. */
1776 }
1777 else
1778 {
1779 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1780 stmt_info, 0, vect_body);
1781 if (SLP_TREE_TWO_OPERATORS (node))
1782 {
1783 record_stmt_cost (body_cost_vec, ncopies_for_cost, vector_stmt,
1784 stmt_info, 0, vect_body);
1785 record_stmt_cost (body_cost_vec, ncopies_for_cost, vec_perm,
1786 stmt_info, 0, vect_body);
1787 }
1788 }
1789
1790 /* Push SLP node def-type to stmts. */
1791 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1792 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
1793 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
1794 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child);
1795
1796 /* Scan operands and account for prologue cost of constants/externals.
1797 ??? This over-estimates cost for multiple uses and should be
1798 re-engineered. */
1799 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1800 lhs = gimple_get_lhs (stmt);
1801 for (i = 0; i < gimple_num_ops (stmt); ++i)
1802 {
1803 tree op = gimple_op (stmt, i);
1804 gimple *def_stmt;
1805 enum vect_def_type dt;
1806 if (!op || op == lhs)
1807 continue;
1808 if (vect_is_simple_use (op, stmt_info->vinfo, &def_stmt, &dt))
1809 {
1810 /* Without looking at the actual initializer a vector of
1811 constants can be implemented as load from the constant pool.
1812 ??? We need to pass down stmt_info for a vector type
1813 even if it points to the wrong stmt. */
1814 if (dt == vect_constant_def)
1815 record_stmt_cost (prologue_cost_vec, 1, vector_load,
1816 stmt_info, 0, vect_prologue);
1817 else if (dt == vect_external_def)
1818 record_stmt_cost (prologue_cost_vec, 1, vec_construct,
1819 stmt_info, 0, vect_prologue);
1820 }
1821 }
1822
1823 /* Restore stmt def-types. */
1824 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
1825 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
1826 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt)
1827 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def;
1828 }
1829
1830 /* Compute the cost for the SLP instance INSTANCE. */
1831
1832 static void
1833 vect_analyze_slp_cost (slp_instance instance, void *data)
1834 {
1835 stmt_vector_for_cost body_cost_vec, prologue_cost_vec;
1836 unsigned ncopies_for_cost;
1837 stmt_info_for_cost *si;
1838 unsigned i;
1839
1840 if (dump_enabled_p ())
1841 dump_printf_loc (MSG_NOTE, vect_location,
1842 "=== vect_analyze_slp_cost ===\n");
1843
1844 /* Calculate the number of vector stmts to create based on the unrolling
1845 factor (number of vectors is 1 if NUNITS >= GROUP_SIZE, and is
1846 GROUP_SIZE / NUNITS otherwise. */
1847 unsigned group_size = SLP_INSTANCE_GROUP_SIZE (instance);
1848 slp_tree node = SLP_INSTANCE_TREE (instance);
1849 stmt_vec_info stmt_info = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
1850 /* Adjust the group_size by the vectorization factor which is always one
1851 for basic-block vectorization. */
1852 if (STMT_VINFO_LOOP_VINFO (stmt_info))
1853 group_size *= LOOP_VINFO_VECT_FACTOR (STMT_VINFO_LOOP_VINFO (stmt_info));
1854 unsigned nunits = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
1855 /* For reductions look at a reduction operand in case the reduction
1856 operation is widening like DOT_PROD or SAD. */
1857 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
1858 {
1859 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0];
1860 switch (gimple_assign_rhs_code (stmt))
1861 {
1862 case DOT_PROD_EXPR:
1863 case SAD_EXPR:
1864 nunits = TYPE_VECTOR_SUBPARTS (get_vectype_for_scalar_type
1865 (TREE_TYPE (gimple_assign_rhs1 (stmt))));
1866 break;
1867 default:;
1868 }
1869 }
1870 ncopies_for_cost = least_common_multiple (nunits, group_size) / nunits;
1871
1872 prologue_cost_vec.create (10);
1873 body_cost_vec.create (10);
1874 vect_analyze_slp_cost_1 (instance, SLP_INSTANCE_TREE (instance),
1875 &prologue_cost_vec, &body_cost_vec,
1876 ncopies_for_cost);
1877
1878 /* Record the prologue costs, which were delayed until we were
1879 sure that SLP was successful. */
1880 FOR_EACH_VEC_ELT (prologue_cost_vec, i, si)
1881 {
1882 struct _stmt_vec_info *stmt_info
1883 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1884 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1885 si->misalign, vect_prologue);
1886 }
1887
1888 /* Record the instance's instructions in the target cost model. */
1889 FOR_EACH_VEC_ELT (body_cost_vec, i, si)
1890 {
1891 struct _stmt_vec_info *stmt_info
1892 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
1893 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
1894 si->misalign, vect_body);
1895 }
1896
1897 prologue_cost_vec.release ();
1898 body_cost_vec.release ();
1899 }
1900
1901 /* Splits a group of stores, currently beginning at FIRST_STMT, into two groups:
1902 one (still beginning at FIRST_STMT) of size GROUP1_SIZE (also containing
1903 the first GROUP1_SIZE stmts, since stores are consecutive), the second
1904 containing the remainder.
1905 Return the first stmt in the second group. */ 1754 Return the first stmt in the second group. */
1906 1755
1907 static gimple * 1756 static stmt_vec_info
1908 vect_split_slp_store_group (gimple *first_stmt, unsigned group1_size) 1757 vect_split_slp_store_group (stmt_vec_info first_vinfo, unsigned group1_size)
1909 { 1758 {
1910 stmt_vec_info first_vinfo = vinfo_for_stmt (first_stmt); 1759 gcc_assert (DR_GROUP_FIRST_ELEMENT (first_vinfo) == first_vinfo);
1911 gcc_assert (GROUP_FIRST_ELEMENT (first_vinfo) == first_stmt);
1912 gcc_assert (group1_size > 0); 1760 gcc_assert (group1_size > 0);
1913 int group2_size = GROUP_SIZE (first_vinfo) - group1_size; 1761 int group2_size = DR_GROUP_SIZE (first_vinfo) - group1_size;
1914 gcc_assert (group2_size > 0); 1762 gcc_assert (group2_size > 0);
1915 GROUP_SIZE (first_vinfo) = group1_size; 1763 DR_GROUP_SIZE (first_vinfo) = group1_size;
1916 1764
1917 gimple *stmt = first_stmt; 1765 stmt_vec_info stmt_info = first_vinfo;
1918 for (unsigned i = group1_size; i > 1; i--) 1766 for (unsigned i = group1_size; i > 1; i--)
1919 { 1767 {
1920 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)); 1768 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info);
1921 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1); 1769 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1922 } 1770 }
1923 /* STMT is now the last element of the first group. */ 1771 /* STMT is now the last element of the first group. */
1924 gimple *group2 = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)); 1772 stmt_vec_info group2 = DR_GROUP_NEXT_ELEMENT (stmt_info);
1925 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)) = 0; 1773 DR_GROUP_NEXT_ELEMENT (stmt_info) = 0;
1926 1774
1927 GROUP_SIZE (vinfo_for_stmt (group2)) = group2_size; 1775 DR_GROUP_SIZE (group2) = group2_size;
1928 for (stmt = group2; stmt; stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt))) 1776 for (stmt_info = group2; stmt_info;
1929 { 1777 stmt_info = DR_GROUP_NEXT_ELEMENT (stmt_info))
1930 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) = group2; 1778 {
1931 gcc_assert (GROUP_GAP (vinfo_for_stmt (stmt)) == 1); 1779 DR_GROUP_FIRST_ELEMENT (stmt_info) = group2;
1932 } 1780 gcc_assert (DR_GROUP_GAP (stmt_info) == 1);
1933 1781 }
1934 /* For the second group, the GROUP_GAP is that before the original group, 1782
1783 /* For the second group, the DR_GROUP_GAP is that before the original group,
1935 plus skipping over the first vector. */ 1784 plus skipping over the first vector. */
1936 GROUP_GAP (vinfo_for_stmt (group2)) = 1785 DR_GROUP_GAP (group2) = DR_GROUP_GAP (first_vinfo) + group1_size;
1937 GROUP_GAP (first_vinfo) + group1_size; 1786
1938 1787 /* DR_GROUP_GAP of the first group now has to skip over the second group too. */
1939 /* GROUP_GAP of the first group now has to skip over the second group too. */ 1788 DR_GROUP_GAP (first_vinfo) += group2_size;
1940 GROUP_GAP (first_vinfo) += group2_size;
1941 1789
1942 if (dump_enabled_p ()) 1790 if (dump_enabled_p ())
1943 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n", 1791 dump_printf_loc (MSG_NOTE, vect_location, "Split group into %d and %d\n",
1944 group1_size, group2_size); 1792 group1_size, group2_size);
1945 1793
1946 return group2; 1794 return group2;
1947 } 1795 }
1948 1796
1797 /* Calculate the unrolling factor for an SLP instance with GROUP_SIZE
1798 statements and a vector of NUNITS elements. */
1799
1800 static poly_uint64
1801 calculate_unrolling_factor (poly_uint64 nunits, unsigned int group_size)
1802 {
1803 return exact_div (common_multiple (nunits, group_size), group_size);
1804 }
1805
1949 /* Analyze an SLP instance starting from a group of grouped stores. Call 1806 /* Analyze an SLP instance starting from a group of grouped stores. Call
1950 vect_build_slp_tree to build a tree of packed stmts if possible. 1807 vect_build_slp_tree to build a tree of packed stmts if possible.
1951 Return FALSE if it's impossible to SLP any stmt in the loop. */ 1808 Return FALSE if it's impossible to SLP any stmt in the loop. */
1952 1809
1953 static bool 1810 static bool
1954 vect_analyze_slp_instance (vec_info *vinfo, 1811 vect_analyze_slp_instance (vec_info *vinfo,
1955 gimple *stmt, unsigned max_tree_size) 1812 stmt_vec_info stmt_info, unsigned max_tree_size)
1956 { 1813 {
1957 slp_instance new_instance; 1814 slp_instance new_instance;
1958 slp_tree node; 1815 slp_tree node;
1959 unsigned int group_size = GROUP_SIZE (vinfo_for_stmt (stmt)); 1816 unsigned int group_size;
1960 unsigned int unrolling_factor = 1, nunits;
1961 tree vectype, scalar_type = NULL_TREE; 1817 tree vectype, scalar_type = NULL_TREE;
1962 gimple *next;
1963 unsigned int i; 1818 unsigned int i;
1964 unsigned int max_nunits = 0;
1965 vec<slp_tree> loads; 1819 vec<slp_tree> loads;
1966 struct data_reference *dr = STMT_VINFO_DATA_REF (vinfo_for_stmt (stmt)); 1820 struct data_reference *dr = STMT_VINFO_DATA_REF (stmt_info);
1967 vec<gimple *> scalar_stmts; 1821 vec<stmt_vec_info> scalar_stmts;
1968 1822
1969 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) 1823 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
1970 { 1824 {
1971 if (dr) 1825 scalar_type = TREE_TYPE (DR_REF (dr));
1972 { 1826 vectype = get_vectype_for_scalar_type (scalar_type);
1973 scalar_type = TREE_TYPE (DR_REF (dr)); 1827 group_size = DR_GROUP_SIZE (stmt_info);
1974 vectype = get_vectype_for_scalar_type (scalar_type); 1828 }
1975 } 1829 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
1976 else 1830 {
1977 { 1831 gcc_assert (is_a <loop_vec_info> (vinfo));
1978 gcc_assert (is_a <loop_vec_info> (vinfo)); 1832 vectype = STMT_VINFO_VECTYPE (stmt_info);
1979 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); 1833 group_size = REDUC_GROUP_SIZE (stmt_info);
1980 }
1981
1982 group_size = GROUP_SIZE (vinfo_for_stmt (stmt));
1983 } 1834 }
1984 else 1835 else
1985 { 1836 {
1986 gcc_assert (is_a <loop_vec_info> (vinfo)); 1837 gcc_assert (is_a <loop_vec_info> (vinfo));
1987 vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (stmt)); 1838 vectype = STMT_VINFO_VECTYPE (stmt_info);
1988 group_size = as_a <loop_vec_info> (vinfo)->reductions.length (); 1839 group_size = as_a <loop_vec_info> (vinfo)->reductions.length ();
1989 } 1840 }
1990 1841
1991 if (!vectype) 1842 if (!vectype)
1992 { 1843 {
1993 if (dump_enabled_p ()) 1844 if (dump_enabled_p ())
1994 { 1845 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1995 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1846 "Build SLP failed: unsupported data-type %T\n",
1996 "Build SLP failed: unsupported data-type "); 1847 scalar_type);
1997 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, scalar_type);
1998 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
1999 }
2000 1848
2001 return false; 1849 return false;
2002 } 1850 }
2003 nunits = TYPE_VECTOR_SUBPARTS (vectype); 1851 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
2004 1852
2005 /* Create a node (a root of the SLP tree) for the packed grouped stores. */ 1853 /* Create a node (a root of the SLP tree) for the packed grouped stores. */
2006 scalar_stmts.create (group_size); 1854 scalar_stmts.create (group_size);
2007 next = stmt; 1855 stmt_vec_info next_info = stmt_info;
2008 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) 1856 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
2009 { 1857 {
2010 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */ 1858 /* Collect the stores and store them in SLP_TREE_SCALAR_STMTS. */
2011 while (next) 1859 while (next_info)
2012 { 1860 {
2013 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next)) 1861 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
2014 && STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next))) 1862 next_info = DR_GROUP_NEXT_ELEMENT (next_info);
2015 scalar_stmts.safe_push ( 1863 }
2016 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next))); 1864 }
2017 else 1865 else if (!dr && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2018 scalar_stmts.safe_push (next); 1866 {
2019 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next)); 1867 /* Collect the reduction stmts and store them in
1868 SLP_TREE_SCALAR_STMTS. */
1869 while (next_info)
1870 {
1871 scalar_stmts.safe_push (vect_stmt_to_vectorize (next_info));
1872 next_info = REDUC_GROUP_NEXT_ELEMENT (next_info);
2020 } 1873 }
2021 /* Mark the first element of the reduction chain as reduction to properly 1874 /* Mark the first element of the reduction chain as reduction to properly
2022 transform the node. In the reduction analysis phase only the last 1875 transform the node. In the reduction analysis phase only the last
2023 element of the chain is marked as reduction. */ 1876 element of the chain is marked as reduction. */
2024 if (!STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))) 1877 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
2025 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_reduction_def;
2026 } 1878 }
2027 else 1879 else
2028 { 1880 {
2029 /* Collect reduction statements. */ 1881 /* Collect reduction statements. */
2030 vec<gimple *> reductions = as_a <loop_vec_info> (vinfo)->reductions; 1882 vec<stmt_vec_info> reductions = as_a <loop_vec_info> (vinfo)->reductions;
2031 for (i = 0; reductions.iterate (i, &next); i++) 1883 for (i = 0; reductions.iterate (i, &next_info); i++)
2032 scalar_stmts.safe_push (next); 1884 scalar_stmts.safe_push (next_info);
2033 } 1885 }
2034 1886
2035 loads.create (group_size); 1887 loads.create (group_size);
2036 1888
2037 /* Build the tree for the SLP instance. */ 1889 /* Build the tree for the SLP instance. */
2038 bool *matches = XALLOCAVEC (bool, group_size); 1890 bool *matches = XALLOCAVEC (bool, group_size);
2039 unsigned npermutes = 0; 1891 unsigned npermutes = 0;
2040 bst_fail = new hash_set <vec <gimple *>, bst_traits> (); 1892 bst_fail = new scalar_stmts_set_t ();
1893 poly_uint64 max_nunits = nunits;
2041 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size, 1894 node = vect_build_slp_tree (vinfo, scalar_stmts, group_size,
2042 &max_nunits, &loads, matches, &npermutes, 1895 &max_nunits, &loads, matches, &npermutes,
2043 NULL, max_tree_size); 1896 NULL, max_tree_size);
2044 delete bst_fail; 1897 delete bst_fail;
2045 if (node != NULL) 1898 if (node != NULL)
2046 { 1899 {
2047 /* Calculate the unrolling factor based on the smallest type. */ 1900 /* Calculate the unrolling factor based on the smallest type. */
2048 unrolling_factor 1901 poly_uint64 unrolling_factor
2049 = least_common_multiple (max_nunits, group_size) / group_size; 1902 = calculate_unrolling_factor (max_nunits, group_size);
2050 1903
2051 if (unrolling_factor != 1 1904 if (maybe_ne (unrolling_factor, 1U)
2052 && is_a <bb_vec_info> (vinfo)) 1905 && is_a <bb_vec_info> (vinfo))
2053 { 1906 {
2054 1907 unsigned HOST_WIDE_INT const_max_nunits;
2055 if (max_nunits > group_size) 1908 if (!max_nunits.is_constant (&const_max_nunits)
2056 { 1909 || const_max_nunits > group_size)
2057 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1910 {
2058 "Build SLP failed: store group " 1911 if (dump_enabled_p ())
2059 "size not a multiple of the vector size " 1912 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2060 "in basic block SLP\n"); 1913 "Build SLP failed: store group "
2061 vect_free_slp_tree (node); 1914 "size not a multiple of the vector size "
2062 loads.release (); 1915 "in basic block SLP\n");
2063 return false; 1916 vect_free_slp_tree (node, false);
2064 } 1917 loads.release ();
1918 return false;
1919 }
2065 /* Fatal mismatch. */ 1920 /* Fatal mismatch. */
2066 matches[group_size/max_nunits * max_nunits] = false; 1921 matches[group_size / const_max_nunits * const_max_nunits] = false;
2067 vect_free_slp_tree (node); 1922 vect_free_slp_tree (node, false);
2068 loads.release (); 1923 loads.release ();
2069 } 1924 }
2070 else 1925 else
2071 { 1926 {
2072 /* Create a new SLP instance. */ 1927 /* Create a new SLP instance. */
2081 bool loads_permuted = false; 1936 bool loads_permuted = false;
2082 FOR_EACH_VEC_ELT (loads, i, load_node) 1937 FOR_EACH_VEC_ELT (loads, i, load_node)
2083 { 1938 {
2084 vec<unsigned> load_permutation; 1939 vec<unsigned> load_permutation;
2085 int j; 1940 int j;
2086 gimple *load, *first_stmt; 1941 stmt_vec_info load_info;
2087 bool this_load_permuted = false; 1942 bool this_load_permuted = false;
2088 load_permutation.create (group_size); 1943 load_permutation.create (group_size);
2089 first_stmt = GROUP_FIRST_ELEMENT 1944 stmt_vec_info first_stmt_info = DR_GROUP_FIRST_ELEMENT
2090 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0])); 1945 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2091 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load) 1946 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (load_node), j, load_info)
2092 { 1947 {
2093 int load_place = vect_get_place_in_interleaving_chain 1948 int load_place = vect_get_place_in_interleaving_chain
2094 (load, first_stmt); 1949 (load_info, first_stmt_info);
2095 gcc_assert (load_place != -1); 1950 gcc_assert (load_place != -1);
2096 if (load_place != j) 1951 if (load_place != j)
2097 this_load_permuted = true; 1952 this_load_permuted = true;
2098 load_permutation.safe_push (load_place); 1953 load_permutation.safe_push (load_place);
2099 } 1954 }
2100 if (!this_load_permuted 1955 if (!this_load_permuted
2101 /* The load requires permutation when unrolling exposes 1956 /* The load requires permutation when unrolling exposes
2102 a gap either because the group is larger than the SLP 1957 a gap either because the group is larger than the SLP
2103 group-size or because there is a gap between the groups. */ 1958 group-size or because there is a gap between the groups. */
2104 && (unrolling_factor == 1 1959 && (known_eq (unrolling_factor, 1U)
2105 || (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt)) 1960 || (group_size == DR_GROUP_SIZE (first_stmt_info)
2106 && GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))) 1961 && DR_GROUP_GAP (first_stmt_info) == 0)))
2107 { 1962 {
2108 load_permutation.release (); 1963 load_permutation.release ();
2109 continue; 1964 continue;
2110 } 1965 }
2111 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation; 1966 SLP_TREE_LOAD_PERMUTATION (load_node) = load_permutation;
2115 if (loads_permuted) 1970 if (loads_permuted)
2116 { 1971 {
2117 if (!vect_supported_load_permutation_p (new_instance)) 1972 if (!vect_supported_load_permutation_p (new_instance))
2118 { 1973 {
2119 if (dump_enabled_p ()) 1974 if (dump_enabled_p ())
2120 { 1975 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2121 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1976 "Build SLP failed: unsupported load "
2122 "Build SLP failed: unsupported load " 1977 "permutation %G", stmt_info->stmt);
2123 "permutation "); 1978 vect_free_slp_instance (new_instance, false);
2124 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION,
2125 TDF_SLIM, stmt, 0);
2126 }
2127 vect_free_slp_instance (new_instance);
2128 return false; 1979 return false;
2129 } 1980 }
2130 } 1981 }
2131 1982
2132 /* If the loads and stores can be handled with load/store-lan 1983 /* If the loads and stores can be handled with load/store-lan
2133 instructions do not generate this SLP instance. */ 1984 instructions do not generate this SLP instance. */
2134 if (is_a <loop_vec_info> (vinfo) 1985 if (is_a <loop_vec_info> (vinfo)
2135 && loads_permuted 1986 && loads_permuted
2136 && dr && vect_store_lanes_supported (vectype, group_size)) 1987 && dr && vect_store_lanes_supported (vectype, group_size, false))
2137 { 1988 {
2138 slp_tree load_node; 1989 slp_tree load_node;
2139 FOR_EACH_VEC_ELT (loads, i, load_node) 1990 FOR_EACH_VEC_ELT (loads, i, load_node)
2140 { 1991 {
2141 gimple *first_stmt = GROUP_FIRST_ELEMENT 1992 stmt_vec_info stmt_vinfo = DR_GROUP_FIRST_ELEMENT
2142 (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (load_node)[0])); 1993 (SLP_TREE_SCALAR_STMTS (load_node)[0]);
2143 stmt_vec_info stmt_vinfo = vinfo_for_stmt (first_stmt); 1994 /* Use SLP for strided accesses (or if we can't load-lanes). */
2144 /* Use SLP for strided accesses (or if we
2145 can't load-lanes). */
2146 if (STMT_VINFO_STRIDED_P (stmt_vinfo) 1995 if (STMT_VINFO_STRIDED_P (stmt_vinfo)
2147 || ! vect_load_lanes_supported 1996 || ! vect_load_lanes_supported
2148 (STMT_VINFO_VECTYPE (stmt_vinfo), 1997 (STMT_VINFO_VECTYPE (stmt_vinfo),
2149 GROUP_SIZE (stmt_vinfo))) 1998 DR_GROUP_SIZE (stmt_vinfo), false))
2150 break; 1999 break;
2151 } 2000 }
2152 if (i == loads.length ()) 2001 if (i == loads.length ())
2153 { 2002 {
2154 if (dump_enabled_p ()) 2003 if (dump_enabled_p ())
2155 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2004 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2156 "Built SLP cancelled: can use " 2005 "Built SLP cancelled: can use "
2157 "load/store-lanes\n"); 2006 "load/store-lanes\n");
2158 vect_free_slp_instance (new_instance); 2007 vect_free_slp_instance (new_instance, false);
2159 return false; 2008 return false;
2160 } 2009 }
2161 } 2010 }
2162 2011
2163 vinfo->slp_instances.safe_push (new_instance); 2012 vinfo->slp_instances.safe_push (new_instance);
2180 loads.release (); 2029 loads.release ();
2181 } 2030 }
2182 2031
2183 /* For basic block SLP, try to break the group up into multiples of the 2032 /* For basic block SLP, try to break the group up into multiples of the
2184 vector size. */ 2033 vector size. */
2034 unsigned HOST_WIDE_INT const_nunits;
2185 if (is_a <bb_vec_info> (vinfo) 2035 if (is_a <bb_vec_info> (vinfo)
2186 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt)) 2036 && STMT_VINFO_GROUPED_ACCESS (stmt_info)
2187 && STMT_VINFO_GROUPED_ACCESS (vinfo_for_stmt (stmt))) 2037 && DR_GROUP_FIRST_ELEMENT (stmt_info)
2038 && nunits.is_constant (&const_nunits))
2188 { 2039 {
2189 /* We consider breaking the group only on VF boundaries from the existing 2040 /* We consider breaking the group only on VF boundaries from the existing
2190 start. */ 2041 start. */
2191 for (i = 0; i < group_size; i++) 2042 for (i = 0; i < group_size; i++)
2192 if (!matches[i]) break; 2043 if (!matches[i]) break;
2193 2044
2194 if (i >= nunits && i < group_size) 2045 if (i >= const_nunits && i < group_size)
2195 { 2046 {
2196 /* Split into two groups at the first vector boundary before i. */ 2047 /* Split into two groups at the first vector boundary before i. */
2197 gcc_assert ((nunits & (nunits - 1)) == 0); 2048 gcc_assert ((const_nunits & (const_nunits - 1)) == 0);
2198 unsigned group1_size = i & ~(nunits - 1); 2049 unsigned group1_size = i & ~(const_nunits - 1);
2199 2050
2200 gimple *rest = vect_split_slp_store_group (stmt, group1_size); 2051 stmt_vec_info rest = vect_split_slp_store_group (stmt_info,
2201 bool res = vect_analyze_slp_instance (vinfo, stmt, max_tree_size); 2052 group1_size);
2053 bool res = vect_analyze_slp_instance (vinfo, stmt_info,
2054 max_tree_size);
2202 /* If the first non-match was in the middle of a vector, 2055 /* If the first non-match was in the middle of a vector,
2203 skip the rest of that vector. */ 2056 skip the rest of that vector. */
2204 if (group1_size < i) 2057 if (group1_size < i)
2205 { 2058 {
2206 i = group1_size + nunits; 2059 i = group1_size + const_nunits;
2207 if (i < group_size) 2060 if (i < group_size)
2208 rest = vect_split_slp_store_group (rest, nunits); 2061 rest = vect_split_slp_store_group (rest, const_nunits);
2209 } 2062 }
2210 if (i < group_size) 2063 if (i < group_size)
2211 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size); 2064 res |= vect_analyze_slp_instance (vinfo, rest, max_tree_size);
2212 return res; 2065 return res;
2213 } 2066 }
2220 2073
2221 2074
2222 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP 2075 /* Check if there are stmts in the loop can be vectorized using SLP. Build SLP
2223 trees of packed scalar stmts if SLP is possible. */ 2076 trees of packed scalar stmts if SLP is possible. */
2224 2077
2225 bool 2078 opt_result
2226 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size) 2079 vect_analyze_slp (vec_info *vinfo, unsigned max_tree_size)
2227 { 2080 {
2228 unsigned int i; 2081 unsigned int i;
2229 gimple *first_element; 2082 stmt_vec_info first_element;
2230 2083
2231 if (dump_enabled_p ()) 2084 DUMP_VECT_SCOPE ("vect_analyze_slp");
2232 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_analyze_slp ===\n");
2233 2085
2234 /* Find SLP sequences starting from groups of grouped stores. */ 2086 /* Find SLP sequences starting from groups of grouped stores. */
2235 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element) 2087 FOR_EACH_VEC_ELT (vinfo->grouped_stores, i, first_element)
2236 vect_analyze_slp_instance (vinfo, first_element, max_tree_size); 2088 vect_analyze_slp_instance (vinfo, first_element, max_tree_size);
2237 2089
2243 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element) 2095 FOR_EACH_VEC_ELT (loop_vinfo->reduction_chains, i, first_element)
2244 if (! vect_analyze_slp_instance (vinfo, first_element, 2096 if (! vect_analyze_slp_instance (vinfo, first_element,
2245 max_tree_size)) 2097 max_tree_size))
2246 { 2098 {
2247 /* Dissolve reduction chain group. */ 2099 /* Dissolve reduction chain group. */
2248 gimple *next, *stmt = first_element; 2100 stmt_vec_info vinfo = first_element;
2249 while (stmt) 2101 while (vinfo)
2250 { 2102 {
2251 stmt_vec_info vinfo = vinfo_for_stmt (stmt); 2103 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (vinfo);
2252 next = GROUP_NEXT_ELEMENT (vinfo); 2104 REDUC_GROUP_FIRST_ELEMENT (vinfo) = NULL;
2253 GROUP_FIRST_ELEMENT (vinfo) = NULL; 2105 REDUC_GROUP_NEXT_ELEMENT (vinfo) = NULL;
2254 GROUP_NEXT_ELEMENT (vinfo) = NULL; 2106 vinfo = next;
2255 stmt = next;
2256 } 2107 }
2257 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (first_element)) 2108 STMT_VINFO_DEF_TYPE (first_element) = vect_internal_def;
2258 = vect_internal_def;
2259 } 2109 }
2260 } 2110 }
2261 2111
2262 /* Find SLP sequences starting from groups of reductions. */ 2112 /* Find SLP sequences starting from groups of reductions. */
2263 if (loop_vinfo->reductions.length () > 1) 2113 if (loop_vinfo->reductions.length () > 1)
2264 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0], 2114 vect_analyze_slp_instance (vinfo, loop_vinfo->reductions[0],
2265 max_tree_size); 2115 max_tree_size);
2266 } 2116 }
2267 2117
2268 return true; 2118 return opt_result::success ();
2269 } 2119 }
2270 2120
2271 2121
2272 /* For each possible SLP instance decide whether to SLP it and calculate overall 2122 /* For each possible SLP instance decide whether to SLP it and calculate overall
2273 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at 2123 unrolling factor needed to SLP the loop. Return TRUE if decided to SLP at
2274 least one instance. */ 2124 least one instance. */
2275 2125
2276 bool 2126 bool
2277 vect_make_slp_decision (loop_vec_info loop_vinfo) 2127 vect_make_slp_decision (loop_vec_info loop_vinfo)
2278 { 2128 {
2279 unsigned int i, unrolling_factor = 1; 2129 unsigned int i;
2130 poly_uint64 unrolling_factor = 1;
2280 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo); 2131 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2281 slp_instance instance; 2132 slp_instance instance;
2282 int decided_to_slp = 0; 2133 int decided_to_slp = 0;
2283 2134
2284 if (dump_enabled_p ()) 2135 DUMP_VECT_SCOPE ("vect_make_slp_decision");
2285 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_make_slp_decision ==="
2286 "\n");
2287 2136
2288 FOR_EACH_VEC_ELT (slp_instances, i, instance) 2137 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2289 { 2138 {
2290 /* FORNOW: SLP if you can. */ 2139 /* FORNOW: SLP if you can. */
2291 if (unrolling_factor < SLP_INSTANCE_UNROLLING_FACTOR (instance)) 2140 /* All unroll factors have the form current_vector_size * X for some
2292 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (instance); 2141 rational X, so they must have a common multiple. */
2142 unrolling_factor
2143 = force_common_multiple (unrolling_factor,
2144 SLP_INSTANCE_UNROLLING_FACTOR (instance));
2293 2145
2294 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we 2146 /* Mark all the stmts that belong to INSTANCE as PURE_SLP stmts. Later we
2295 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and 2147 call vect_detect_hybrid_slp () to find stmts that need hybrid SLP and
2296 loop-based vectorization. Such stmts will be marked as HYBRID. */ 2148 loop-based vectorization. Such stmts will be marked as HYBRID. */
2297 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1); 2149 vect_mark_slp_stmts (SLP_INSTANCE_TREE (instance), pure_slp, -1);
2299 } 2151 }
2300 2152
2301 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor; 2153 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo) = unrolling_factor;
2302 2154
2303 if (decided_to_slp && dump_enabled_p ()) 2155 if (decided_to_slp && dump_enabled_p ())
2304 dump_printf_loc (MSG_NOTE, vect_location, 2156 {
2305 "Decided to SLP %d instances. Unrolling factor %d\n", 2157 dump_printf_loc (MSG_NOTE, vect_location,
2306 decided_to_slp, unrolling_factor); 2158 "Decided to SLP %d instances. Unrolling factor ",
2159 decided_to_slp);
2160 dump_dec (MSG_NOTE, unrolling_factor);
2161 dump_printf (MSG_NOTE, "\n");
2162 }
2307 2163
2308 return (decided_to_slp > 0); 2164 return (decided_to_slp > 0);
2309 } 2165 }
2310 2166
2311 2167
2313 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */ 2169 can't be SLPed) in the tree rooted at NODE. Mark such stmts as HYBRID. */
2314 2170
2315 static void 2171 static void
2316 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype) 2172 vect_detect_hybrid_slp_stmts (slp_tree node, unsigned i, slp_vect_type stype)
2317 { 2173 {
2318 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[i]; 2174 stmt_vec_info stmt_vinfo = SLP_TREE_SCALAR_STMTS (node)[i];
2319 imm_use_iterator imm_iter; 2175 imm_use_iterator imm_iter;
2320 gimple *use_stmt; 2176 gimple *use_stmt;
2321 stmt_vec_info use_vinfo, stmt_vinfo = vinfo_for_stmt (stmt); 2177 stmt_vec_info use_vinfo;
2322 slp_tree child; 2178 slp_tree child;
2323 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 2179 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
2324 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
2325 int j; 2180 int j;
2326 2181
2327 /* Propagate hybrid down the SLP tree. */ 2182 /* Propagate hybrid down the SLP tree. */
2328 if (stype == hybrid) 2183 if (stype == hybrid)
2329 ; 2184 ;
2333 { 2188 {
2334 /* Check if a pure SLP stmt has uses in non-SLP stmts. */ 2189 /* Check if a pure SLP stmt has uses in non-SLP stmts. */
2335 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo)); 2190 gcc_checking_assert (PURE_SLP_STMT (stmt_vinfo));
2336 /* If we get a pattern stmt here we have to use the LHS of the 2191 /* If we get a pattern stmt here we have to use the LHS of the
2337 original stmt for immediate uses. */ 2192 original stmt for immediate uses. */
2338 if (! STMT_VINFO_IN_PATTERN_P (stmt_vinfo) 2193 gimple *stmt = vect_orig_stmt (stmt_vinfo)->stmt;
2339 && STMT_VINFO_RELATED_STMT (stmt_vinfo))
2340 stmt = STMT_VINFO_RELATED_STMT (stmt_vinfo);
2341 tree def; 2194 tree def;
2342 if (gimple_code (stmt) == GIMPLE_PHI) 2195 if (gimple_code (stmt) == GIMPLE_PHI)
2343 def = gimple_phi_result (stmt); 2196 def = gimple_phi_result (stmt);
2344 else 2197 else
2345 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF); 2198 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF);
2346 if (def) 2199 if (def)
2347 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def) 2200 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, def)
2348 { 2201 {
2349 if (!flow_bb_inside_loop_p (loop, gimple_bb (use_stmt))) 2202 use_vinfo = loop_vinfo->lookup_stmt (use_stmt);
2203 if (!use_vinfo)
2350 continue; 2204 continue;
2351 use_vinfo = vinfo_for_stmt (use_stmt); 2205 use_vinfo = vect_stmt_to_vectorize (use_vinfo);
2352 if (STMT_VINFO_IN_PATTERN_P (use_vinfo)
2353 && STMT_VINFO_RELATED_STMT (use_vinfo))
2354 use_vinfo = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (use_vinfo));
2355 if (!STMT_SLP_TYPE (use_vinfo) 2206 if (!STMT_SLP_TYPE (use_vinfo)
2356 && (STMT_VINFO_RELEVANT (use_vinfo) 2207 && (STMT_VINFO_RELEVANT (use_vinfo)
2357 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo))) 2208 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2358 && !(gimple_code (use_stmt) == GIMPLE_PHI 2209 && !(gimple_code (use_stmt) == GIMPLE_PHI
2359 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def)) 2210 && STMT_VINFO_DEF_TYPE (use_vinfo) == vect_reduction_def))
2360 { 2211 {
2361 if (dump_enabled_p ()) 2212 if (dump_enabled_p ())
2362 { 2213 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP "
2363 dump_printf_loc (MSG_NOTE, vect_location, "use of SLP " 2214 "def in non-SLP stmt: %G", use_stmt);
2364 "def in non-SLP stmt: ");
2365 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, use_stmt, 0);
2366 }
2367 stype = hybrid; 2215 stype = hybrid;
2368 } 2216 }
2369 } 2217 }
2370 } 2218 }
2371 2219
2372 if (stype == hybrid 2220 if (stype == hybrid
2373 && !HYBRID_SLP_STMT (stmt_vinfo)) 2221 && !HYBRID_SLP_STMT (stmt_vinfo))
2374 { 2222 {
2375 if (dump_enabled_p ()) 2223 if (dump_enabled_p ())
2376 { 2224 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2377 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: "); 2225 stmt_vinfo->stmt);
2378 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
2379 }
2380 STMT_SLP_TYPE (stmt_vinfo) = hybrid; 2226 STMT_SLP_TYPE (stmt_vinfo) = hybrid;
2381 } 2227 }
2382 2228
2383 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) 2229 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2384 if (SLP_TREE_DEF_TYPE (child) != vect_external_def) 2230 if (SLP_TREE_DEF_TYPE (child) != vect_external_def)
2389 2235
2390 static tree 2236 static tree
2391 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data) 2237 vect_detect_hybrid_slp_1 (tree *tp, int *, void *data)
2392 { 2238 {
2393 walk_stmt_info *wi = (walk_stmt_info *)data; 2239 walk_stmt_info *wi = (walk_stmt_info *)data;
2394 struct loop *loopp = (struct loop *)wi->info; 2240 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2395 2241
2396 if (wi->is_lhs) 2242 if (wi->is_lhs)
2397 return NULL_TREE; 2243 return NULL_TREE;
2398 2244
2399 if (TREE_CODE (*tp) == SSA_NAME 2245 stmt_vec_info def_stmt_info = loop_vinfo->lookup_def (*tp);
2400 && !SSA_NAME_IS_DEFAULT_DEF (*tp)) 2246 if (def_stmt_info && PURE_SLP_STMT (def_stmt_info))
2401 { 2247 {
2402 gimple *def_stmt = SSA_NAME_DEF_STMT (*tp); 2248 if (dump_enabled_p ())
2403 if (flow_bb_inside_loop_p (loopp, gimple_bb (def_stmt)) 2249 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: %G",
2404 && PURE_SLP_STMT (vinfo_for_stmt (def_stmt))) 2250 def_stmt_info->stmt);
2405 { 2251 STMT_SLP_TYPE (def_stmt_info) = hybrid;
2406 if (dump_enabled_p ())
2407 {
2408 dump_printf_loc (MSG_NOTE, vect_location, "marking hybrid: ");
2409 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, def_stmt, 0);
2410 }
2411 STMT_SLP_TYPE (vinfo_for_stmt (def_stmt)) = hybrid;
2412 }
2413 } 2252 }
2414 2253
2415 return NULL_TREE; 2254 return NULL_TREE;
2416 } 2255 }
2417 2256
2418 static tree 2257 static tree
2419 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled, 2258 vect_detect_hybrid_slp_2 (gimple_stmt_iterator *gsi, bool *handled,
2420 walk_stmt_info *) 2259 walk_stmt_info *wi)
2421 { 2260 {
2422 stmt_vec_info use_vinfo = vinfo_for_stmt (gsi_stmt (*gsi)); 2261 loop_vec_info loop_vinfo = (loop_vec_info) wi->info;
2262 stmt_vec_info use_vinfo = loop_vinfo->lookup_stmt (gsi_stmt (*gsi));
2423 /* If the stmt is in a SLP instance then this isn't a reason 2263 /* If the stmt is in a SLP instance then this isn't a reason
2424 to mark use definitions in other SLP instances as hybrid. */ 2264 to mark use definitions in other SLP instances as hybrid. */
2425 if (! STMT_SLP_TYPE (use_vinfo) 2265 if (! STMT_SLP_TYPE (use_vinfo)
2426 && (STMT_VINFO_RELEVANT (use_vinfo) 2266 && (STMT_VINFO_RELEVANT (use_vinfo)
2427 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo))) 2267 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (use_vinfo)))
2440 { 2280 {
2441 unsigned int i; 2281 unsigned int i;
2442 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo); 2282 vec<slp_instance> slp_instances = LOOP_VINFO_SLP_INSTANCES (loop_vinfo);
2443 slp_instance instance; 2283 slp_instance instance;
2444 2284
2445 if (dump_enabled_p ()) 2285 DUMP_VECT_SCOPE ("vect_detect_hybrid_slp");
2446 dump_printf_loc (MSG_NOTE, vect_location, "=== vect_detect_hybrid_slp ==="
2447 "\n");
2448 2286
2449 /* First walk all pattern stmt in the loop and mark defs of uses as 2287 /* First walk all pattern stmt in the loop and mark defs of uses as
2450 hybrid because immediate uses in them are not recorded. */ 2288 hybrid because immediate uses in them are not recorded. */
2451 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i) 2289 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2452 { 2290 {
2453 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i]; 2291 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2454 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); 2292 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2455 gsi_next (&gsi)) 2293 gsi_next (&gsi))
2456 { 2294 {
2457 gimple *stmt = gsi_stmt (gsi); 2295 gimple *stmt = gsi_stmt (gsi);
2458 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 2296 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
2459 if (STMT_VINFO_IN_PATTERN_P (stmt_info)) 2297 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2460 { 2298 {
2461 walk_stmt_info wi; 2299 walk_stmt_info wi;
2462 memset (&wi, 0, sizeof (wi)); 2300 memset (&wi, 0, sizeof (wi));
2463 wi.info = LOOP_VINFO_LOOP (loop_vinfo); 2301 wi.info = loop_vinfo;
2464 gimple_stmt_iterator gsi2 2302 gimple_stmt_iterator gsi2
2465 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); 2303 = gsi_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)->stmt);
2466 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2, 2304 walk_gimple_stmt (&gsi2, vect_detect_hybrid_slp_2,
2467 vect_detect_hybrid_slp_1, &wi); 2305 vect_detect_hybrid_slp_1, &wi);
2468 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info), 2306 walk_gimple_seq (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info),
2469 vect_detect_hybrid_slp_2, 2307 vect_detect_hybrid_slp_2,
2470 vect_detect_hybrid_slp_1, &wi); 2308 vect_detect_hybrid_slp_1, &wi);
2486 2324
2487 /* Initialize a bb_vec_info struct for the statements between 2325 /* Initialize a bb_vec_info struct for the statements between
2488 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */ 2326 REGION_BEGIN_IN (inclusive) and REGION_END_IN (exclusive). */
2489 2327
2490 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in, 2328 _bb_vec_info::_bb_vec_info (gimple_stmt_iterator region_begin_in,
2491 gimple_stmt_iterator region_end_in) 2329 gimple_stmt_iterator region_end_in,
2492 : vec_info (vec_info::bb, init_cost (NULL)), 2330 vec_info_shared *shared)
2331 : vec_info (vec_info::bb, init_cost (NULL), shared),
2493 bb (gsi_bb (region_begin_in)), 2332 bb (gsi_bb (region_begin_in)),
2494 region_begin (region_begin_in), 2333 region_begin (region_begin_in),
2495 region_end (region_end_in) 2334 region_end (region_end_in)
2496 { 2335 {
2497 gimple_stmt_iterator gsi; 2336 gimple_stmt_iterator gsi;
2499 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end); 2338 for (gsi = region_begin; gsi_stmt (gsi) != gsi_stmt (region_end);
2500 gsi_next (&gsi)) 2339 gsi_next (&gsi))
2501 { 2340 {
2502 gimple *stmt = gsi_stmt (gsi); 2341 gimple *stmt = gsi_stmt (gsi);
2503 gimple_set_uid (stmt, 0); 2342 gimple_set_uid (stmt, 0);
2504 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this)); 2343 add_stmt (stmt);
2505 } 2344 }
2506 2345
2507 bb->aux = this; 2346 bb->aux = this;
2508 } 2347 }
2509 2348
2513 2352
2514 _bb_vec_info::~_bb_vec_info () 2353 _bb_vec_info::~_bb_vec_info ()
2515 { 2354 {
2516 for (gimple_stmt_iterator si = region_begin; 2355 for (gimple_stmt_iterator si = region_begin;
2517 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si)) 2356 gsi_stmt (si) != gsi_stmt (region_end); gsi_next (&si))
2518 { 2357 /* Reset region marker. */
2519 gimple *stmt = gsi_stmt (si); 2358 gimple_set_uid (gsi_stmt (si), -1);
2520 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2521
2522 if (stmt_info)
2523 /* Free stmt_vec_info. */
2524 free_stmt_vec_info (stmt);
2525
2526 /* Reset region marker. */
2527 gimple_set_uid (stmt, -1);
2528 }
2529 2359
2530 bb->aux = NULL; 2360 bb->aux = NULL;
2531 } 2361 }
2532 2362
2533 2363 /* Subroutine of vect_slp_analyze_node_operations. Handle the root of NODE,
2534 /* Analyze statements contained in SLP tree NODE after recursively analyzing 2364 given then that child nodes have already been processed, and that
2535 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE. 2365 their def types currently match their SLP node's def type. */
2536
2537 Return true if the operations are supported. */
2538 2366
2539 static bool 2367 static bool
2540 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node, 2368 vect_slp_analyze_node_operations_1 (vec_info *vinfo, slp_tree node,
2541 slp_instance node_instance) 2369 slp_instance node_instance,
2542 { 2370 stmt_vector_for_cost *cost_vec)
2543 bool dummy; 2371 {
2544 int i, j; 2372 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2545 gimple *stmt;
2546 slp_tree child;
2547
2548 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2549 return true;
2550
2551 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2552 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance))
2553 return false;
2554
2555 stmt = SLP_TREE_SCALAR_STMTS (node)[0];
2556 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
2557 gcc_assert (stmt_info);
2558 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect); 2373 gcc_assert (STMT_SLP_TYPE (stmt_info) != loop_vect);
2559 2374
2560 /* For BB vectorization vector types are assigned here. 2375 /* For BB vectorization vector types are assigned here.
2561 Memory accesses already got their vector type assigned 2376 Memory accesses already got their vector type assigned
2562 in vect_analyze_data_refs. */ 2377 in vect_analyze_data_refs. */
2563 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info); 2378 bb_vec_info bb_vinfo = STMT_VINFO_BB_VINFO (stmt_info);
2564 if (bb_vinfo 2379 if (bb_vinfo
2565 && ! STMT_VINFO_DATA_REF (stmt_info)) 2380 && ! STMT_VINFO_DATA_REF (stmt_info))
2566 { 2381 {
2567 gcc_assert (PURE_SLP_STMT (stmt_info)); 2382 tree vectype, nunits_vectype;
2568 2383 if (!vect_get_vector_types_for_stmt (stmt_info, &vectype,
2569 tree scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); 2384 &nunits_vectype))
2570 if (dump_enabled_p ()) 2385 /* We checked this when building the node. */
2571 { 2386 gcc_unreachable ();
2572 dump_printf_loc (MSG_NOTE, vect_location, 2387 if (vectype == boolean_type_node)
2573 "get vectype for scalar type: "); 2388 {
2574 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type); 2389 vectype = vect_get_mask_type_for_stmt (stmt_info);
2575 dump_printf (MSG_NOTE, "\n"); 2390 if (!vectype)
2576 } 2391 /* vect_get_mask_type_for_stmt has already explained the
2577 2392 failure. */
2578 tree vectype = get_vectype_for_scalar_type (scalar_type); 2393 return false;
2579 if (!vectype) 2394 }
2580 { 2395
2581 if (dump_enabled_p ()) 2396 stmt_vec_info sstmt_info;
2582 { 2397 unsigned int i;
2583 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2398 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt_info)
2584 "not SLPed: unsupported data-type "); 2399 STMT_VINFO_VECTYPE (sstmt_info) = vectype;
2585 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
2586 scalar_type);
2587 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2588 }
2589 return false;
2590 }
2591
2592 if (dump_enabled_p ())
2593 {
2594 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
2595 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
2596 dump_printf (MSG_NOTE, "\n");
2597 }
2598
2599 gimple *sstmt;
2600 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, sstmt)
2601 STMT_VINFO_VECTYPE (vinfo_for_stmt (sstmt)) = vectype;
2602 } 2400 }
2603 2401
2604 /* Calculate the number of vector statements to be created for the 2402 /* Calculate the number of vector statements to be created for the
2605 scalar stmts in this node. For SLP reductions it is equal to the 2403 scalar stmts in this node. For SLP reductions it is equal to the
2606 number of vector statements in the children (which has already been 2404 number of vector statements in the children (which has already been
2607 calculated by the recursive call). Otherwise it is the number of 2405 calculated by the recursive call). Otherwise it is the number of
2608 scalar elements in one scalar iteration (GROUP_SIZE) multiplied by 2406 scalar elements in one scalar iteration (DR_GROUP_SIZE) multiplied by
2609 VF divided by the number of elements in a vector. */ 2407 VF divided by the number of elements in a vector. */
2610 if (GROUP_FIRST_ELEMENT (stmt_info) 2408 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
2611 && !STMT_VINFO_GROUPED_ACCESS (stmt_info)) 2409 && REDUC_GROUP_FIRST_ELEMENT (stmt_info))
2612 SLP_TREE_NUMBER_OF_VEC_STMTS (node) 2410 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2613 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]); 2411 = SLP_TREE_NUMBER_OF_VEC_STMTS (SLP_TREE_CHILDREN (node)[0]);
2614 else 2412 else
2615 { 2413 {
2616 int vf; 2414 poly_uint64 vf;
2617 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) 2415 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
2618 vf = loop_vinfo->vectorization_factor; 2416 vf = loop_vinfo->vectorization_factor;
2619 else 2417 else
2620 vf = 1; 2418 vf = 1;
2621 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance); 2419 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (node_instance);
2622 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 2420 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
2623 SLP_TREE_NUMBER_OF_VEC_STMTS (node) 2421 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2624 = vf * group_size / TYPE_VECTOR_SUBPARTS (vectype); 2422 = vect_get_num_vectors (vf * group_size, vectype);
2625 } 2423 }
2424
2425 bool dummy;
2426 return vect_analyze_stmt (stmt_info, &dummy, node, node_instance, cost_vec);
2427 }
2428
2429 /* Analyze statements contained in SLP tree NODE after recursively analyzing
2430 the subtree. NODE_INSTANCE contains NODE and VINFO contains INSTANCE.
2431
2432 Return true if the operations are supported. */
2433
2434 static bool
2435 vect_slp_analyze_node_operations (vec_info *vinfo, slp_tree node,
2436 slp_instance node_instance,
2437 scalar_stmts_to_slp_tree_map_t *visited,
2438 scalar_stmts_to_slp_tree_map_t *lvisited,
2439 stmt_vector_for_cost *cost_vec)
2440 {
2441 int i, j;
2442 slp_tree child;
2443
2444 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
2445 return true;
2446
2447 /* If we already analyzed the exact same set of scalar stmts we're done.
2448 We share the generated vector stmts for those. */
2449 slp_tree *leader;
2450 if ((leader = visited->get (SLP_TREE_SCALAR_STMTS (node)))
2451 || (leader = lvisited->get (SLP_TREE_SCALAR_STMTS (node))))
2452 {
2453 SLP_TREE_NUMBER_OF_VEC_STMTS (node)
2454 = SLP_TREE_NUMBER_OF_VEC_STMTS (*leader);
2455 return true;
2456 }
2457
2458 /* The SLP graph is acyclic so not caching whether we failed or succeeded
2459 doesn't result in any issue since we throw away the lvisited set
2460 when we fail. */
2461 lvisited->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
2462
2463 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2464 if (!vect_slp_analyze_node_operations (vinfo, child, node_instance,
2465 visited, lvisited, cost_vec))
2466 return false;
2626 2467
2627 /* Push SLP node def-type to stmt operands. */ 2468 /* Push SLP node def-type to stmt operands. */
2628 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) 2469 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2629 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) 2470 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2630 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])) 2471 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2631 = SLP_TREE_DEF_TYPE (child); 2472 = SLP_TREE_DEF_TYPE (child);
2632 bool res = vect_analyze_stmt (stmt, &dummy, node, node_instance); 2473 bool res = vect_slp_analyze_node_operations_1 (vinfo, node, node_instance,
2474 cost_vec);
2633 /* Restore def-types. */ 2475 /* Restore def-types. */
2634 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child) 2476 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), j, child)
2635 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) 2477 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
2636 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (child)[0])) 2478 STMT_VINFO_DEF_TYPE (SLP_TREE_SCALAR_STMTS (child)[0])
2637 = vect_internal_def; 2479 = vect_internal_def;
2638 if (! res) 2480 if (! res)
2639 return false; 2481 return false;
2640 2482
2641 return true; 2483 return true;
2649 vect_slp_analyze_operations (vec_info *vinfo) 2491 vect_slp_analyze_operations (vec_info *vinfo)
2650 { 2492 {
2651 slp_instance instance; 2493 slp_instance instance;
2652 int i; 2494 int i;
2653 2495
2654 if (dump_enabled_p ()) 2496 DUMP_VECT_SCOPE ("vect_slp_analyze_operations");
2655 dump_printf_loc (MSG_NOTE, vect_location, 2497
2656 "=== vect_slp_analyze_operations ===\n"); 2498 scalar_stmts_to_slp_tree_map_t *visited
2657 2499 = new scalar_stmts_to_slp_tree_map_t ();
2658 for (i = 0; vinfo->slp_instances.iterate (i, &instance); ) 2500 for (i = 0; vinfo->slp_instances.iterate (i, &instance); )
2659 { 2501 {
2502 scalar_stmts_to_slp_tree_map_t lvisited;
2503 stmt_vector_for_cost cost_vec;
2504 cost_vec.create (2);
2660 if (!vect_slp_analyze_node_operations (vinfo, 2505 if (!vect_slp_analyze_node_operations (vinfo,
2661 SLP_INSTANCE_TREE (instance), 2506 SLP_INSTANCE_TREE (instance),
2662 instance)) 2507 instance, visited, &lvisited,
2508 &cost_vec))
2663 { 2509 {
2510 slp_tree node = SLP_INSTANCE_TREE (instance);
2511 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2664 dump_printf_loc (MSG_NOTE, vect_location, 2512 dump_printf_loc (MSG_NOTE, vect_location,
2665 "removing SLP instance operations starting from: "); 2513 "removing SLP instance operations starting from: %G",
2666 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, 2514 stmt_info->stmt);
2667 SLP_TREE_SCALAR_STMTS 2515 vect_free_slp_instance (instance, false);
2668 (SLP_INSTANCE_TREE (instance))[0], 0);
2669 vect_free_slp_instance (instance);
2670 vinfo->slp_instances.ordered_remove (i); 2516 vinfo->slp_instances.ordered_remove (i);
2517 cost_vec.release ();
2671 } 2518 }
2672 else 2519 else
2673 { 2520 {
2674 /* Compute the costs of the SLP instance. */ 2521 for (scalar_stmts_to_slp_tree_map_t::iterator x = lvisited.begin();
2675 vect_analyze_slp_cost (instance, vinfo->target_cost_data); 2522 x != lvisited.end(); ++x)
2523 visited->put ((*x).first.copy (), (*x).second);
2676 i++; 2524 i++;
2677 } 2525
2678 } 2526 add_stmt_costs (vinfo->target_cost_data, &cost_vec);
2527 cost_vec.release ();
2528 }
2529 }
2530 delete visited;
2679 2531
2680 return !vinfo->slp_instances.is_empty (); 2532 return !vinfo->slp_instances.is_empty ();
2681 } 2533 }
2682 2534
2683 2535
2684 /* Compute the scalar cost of the SLP node NODE and its children 2536 /* Compute the scalar cost of the SLP node NODE and its children
2685 and return it. Do not account defs that are marked in LIFE and 2537 and return it. Do not account defs that are marked in LIFE and
2686 update LIFE according to uses of NODE. */ 2538 update LIFE according to uses of NODE. */
2687 2539
2688 static unsigned 2540 static void
2689 vect_bb_slp_scalar_cost (basic_block bb, 2541 vect_bb_slp_scalar_cost (basic_block bb,
2690 slp_tree node, vec<bool, va_heap> *life) 2542 slp_tree node, vec<bool, va_heap> *life,
2691 { 2543 stmt_vector_for_cost *cost_vec)
2692 unsigned scalar_cost = 0; 2544 {
2693 unsigned i; 2545 unsigned i;
2694 gimple *stmt; 2546 stmt_vec_info stmt_info;
2695 slp_tree child; 2547 slp_tree child;
2696 2548
2697 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) 2549 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
2698 { 2550 {
2699 unsigned stmt_cost; 2551 gimple *stmt = stmt_info->stmt;
2552 vec_info *vinfo = stmt_info->vinfo;
2700 ssa_op_iter op_iter; 2553 ssa_op_iter op_iter;
2701 def_operand_p def_p; 2554 def_operand_p def_p;
2702 stmt_vec_info stmt_info;
2703 2555
2704 if ((*life)[i]) 2556 if ((*life)[i])
2705 continue; 2557 continue;
2706 2558
2707 /* If there is a non-vectorized use of the defs then the scalar 2559 /* If there is a non-vectorized use of the defs then the scalar
2712 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF) 2564 FOR_EACH_SSA_DEF_OPERAND (def_p, stmt, op_iter, SSA_OP_DEF)
2713 { 2565 {
2714 imm_use_iterator use_iter; 2566 imm_use_iterator use_iter;
2715 gimple *use_stmt; 2567 gimple *use_stmt;
2716 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p)) 2568 FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, DEF_FROM_PTR (def_p))
2717 if (!is_gimple_debug (use_stmt) 2569 if (!is_gimple_debug (use_stmt))
2718 && (! vect_stmt_in_region_p (vinfo_for_stmt (stmt)->vinfo,
2719 use_stmt)
2720 || ! PURE_SLP_STMT (vinfo_for_stmt (use_stmt))))
2721 { 2570 {
2722 (*life)[i] = true; 2571 stmt_vec_info use_stmt_info = vinfo->lookup_stmt (use_stmt);
2723 BREAK_FROM_IMM_USE_STMT (use_iter); 2572 if (!use_stmt_info || !PURE_SLP_STMT (use_stmt_info))
2573 {
2574 (*life)[i] = true;
2575 BREAK_FROM_IMM_USE_STMT (use_iter);
2576 }
2724 } 2577 }
2725 } 2578 }
2726 if ((*life)[i]) 2579 if ((*life)[i])
2727 continue; 2580 continue;
2728 2581
2729 /* Count scalar stmts only once. */ 2582 /* Count scalar stmts only once. */
2730 if (gimple_visited_p (stmt)) 2583 if (gimple_visited_p (stmt))
2731 continue; 2584 continue;
2732 gimple_set_visited (stmt, true); 2585 gimple_set_visited (stmt, true);
2733 2586
2734 stmt_info = vinfo_for_stmt (stmt); 2587 vect_cost_for_stmt kind;
2735 if (STMT_VINFO_DATA_REF (stmt_info)) 2588 if (STMT_VINFO_DATA_REF (stmt_info))
2736 { 2589 {
2737 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info))) 2590 if (DR_IS_READ (STMT_VINFO_DATA_REF (stmt_info)))
2738 stmt_cost = vect_get_stmt_cost (scalar_load); 2591 kind = scalar_load;
2739 else 2592 else
2740 stmt_cost = vect_get_stmt_cost (scalar_store); 2593 kind = scalar_store;
2741 } 2594 }
2742 else 2595 else
2743 stmt_cost = vect_get_stmt_cost (scalar_stmt); 2596 kind = scalar_stmt;
2744 2597 record_stmt_cost (cost_vec, 1, kind, stmt_info, 0, vect_body);
2745 scalar_cost += stmt_cost;
2746 } 2598 }
2747 2599
2748 auto_vec<bool, 20> subtree_life; 2600 auto_vec<bool, 20> subtree_life;
2749 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) 2601 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
2750 { 2602 {
2751 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) 2603 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
2752 { 2604 {
2753 /* Do not directly pass LIFE to the recursive call, copy it to 2605 /* Do not directly pass LIFE to the recursive call, copy it to
2754 confine changes in the callee to the current child/subtree. */ 2606 confine changes in the callee to the current child/subtree. */
2755 subtree_life.safe_splice (*life); 2607 subtree_life.safe_splice (*life);
2756 scalar_cost += vect_bb_slp_scalar_cost (bb, child, &subtree_life); 2608 vect_bb_slp_scalar_cost (bb, child, &subtree_life, cost_vec);
2757 subtree_life.truncate (0); 2609 subtree_life.truncate (0);
2758 } 2610 }
2759 } 2611 }
2760
2761 return scalar_cost;
2762 } 2612 }
2763 2613
2764 /* Check if vectorization of the basic block is profitable. */ 2614 /* Check if vectorization of the basic block is profitable. */
2765 2615
2766 static bool 2616 static bool
2771 int i; 2621 int i;
2772 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0; 2622 unsigned int vec_inside_cost = 0, vec_outside_cost = 0, scalar_cost = 0;
2773 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0; 2623 unsigned int vec_prologue_cost = 0, vec_epilogue_cost = 0;
2774 2624
2775 /* Calculate scalar cost. */ 2625 /* Calculate scalar cost. */
2626 stmt_vector_for_cost scalar_costs;
2627 scalar_costs.create (0);
2776 FOR_EACH_VEC_ELT (slp_instances, i, instance) 2628 FOR_EACH_VEC_ELT (slp_instances, i, instance)
2777 { 2629 {
2778 auto_vec<bool, 20> life; 2630 auto_vec<bool, 20> life;
2779 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance)); 2631 life.safe_grow_cleared (SLP_INSTANCE_GROUP_SIZE (instance));
2780 scalar_cost += vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo), 2632 vect_bb_slp_scalar_cost (BB_VINFO_BB (bb_vinfo),
2781 SLP_INSTANCE_TREE (instance), 2633 SLP_INSTANCE_TREE (instance),
2782 &life); 2634 &life, &scalar_costs);
2783 } 2635 }
2636 void *target_cost_data = init_cost (NULL);
2637 add_stmt_costs (target_cost_data, &scalar_costs);
2638 scalar_costs.release ();
2639 unsigned dummy;
2640 finish_cost (target_cost_data, &dummy, &scalar_cost, &dummy);
2641 destroy_cost_data (target_cost_data);
2784 2642
2785 /* Unset visited flag. */ 2643 /* Unset visited flag. */
2786 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin; 2644 for (gimple_stmt_iterator gsi = bb_vinfo->region_begin;
2787 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi)) 2645 gsi_stmt (gsi) != gsi_stmt (bb_vinfo->region_end); gsi_next (&gsi))
2788 gimple_set_visited (gsi_stmt (gsi), false); 2646 gimple_set_visited (gsi_stmt (gsi), false);
2820 2678
2821 static bb_vec_info 2679 static bb_vec_info
2822 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin, 2680 vect_slp_analyze_bb_1 (gimple_stmt_iterator region_begin,
2823 gimple_stmt_iterator region_end, 2681 gimple_stmt_iterator region_end,
2824 vec<data_reference_p> datarefs, int n_stmts, 2682 vec<data_reference_p> datarefs, int n_stmts,
2825 bool &fatal) 2683 bool &fatal, vec_info_shared *shared)
2826 { 2684 {
2827 bb_vec_info bb_vinfo; 2685 bb_vec_info bb_vinfo;
2828 slp_instance instance; 2686 slp_instance instance;
2829 int i; 2687 int i;
2830 int min_vf = 2; 2688 poly_uint64 min_vf = 2;
2831 2689
2832 /* The first group of checks is independent of the vector size. */ 2690 /* The first group of checks is independent of the vector size. */
2833 fatal = true; 2691 fatal = true;
2834 2692
2835 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB)) 2693 if (n_stmts > PARAM_VALUE (PARAM_SLP_MAX_INSNS_IN_BB))
2840 "basic block.\n"); 2698 "basic block.\n");
2841 free_data_refs (datarefs); 2699 free_data_refs (datarefs);
2842 return NULL; 2700 return NULL;
2843 } 2701 }
2844 2702
2845 bb_vinfo = new _bb_vec_info (region_begin, region_end); 2703 bb_vinfo = new _bb_vec_info (region_begin, region_end, shared);
2846 if (!bb_vinfo) 2704 if (!bb_vinfo)
2847 return NULL; 2705 return NULL;
2848 2706
2849 BB_VINFO_DATAREFS (bb_vinfo) = datarefs; 2707 BB_VINFO_DATAREFS (bb_vinfo) = datarefs;
2708 bb_vinfo->shared->save_datarefs ();
2850 2709
2851 /* Analyze the data references. */ 2710 /* Analyze the data references. */
2852 2711
2853 if (!vect_analyze_data_refs (bb_vinfo, &min_vf)) 2712 if (!vect_analyze_data_refs (bb_vinfo, &min_vf))
2854 { 2713 {
2926 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); ) 2785 for (i = 0; BB_VINFO_SLP_INSTANCES (bb_vinfo).iterate (i, &instance); )
2927 { 2786 {
2928 if (! vect_slp_analyze_and_verify_instance_alignment (instance) 2787 if (! vect_slp_analyze_and_verify_instance_alignment (instance)
2929 || ! vect_slp_analyze_instance_dependence (instance)) 2788 || ! vect_slp_analyze_instance_dependence (instance))
2930 { 2789 {
2790 slp_tree node = SLP_INSTANCE_TREE (instance);
2791 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
2931 dump_printf_loc (MSG_NOTE, vect_location, 2792 dump_printf_loc (MSG_NOTE, vect_location,
2932 "removing SLP instance operations starting from: "); 2793 "removing SLP instance operations starting from: %G",
2933 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, 2794 stmt_info->stmt);
2934 SLP_TREE_SCALAR_STMTS 2795 vect_free_slp_instance (instance, false);
2935 (SLP_INSTANCE_TREE (instance))[0], 0);
2936 vect_free_slp_instance (instance);
2937 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i); 2796 BB_VINFO_SLP_INSTANCES (bb_vinfo).ordered_remove (i);
2938 continue; 2797 continue;
2939 } 2798 }
2940 2799
2941 /* Mark all the statements that we want to vectorize as pure SLP and 2800 /* Mark all the statements that we want to vectorize as pure SLP and
2988 bool 2847 bool
2989 vect_slp_bb (basic_block bb) 2848 vect_slp_bb (basic_block bb)
2990 { 2849 {
2991 bb_vec_info bb_vinfo; 2850 bb_vec_info bb_vinfo;
2992 gimple_stmt_iterator gsi; 2851 gimple_stmt_iterator gsi;
2993 unsigned int vector_sizes;
2994 bool any_vectorized = false; 2852 bool any_vectorized = false;
2995 2853 auto_vector_sizes vector_sizes;
2996 if (dump_enabled_p ()) 2854
2997 dump_printf_loc (MSG_NOTE, vect_location, "===vect_slp_analyze_bb===\n"); 2855 DUMP_VECT_SCOPE ("vect_slp_analyze_bb");
2998 2856
2999 /* Autodetect first vector size we try. */ 2857 /* Autodetect first vector size we try. */
3000 current_vector_size = 0; 2858 current_vector_size = 0;
3001 vector_sizes = targetm.vectorize.autovectorize_vector_sizes (); 2859 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
2860 unsigned int next_size = 0;
3002 2861
3003 gsi = gsi_start_bb (bb); 2862 gsi = gsi_start_bb (bb);
3004 2863
2864 poly_uint64 autodetected_vector_size = 0;
3005 while (1) 2865 while (1)
3006 { 2866 {
3007 if (gsi_end_p (gsi)) 2867 if (gsi_end_p (gsi))
3008 break; 2868 break;
3009 2869
3017 if (is_gimple_debug (stmt)) 2877 if (is_gimple_debug (stmt))
3018 continue; 2878 continue;
3019 insns++; 2879 insns++;
3020 2880
3021 if (gimple_location (stmt) != UNKNOWN_LOCATION) 2881 if (gimple_location (stmt) != UNKNOWN_LOCATION)
3022 vect_location = gimple_location (stmt); 2882 vect_location = stmt;
3023 2883
3024 if (!find_data_references_in_stmt (NULL, stmt, &datarefs)) 2884 if (!vect_find_stmt_data_reference (NULL, stmt, &datarefs))
3025 break; 2885 break;
3026 } 2886 }
3027 2887
3028 /* Skip leading unhandled stmts. */ 2888 /* Skip leading unhandled stmts. */
3029 if (gsi_stmt (region_begin) == gsi_stmt (gsi)) 2889 if (gsi_stmt (region_begin) == gsi_stmt (gsi))
3034 2894
3035 gimple_stmt_iterator region_end = gsi; 2895 gimple_stmt_iterator region_end = gsi;
3036 2896
3037 bool vectorized = false; 2897 bool vectorized = false;
3038 bool fatal = false; 2898 bool fatal = false;
2899 vec_info_shared shared;
3039 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end, 2900 bb_vinfo = vect_slp_analyze_bb_1 (region_begin, region_end,
3040 datarefs, insns, fatal); 2901 datarefs, insns, fatal, &shared);
3041 if (bb_vinfo 2902 if (bb_vinfo
3042 && dbg_cnt (vect_slp)) 2903 && dbg_cnt (vect_slp))
3043 { 2904 {
3044 if (dump_enabled_p ()) 2905 if (dump_enabled_p ())
3045 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n"); 2906 dump_printf_loc (MSG_NOTE, vect_location, "SLPing BB part\n");
3046 2907
2908 bb_vinfo->shared->check_datarefs ();
3047 vect_schedule_slp (bb_vinfo); 2909 vect_schedule_slp (bb_vinfo);
3048 2910
3049 if (dump_enabled_p ()) 2911 unsigned HOST_WIDE_INT bytes;
3050 dump_printf_loc (MSG_NOTE, vect_location, 2912 if (current_vector_size.is_constant (&bytes))
3051 "basic block part vectorized\n"); 2913 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2914 "basic block part vectorized using %wu byte "
2915 "vectors\n", bytes);
2916 else
2917 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location,
2918 "basic block part vectorized using variable "
2919 "length vectors\n");
3052 2920
3053 vectorized = true; 2921 vectorized = true;
3054 } 2922 }
3055 delete bb_vinfo; 2923 delete bb_vinfo;
3056 2924
3057 any_vectorized |= vectorized; 2925 any_vectorized |= vectorized;
3058 2926
3059 vector_sizes &= ~current_vector_size; 2927 if (next_size == 0)
2928 autodetected_vector_size = current_vector_size;
2929
2930 if (next_size < vector_sizes.length ()
2931 && known_eq (vector_sizes[next_size], autodetected_vector_size))
2932 next_size += 1;
2933
3060 if (vectorized 2934 if (vectorized
3061 || vector_sizes == 0 2935 || next_size == vector_sizes.length ()
3062 || current_vector_size == 0 2936 || known_eq (current_vector_size, 0U)
3063 /* If vect_slp_analyze_bb_1 signaled that analysis for all 2937 /* If vect_slp_analyze_bb_1 signaled that analysis for all
3064 vector sizes will fail do not bother iterating. */ 2938 vector sizes will fail do not bother iterating. */
3065 || fatal) 2939 || fatal)
3066 { 2940 {
3067 if (gsi_end_p (region_end)) 2941 if (gsi_end_p (region_end))
3070 /* Skip the unhandled stmt. */ 2944 /* Skip the unhandled stmt. */
3071 gsi_next (&gsi); 2945 gsi_next (&gsi);
3072 2946
3073 /* And reset vector sizes. */ 2947 /* And reset vector sizes. */
3074 current_vector_size = 0; 2948 current_vector_size = 0;
3075 vector_sizes = targetm.vectorize.autovectorize_vector_sizes (); 2949 next_size = 0;
3076 } 2950 }
3077 else 2951 else
3078 { 2952 {
3079 /* Try the next biggest vector size. */ 2953 /* Try the next biggest vector size. */
3080 current_vector_size = 1 << floor_log2 (vector_sizes); 2954 current_vector_size = vector_sizes[next_size++];
3081 if (dump_enabled_p ()) 2955 if (dump_enabled_p ())
3082 dump_printf_loc (MSG_NOTE, vect_location, 2956 {
3083 "***** Re-trying analysis with " 2957 dump_printf_loc (MSG_NOTE, vect_location,
3084 "vector size %d\n", current_vector_size); 2958 "***** Re-trying analysis with "
2959 "vector size ");
2960 dump_dec (MSG_NOTE, current_vector_size);
2961 dump_printf (MSG_NOTE, "\n");
2962 }
3085 2963
3086 /* Start over. */ 2964 /* Start over. */
3087 gsi = region_begin; 2965 gsi = region_begin;
3088 } 2966 }
3089 } 2967 }
3091 return any_vectorized; 2969 return any_vectorized;
3092 } 2970 }
3093 2971
3094 2972
3095 /* Return 1 if vector type of boolean constant which is OPNUM 2973 /* Return 1 if vector type of boolean constant which is OPNUM
3096 operand in statement STMT is a boolean vector. */ 2974 operand in statement STMT_VINFO is a boolean vector. */
3097 2975
3098 static bool 2976 static bool
3099 vect_mask_constant_operand_p (gimple *stmt, int opnum) 2977 vect_mask_constant_operand_p (stmt_vec_info stmt_vinfo, int opnum)
3100 { 2978 {
3101 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); 2979 enum tree_code code = gimple_expr_code (stmt_vinfo->stmt);
3102 enum tree_code code = gimple_expr_code (stmt);
3103 tree op, vectype; 2980 tree op, vectype;
3104 gimple *def_stmt;
3105 enum vect_def_type dt; 2981 enum vect_def_type dt;
3106 2982
3107 /* For comparison and COND_EXPR type is chosen depending 2983 /* For comparison and COND_EXPR type is chosen depending
3108 on the other comparison operand. */ 2984 on the other comparison operand. */
3109 if (TREE_CODE_CLASS (code) == tcc_comparison) 2985 if (TREE_CODE_CLASS (code) == tcc_comparison)
3110 { 2986 {
2987 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3111 if (opnum) 2988 if (opnum)
3112 op = gimple_assign_rhs1 (stmt); 2989 op = gimple_assign_rhs1 (stmt);
3113 else 2990 else
3114 op = gimple_assign_rhs2 (stmt); 2991 op = gimple_assign_rhs2 (stmt);
3115 2992
3116 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt, 2993 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3117 &dt, &vectype))
3118 gcc_unreachable (); 2994 gcc_unreachable ();
3119 2995
3120 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype); 2996 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3121 } 2997 }
3122 2998
3123 if (code == COND_EXPR) 2999 if (code == COND_EXPR)
3124 { 3000 {
3001 gassign *stmt = as_a <gassign *> (stmt_vinfo->stmt);
3125 tree cond = gimple_assign_rhs1 (stmt); 3002 tree cond = gimple_assign_rhs1 (stmt);
3126 3003
3127 if (TREE_CODE (cond) == SSA_NAME) 3004 if (TREE_CODE (cond) == SSA_NAME)
3128 op = cond; 3005 op = cond;
3129 else if (opnum) 3006 else if (opnum)
3130 op = TREE_OPERAND (cond, 1); 3007 op = TREE_OPERAND (cond, 1);
3131 else 3008 else
3132 op = TREE_OPERAND (cond, 0); 3009 op = TREE_OPERAND (cond, 0);
3133 3010
3134 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &def_stmt, 3011 if (!vect_is_simple_use (op, stmt_vinfo->vinfo, &dt, &vectype))
3135 &dt, &vectype))
3136 gcc_unreachable (); 3012 gcc_unreachable ();
3137 3013
3138 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype); 3014 return !vectype || VECTOR_BOOLEAN_TYPE_P (vectype);
3139 } 3015 }
3140 3016
3141 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo)); 3017 return VECTOR_BOOLEAN_TYPE_P (STMT_VINFO_VECTYPE (stmt_vinfo));
3018 }
3019
3020 /* Build a variable-length vector in which the elements in ELTS are repeated
3021 to a fill NRESULTS vectors of type VECTOR_TYPE. Store the vectors in
3022 RESULTS and add any new instructions to SEQ.
3023
3024 The approach we use is:
3025
3026 (1) Find a vector mode VM with integer elements of mode IM.
3027
3028 (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3029 ELTS' has mode IM. This involves creating NELTS' VIEW_CONVERT_EXPRs
3030 from small vectors to IM.
3031
3032 (3) Duplicate each ELTS'[I] into a vector of mode VM.
3033
3034 (4) Use a tree of interleaving VEC_PERM_EXPRs to create VMs with the
3035 correct byte contents.
3036
3037 (5) Use VIEW_CONVERT_EXPR to cast the final VMs to the required type.
3038
3039 We try to find the largest IM for which this sequence works, in order
3040 to cut down on the number of interleaves. */
3041
3042 void
3043 duplicate_and_interleave (gimple_seq *seq, tree vector_type, vec<tree> elts,
3044 unsigned int nresults, vec<tree> &results)
3045 {
3046 unsigned int nelts = elts.length ();
3047 tree element_type = TREE_TYPE (vector_type);
3048
3049 /* (1) Find a vector mode VM with integer elements of mode IM. */
3050 unsigned int nvectors = 1;
3051 tree new_vector_type;
3052 tree permutes[2];
3053 if (!can_duplicate_and_interleave_p (nelts, TYPE_MODE (element_type),
3054 &nvectors, &new_vector_type,
3055 permutes))
3056 gcc_unreachable ();
3057
3058 /* Get a vector type that holds ELTS[0:NELTS/NELTS']. */
3059 unsigned int partial_nelts = nelts / nvectors;
3060 tree partial_vector_type = build_vector_type (element_type, partial_nelts);
3061
3062 tree_vector_builder partial_elts;
3063 auto_vec<tree, 32> pieces (nvectors * 2);
3064 pieces.quick_grow (nvectors * 2);
3065 for (unsigned int i = 0; i < nvectors; ++i)
3066 {
3067 /* (2) Replace ELTS[0:NELTS] with ELTS'[0:NELTS'], where each element of
3068 ELTS' has mode IM. */
3069 partial_elts.new_vector (partial_vector_type, partial_nelts, 1);
3070 for (unsigned int j = 0; j < partial_nelts; ++j)
3071 partial_elts.quick_push (elts[i * partial_nelts + j]);
3072 tree t = gimple_build_vector (seq, &partial_elts);
3073 t = gimple_build (seq, VIEW_CONVERT_EXPR,
3074 TREE_TYPE (new_vector_type), t);
3075
3076 /* (3) Duplicate each ELTS'[I] into a vector of mode VM. */
3077 pieces[i] = gimple_build_vector_from_val (seq, new_vector_type, t);
3078 }
3079
3080 /* (4) Use a tree of VEC_PERM_EXPRs to create a single VM with the
3081 correct byte contents.
3082
3083 We need to repeat the following operation log2(nvectors) times:
3084
3085 out[i * 2] = VEC_PERM_EXPR (in[i], in[i + hi_start], lo_permute);
3086 out[i * 2 + 1] = VEC_PERM_EXPR (in[i], in[i + hi_start], hi_permute);
3087
3088 However, if each input repeats every N elements and the VF is
3089 a multiple of N * 2, the HI result is the same as the LO. */
3090 unsigned int in_start = 0;
3091 unsigned int out_start = nvectors;
3092 unsigned int hi_start = nvectors / 2;
3093 /* A bound on the number of outputs needed to produce NRESULTS results
3094 in the final iteration. */
3095 unsigned int noutputs_bound = nvectors * nresults;
3096 for (unsigned int in_repeat = 1; in_repeat < nvectors; in_repeat *= 2)
3097 {
3098 noutputs_bound /= 2;
3099 unsigned int limit = MIN (noutputs_bound, nvectors);
3100 for (unsigned int i = 0; i < limit; ++i)
3101 {
3102 if ((i & 1) != 0
3103 && multiple_p (TYPE_VECTOR_SUBPARTS (new_vector_type),
3104 2 * in_repeat))
3105 {
3106 pieces[out_start + i] = pieces[out_start + i - 1];
3107 continue;
3108 }
3109
3110 tree output = make_ssa_name (new_vector_type);
3111 tree input1 = pieces[in_start + (i / 2)];
3112 tree input2 = pieces[in_start + (i / 2) + hi_start];
3113 gassign *stmt = gimple_build_assign (output, VEC_PERM_EXPR,
3114 input1, input2,
3115 permutes[i & 1]);
3116 gimple_seq_add_stmt (seq, stmt);
3117 pieces[out_start + i] = output;
3118 }
3119 std::swap (in_start, out_start);
3120 }
3121
3122 /* (5) Use VIEW_CONVERT_EXPR to cast the final VM to the required type. */
3123 results.reserve (nresults);
3124 for (unsigned int i = 0; i < nresults; ++i)
3125 if (i < nvectors)
3126 results.quick_push (gimple_build (seq, VIEW_CONVERT_EXPR, vector_type,
3127 pieces[in_start + i]));
3128 else
3129 results.quick_push (results[i - nvectors]);
3142 } 3130 }
3143 3131
3144 3132
3145 /* For constant and loop invariant defs of SLP_NODE this function returns 3133 /* For constant and loop invariant defs of SLP_NODE this function returns
3146 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts. 3134 (vector) defs (VEC_OPRNDS) that will be used in the vectorized stmts.
3152 static void 3140 static void
3153 vect_get_constant_vectors (tree op, slp_tree slp_node, 3141 vect_get_constant_vectors (tree op, slp_tree slp_node,
3154 vec<tree> *vec_oprnds, 3142 vec<tree> *vec_oprnds,
3155 unsigned int op_num, unsigned int number_of_vectors) 3143 unsigned int op_num, unsigned int number_of_vectors)
3156 { 3144 {
3157 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node); 3145 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
3158 gimple *stmt = stmts[0]; 3146 stmt_vec_info stmt_vinfo = stmts[0];
3159 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); 3147 gimple *stmt = stmt_vinfo->stmt;
3160 unsigned nunits; 3148 unsigned HOST_WIDE_INT nunits;
3161 tree vec_cst; 3149 tree vec_cst;
3162 unsigned j, number_of_places_left_in_vector; 3150 unsigned j, number_of_places_left_in_vector;
3163 tree vector_type; 3151 tree vector_type;
3164 tree vop; 3152 tree vop;
3165 int group_size = stmts.length (); 3153 int group_size = stmts.length ();
3169 voprnds.create (number_of_vectors); 3157 voprnds.create (number_of_vectors);
3170 bool constant_p, is_store; 3158 bool constant_p, is_store;
3171 tree neutral_op = NULL; 3159 tree neutral_op = NULL;
3172 enum tree_code code = gimple_expr_code (stmt); 3160 enum tree_code code = gimple_expr_code (stmt);
3173 gimple_seq ctor_seq = NULL; 3161 gimple_seq ctor_seq = NULL;
3162 auto_vec<tree, 16> permute_results;
3174 3163
3175 /* Check if vector type is a boolean vector. */ 3164 /* Check if vector type is a boolean vector. */
3176 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op)) 3165 if (VECT_SCALAR_BOOLEAN_TYPE_P (TREE_TYPE (op))
3177 && vect_mask_constant_operand_p (stmt, op_num)) 3166 && vect_mask_constant_operand_p (stmt_vinfo, op_num))
3178 vector_type 3167 vector_type
3179 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo)); 3168 = build_same_sized_truth_vector_type (STMT_VINFO_VECTYPE (stmt_vinfo));
3180 else 3169 else
3181 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op)); 3170 vector_type = get_vectype_for_scalar_type (TREE_TYPE (op));
3182 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
3183 3171
3184 if (STMT_VINFO_DATA_REF (stmt_vinfo)) 3172 if (STMT_VINFO_DATA_REF (stmt_vinfo))
3185 { 3173 {
3186 is_store = true; 3174 is_store = true;
3187 op = gimple_assign_rhs1 (stmt); 3175 op = gimple_assign_rhs1 (stmt);
3205 3193
3206 For example, NUNITS is four as before, and the group size is 8 3194 For example, NUNITS is four as before, and the group size is 8
3207 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and 3195 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
3208 {s5, s6, s7, s8}. */ 3196 {s5, s6, s7, s8}. */
3209 3197
3198 /* When using duplicate_and_interleave, we just need one element for
3199 each scalar statement. */
3200 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
3201 nunits = group_size;
3202
3210 number_of_copies = nunits * number_of_vectors / group_size; 3203 number_of_copies = nunits * number_of_vectors / group_size;
3211 3204
3212 number_of_places_left_in_vector = nunits; 3205 number_of_places_left_in_vector = nunits;
3213 constant_p = true; 3206 constant_p = true;
3214 auto_vec<tree, 32> elts (nunits); 3207 tree_vector_builder elts (vector_type, nunits, 1);
3215 elts.quick_grow (nunits); 3208 elts.quick_grow (nunits);
3216 bool place_after_defs = false; 3209 bool place_after_defs = false;
3217 for (j = 0; j < number_of_copies; j++) 3210 for (j = 0; j < number_of_copies; j++)
3218 { 3211 {
3219 for (i = group_size - 1; stmts.iterate (i, &stmt); i--) 3212 for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
3220 { 3213 {
3214 stmt = stmt_vinfo->stmt;
3221 if (is_store) 3215 if (is_store)
3222 op = gimple_assign_rhs1 (stmt); 3216 op = gimple_assign_rhs1 (stmt);
3223 else 3217 else
3224 { 3218 {
3225 switch (code) 3219 switch (code)
3326 == gimple_bb (SSA_NAME_DEF_STMT (orig_op)))) 3320 == gimple_bb (SSA_NAME_DEF_STMT (orig_op))))
3327 place_after_defs = true; 3321 place_after_defs = true;
3328 3322
3329 if (number_of_places_left_in_vector == 0) 3323 if (number_of_places_left_in_vector == 0)
3330 { 3324 {
3331 if (constant_p) 3325 if (constant_p
3332 vec_cst = build_vector (vector_type, elts); 3326 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
3327 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
3328 vec_cst = gimple_build_vector (&ctor_seq, &elts);
3333 else 3329 else
3334 { 3330 {
3335 vec<constructor_elt, va_gc> *v; 3331 if (vec_oprnds->is_empty ())
3336 unsigned k; 3332 duplicate_and_interleave (&ctor_seq, vector_type, elts,
3337 vec_alloc (v, nunits); 3333 number_of_vectors,
3338 for (k = 0; k < nunits; ++k) 3334 permute_results);
3339 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, elts[k]); 3335 vec_cst = permute_results[number_of_vectors - j - 1];
3340 vec_cst = build_constructor (vector_type, v);
3341 } 3336 }
3342 tree init; 3337 tree init;
3343 gimple_stmt_iterator gsi; 3338 gimple_stmt_iterator gsi;
3344 if (place_after_defs) 3339 if (place_after_defs)
3345 { 3340 {
3346 gsi = gsi_for_stmt 3341 stmt_vec_info last_stmt_info
3347 (vect_find_last_scalar_stmt_in_slp (slp_node)); 3342 = vect_find_last_scalar_stmt_in_slp (slp_node);
3348 init = vect_init_vector (stmt, vec_cst, vector_type, &gsi); 3343 gsi = gsi_for_stmt (last_stmt_info->stmt);
3344 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3345 &gsi);
3349 } 3346 }
3350 else 3347 else
3351 init = vect_init_vector (stmt, vec_cst, vector_type, NULL); 3348 init = vect_init_vector (stmt_vinfo, vec_cst, vector_type,
3349 NULL);
3352 if (ctor_seq != NULL) 3350 if (ctor_seq != NULL)
3353 { 3351 {
3354 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init)); 3352 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (init));
3355 gsi_insert_seq_before_without_update (&gsi, ctor_seq, 3353 gsi_insert_seq_before (&gsi, ctor_seq, GSI_SAME_STMT);
3356 GSI_SAME_STMT);
3357 ctor_seq = NULL; 3354 ctor_seq = NULL;
3358 } 3355 }
3359 voprnds.quick_push (init); 3356 voprnds.quick_push (init);
3360 place_after_defs = false; 3357 place_after_defs = false;
3361 number_of_places_left_in_vector = nunits; 3358 number_of_places_left_in_vector = nunits;
3362 constant_p = true; 3359 constant_p = true;
3360 elts.new_vector (vector_type, nunits, 1);
3361 elts.quick_grow (nunits);
3363 } 3362 }
3364 } 3363 }
3365 } 3364 }
3366 3365
3367 /* Since the vectors are created in the reverse order, we should invert 3366 /* Since the vectors are created in the reverse order, we should invert
3404 3403
3405 static void 3404 static void
3406 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds) 3405 vect_get_slp_vect_defs (slp_tree slp_node, vec<tree> *vec_oprnds)
3407 { 3406 {
3408 tree vec_oprnd; 3407 tree vec_oprnd;
3409 gimple *vec_def_stmt; 3408 stmt_vec_info vec_def_stmt_info;
3410 unsigned int i; 3409 unsigned int i;
3411 3410
3412 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ()); 3411 gcc_assert (SLP_TREE_VEC_STMTS (slp_node).exists ());
3413 3412
3414 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt) 3413 FOR_EACH_VEC_ELT (SLP_TREE_VEC_STMTS (slp_node), i, vec_def_stmt_info)
3415 { 3414 {
3416 gcc_assert (vec_def_stmt); 3415 gcc_assert (vec_def_stmt_info);
3417 if (gimple_code (vec_def_stmt) == GIMPLE_PHI) 3416 if (gphi *vec_def_phi = dyn_cast <gphi *> (vec_def_stmt_info->stmt))
3418 vec_oprnd = gimple_phi_result (vec_def_stmt); 3417 vec_oprnd = gimple_phi_result (vec_def_phi);
3419 else 3418 else
3420 vec_oprnd = gimple_get_lhs (vec_def_stmt); 3419 vec_oprnd = gimple_get_lhs (vec_def_stmt_info->stmt);
3421 vec_oprnds->quick_push (vec_oprnd); 3420 vec_oprnds->quick_push (vec_oprnd);
3422 } 3421 }
3423 } 3422 }
3424 3423
3425 3424
3432 3431
3433 void 3432 void
3434 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node, 3433 vect_get_slp_defs (vec<tree> ops, slp_tree slp_node,
3435 vec<vec<tree> > *vec_oprnds) 3434 vec<vec<tree> > *vec_oprnds)
3436 { 3435 {
3437 gimple *first_stmt;
3438 int number_of_vects = 0, i; 3436 int number_of_vects = 0, i;
3439 unsigned int child_index = 0; 3437 unsigned int child_index = 0;
3440 HOST_WIDE_INT lhs_size_unit, rhs_size_unit; 3438 HOST_WIDE_INT lhs_size_unit, rhs_size_unit;
3441 slp_tree child = NULL; 3439 slp_tree child = NULL;
3442 vec<tree> vec_defs; 3440 vec<tree> vec_defs;
3443 tree oprnd; 3441 tree oprnd;
3444 bool vectorized_defs; 3442 bool vectorized_defs;
3445 3443
3446 first_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[0]; 3444 stmt_vec_info first_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[0];
3447 FOR_EACH_VEC_ELT (ops, i, oprnd) 3445 FOR_EACH_VEC_ELT (ops, i, oprnd)
3448 { 3446 {
3449 /* For each operand we check if it has vectorized definitions in a child 3447 /* For each operand we check if it has vectorized definitions in a child
3450 node or we need to create them (for invariants and constants). We 3448 node or we need to create them (for invariants and constants). We
3451 check if the LHS of the first stmt of the next child matches OPRND. 3449 check if the LHS of the first stmt of the next child matches OPRND.
3458 child = SLP_TREE_CHILDREN (slp_node)[child_index]; 3456 child = SLP_TREE_CHILDREN (slp_node)[child_index];
3459 3457
3460 /* We have to check both pattern and original def, if available. */ 3458 /* We have to check both pattern and original def, if available. */
3461 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def) 3459 if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
3462 { 3460 {
3463 gimple *first_def = SLP_TREE_SCALAR_STMTS (child)[0]; 3461 stmt_vec_info first_def_info = SLP_TREE_SCALAR_STMTS (child)[0];
3464 gimple *related 3462 stmt_vec_info related = STMT_VINFO_RELATED_STMT (first_def_info);
3465 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first_def));
3466 tree first_def_op; 3463 tree first_def_op;
3467 3464
3468 if (gimple_code (first_def) == GIMPLE_PHI) 3465 if (gphi *first_def = dyn_cast <gphi *> (first_def_info->stmt))
3469 first_def_op = gimple_phi_result (first_def); 3466 first_def_op = gimple_phi_result (first_def);
3470 else 3467 else
3471 first_def_op = gimple_get_lhs (first_def); 3468 first_def_op = gimple_get_lhs (first_def_info->stmt);
3472 if (operand_equal_p (oprnd, first_def_op, 0) 3469 if (operand_equal_p (oprnd, first_def_op, 0)
3473 || (related 3470 || (related
3474 && operand_equal_p (oprnd, gimple_get_lhs (related), 0))) 3471 && operand_equal_p (oprnd,
3472 gimple_get_lhs (related->stmt), 0)))
3475 { 3473 {
3476 /* The number of vector defs is determined by the number of 3474 /* The number of vector defs is determined by the number of
3477 vector statements in the node from which we get those 3475 vector statements in the node from which we get those
3478 statements. */ 3476 statements. */
3479 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child); 3477 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (child);
3492 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 3490 number_of_vects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
3493 /* Number of vector stmts was calculated according to LHS in 3491 /* Number of vector stmts was calculated according to LHS in
3494 vect_schedule_slp_instance (), fix it by replacing LHS with 3492 vect_schedule_slp_instance (), fix it by replacing LHS with
3495 RHS, if necessary. See vect_get_smallest_scalar_type () for 3493 RHS, if necessary. See vect_get_smallest_scalar_type () for
3496 details. */ 3494 details. */
3497 vect_get_smallest_scalar_type (first_stmt, &lhs_size_unit, 3495 vect_get_smallest_scalar_type (first_stmt_info, &lhs_size_unit,
3498 &rhs_size_unit); 3496 &rhs_size_unit);
3499 if (rhs_size_unit != lhs_size_unit) 3497 if (rhs_size_unit != lhs_size_unit)
3500 { 3498 {
3501 number_of_vects *= rhs_size_unit; 3499 number_of_vects *= rhs_size_unit;
3502 number_of_vects /= lhs_size_unit; 3500 number_of_vects /= lhs_size_unit;
3503 } 3501 }
3527 permute statements for the SLP node NODE of the SLP instance 3525 permute statements for the SLP node NODE of the SLP instance
3528 SLP_NODE_INSTANCE. */ 3526 SLP_NODE_INSTANCE. */
3529 3527
3530 bool 3528 bool
3531 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain, 3529 vect_transform_slp_perm_load (slp_tree node, vec<tree> dr_chain,
3532 gimple_stmt_iterator *gsi, int vf, 3530 gimple_stmt_iterator *gsi, poly_uint64 vf,
3533 slp_instance slp_node_instance, bool analyze_only, 3531 slp_instance slp_node_instance, bool analyze_only,
3534 unsigned *n_perms) 3532 unsigned *n_perms)
3535 { 3533 {
3536 gimple *stmt = SLP_TREE_SCALAR_STMTS (node)[0]; 3534 stmt_vec_info stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3537 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 3535 vec_info *vinfo = stmt_info->vinfo;
3538 tree mask_element_type = NULL_TREE, mask_type; 3536 int vec_index = 0;
3539 int nunits, vec_index = 0;
3540 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 3537 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
3541 int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance); 3538 unsigned int group_size = SLP_INSTANCE_GROUP_SIZE (slp_node_instance);
3542 int mask_element; 3539 unsigned int mask_element;
3543 machine_mode mode; 3540 machine_mode mode;
3544 3541
3545 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)) 3542 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info))
3546 return false; 3543 return false;
3547 3544
3548 stmt_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (stmt_info)); 3545 stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
3549 3546
3550 mode = TYPE_MODE (vectype); 3547 mode = TYPE_MODE (vectype);
3551 3548 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3552 /* The generic VEC_PERM_EXPR code always uses an integral type of the
3553 same size as the vector element being permuted. */
3554 mask_element_type = lang_hooks.types.type_for_mode
3555 (int_mode_for_mode (TYPE_MODE (TREE_TYPE (vectype))).require (), 1);
3556 mask_type = get_vectype_for_scalar_type (mask_element_type);
3557 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3558 auto_vec_perm_indices mask (nunits);
3559 mask.quick_grow (nunits);
3560 3549
3561 /* Initialize the vect stmts of NODE to properly insert the generated 3550 /* Initialize the vect stmts of NODE to properly insert the generated
3562 stmts later. */ 3551 stmts later. */
3563 if (! analyze_only) 3552 if (! analyze_only)
3564 for (unsigned i = SLP_TREE_VEC_STMTS (node).length (); 3553 for (unsigned i = SLP_TREE_VEC_STMTS (node).length ();
3582 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation 3571 vectors: {a0,b0,c0,a1} and {b1,c1,a2,b2}, and for the last permutation
3583 we need the second and the third vectors: {b1,c1,a2,b2} and 3572 we need the second and the third vectors: {b1,c1,a2,b2} and
3584 {c2,a3,b3,c3}. */ 3573 {c2,a3,b3,c3}. */
3585 3574
3586 int vect_stmts_counter = 0; 3575 int vect_stmts_counter = 0;
3587 int index = 0; 3576 unsigned int index = 0;
3588 int first_vec_index = -1; 3577 int first_vec_index = -1;
3589 int second_vec_index = -1; 3578 int second_vec_index = -1;
3590 bool noop_p = true; 3579 bool noop_p = true;
3591 *n_perms = 0; 3580 *n_perms = 0;
3592 3581
3593 for (int j = 0; j < vf; j++) 3582 vec_perm_builder mask;
3594 { 3583 unsigned int nelts_to_build;
3595 for (int k = 0; k < group_size; k++) 3584 unsigned int nvectors_per_build;
3596 { 3585 bool repeating_p = (group_size == DR_GROUP_SIZE (stmt_info)
3597 int i = (SLP_TREE_LOAD_PERMUTATION (node)[k] 3586 && multiple_p (nunits, group_size));
3598 + j * STMT_VINFO_GROUP_SIZE (stmt_info)); 3587 if (repeating_p)
3599 vec_index = i / nunits; 3588 {
3600 mask_element = i % nunits; 3589 /* A single vector contains a whole number of copies of the node, so:
3590 (a) all permutes can use the same mask; and
3591 (b) the permutes only need a single vector input. */
3592 mask.new_vector (nunits, group_size, 3);
3593 nelts_to_build = mask.encoded_nelts ();
3594 nvectors_per_build = SLP_TREE_VEC_STMTS (node).length ();
3595 }
3596 else
3597 {
3598 /* We need to construct a separate mask for each vector statement. */
3599 unsigned HOST_WIDE_INT const_nunits, const_vf;
3600 if (!nunits.is_constant (&const_nunits)
3601 || !vf.is_constant (&const_vf))
3602 return false;
3603 mask.new_vector (const_nunits, const_nunits, 1);
3604 nelts_to_build = const_vf * group_size;
3605 nvectors_per_build = 1;
3606 }
3607
3608 unsigned int count = mask.encoded_nelts ();
3609 mask.quick_grow (count);
3610 vec_perm_indices indices;
3611
3612 for (unsigned int j = 0; j < nelts_to_build; j++)
3613 {
3614 unsigned int iter_num = j / group_size;
3615 unsigned int stmt_num = j % group_size;
3616 unsigned int i = (iter_num * DR_GROUP_SIZE (stmt_info)
3617 + SLP_TREE_LOAD_PERMUTATION (node)[stmt_num]);
3618 if (repeating_p)
3619 {
3620 first_vec_index = 0;
3621 mask_element = i;
3622 }
3623 else
3624 {
3625 /* Enforced before the loop when !repeating_p. */
3626 unsigned int const_nunits = nunits.to_constant ();
3627 vec_index = i / const_nunits;
3628 mask_element = i % const_nunits;
3601 if (vec_index == first_vec_index 3629 if (vec_index == first_vec_index
3602 || first_vec_index == -1) 3630 || first_vec_index == -1)
3603 { 3631 {
3604 first_vec_index = vec_index; 3632 first_vec_index = vec_index;
3605 } 3633 }
3606 else if (vec_index == second_vec_index 3634 else if (vec_index == second_vec_index
3607 || second_vec_index == -1) 3635 || second_vec_index == -1)
3608 { 3636 {
3609 second_vec_index = vec_index; 3637 second_vec_index = vec_index;
3610 mask_element += nunits; 3638 mask_element += const_nunits;
3611 } 3639 }
3612 else 3640 else
3613 { 3641 {
3614 if (dump_enabled_p ()) 3642 if (dump_enabled_p ())
3643 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3644 "permutation requires at "
3645 "least three vectors %G",
3646 stmt_info->stmt);
3647 gcc_assert (analyze_only);
3648 return false;
3649 }
3650
3651 gcc_assert (mask_element < 2 * const_nunits);
3652 }
3653
3654 if (mask_element != index)
3655 noop_p = false;
3656 mask[index++] = mask_element;
3657
3658 if (index == count && !noop_p)
3659 {
3660 indices.new_vector (mask, second_vec_index == -1 ? 1 : 2, nunits);
3661 if (!can_vec_perm_const_p (mode, indices))
3662 {
3663 if (dump_enabled_p ())
3615 { 3664 {
3616 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3665 dump_printf_loc (MSG_MISSED_OPTIMIZATION,
3617 "permutation requires at " 3666 vect_location,
3618 "least three vectors "); 3667 "unsupported vect permute { ");
3619 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, 3668 for (i = 0; i < count; ++i)
3620 stmt, 0); 3669 {
3670 dump_dec (MSG_MISSED_OPTIMIZATION, mask[i]);
3671 dump_printf (MSG_MISSED_OPTIMIZATION, " ");
3672 }
3673 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3621 } 3674 }
3622 gcc_assert (analyze_only); 3675 gcc_assert (analyze_only);
3623 return false; 3676 return false;
3624 } 3677 }
3625 3678
3626 gcc_assert (mask_element >= 0 3679 ++*n_perms;
3627 && mask_element < 2 * nunits); 3680 }
3628 if (mask_element != index) 3681
3629 noop_p = false; 3682 if (index == count)
3630 mask[index++] = mask_element; 3683 {
3631 3684 if (!analyze_only)
3632 if (index == nunits)
3633 { 3685 {
3634 if (! noop_p 3686 tree mask_vec = NULL_TREE;
3635 && ! can_vec_perm_p (mode, false, &mask)) 3687
3688 if (! noop_p)
3689 mask_vec = vect_gen_perm_mask_checked (vectype, indices);
3690
3691 if (second_vec_index == -1)
3692 second_vec_index = first_vec_index;
3693
3694 for (unsigned int ri = 0; ri < nvectors_per_build; ++ri)
3636 { 3695 {
3637 if (dump_enabled_p ()) 3696 /* Generate the permute statement if necessary. */
3638 { 3697 tree first_vec = dr_chain[first_vec_index + ri];
3639 dump_printf_loc (MSG_MISSED_OPTIMIZATION, 3698 tree second_vec = dr_chain[second_vec_index + ri];
3640 vect_location, 3699 stmt_vec_info perm_stmt_info;
3641 "unsupported vect permute { ");
3642 for (i = 0; i < nunits; ++i)
3643 dump_printf (MSG_MISSED_OPTIMIZATION, "%d ", mask[i]);
3644 dump_printf (MSG_MISSED_OPTIMIZATION, "}\n");
3645 }
3646 gcc_assert (analyze_only);
3647 return false;
3648 }
3649
3650 if (! noop_p)
3651 ++*n_perms;
3652
3653 if (!analyze_only)
3654 {
3655 tree mask_vec = NULL_TREE;
3656
3657 if (! noop_p) 3700 if (! noop_p)
3658 { 3701 {
3659 auto_vec<tree, 32> mask_elts (nunits); 3702 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
3660 for (int l = 0; l < nunits; ++l)
3661 mask_elts.quick_push (build_int_cst (mask_element_type,
3662 mask[l]));
3663 mask_vec = build_vector (mask_type, mask_elts);
3664 }
3665
3666 if (second_vec_index == -1)
3667 second_vec_index = first_vec_index;
3668
3669 /* Generate the permute statement if necessary. */
3670 tree first_vec = dr_chain[first_vec_index];
3671 tree second_vec = dr_chain[second_vec_index];
3672 gimple *perm_stmt;
3673 if (! noop_p)
3674 {
3675 tree perm_dest 3703 tree perm_dest
3676 = vect_create_destination_var (gimple_assign_lhs (stmt), 3704 = vect_create_destination_var (gimple_assign_lhs (stmt),
3677 vectype); 3705 vectype);
3678 perm_dest = make_ssa_name (perm_dest); 3706 perm_dest = make_ssa_name (perm_dest);
3679 perm_stmt = gimple_build_assign (perm_dest, 3707 gassign *perm_stmt
3680 VEC_PERM_EXPR, 3708 = gimple_build_assign (perm_dest, VEC_PERM_EXPR,
3681 first_vec, second_vec, 3709 first_vec, second_vec,
3682 mask_vec); 3710 mask_vec);
3683 vect_finish_stmt_generation (stmt, perm_stmt, gsi); 3711 perm_stmt_info
3712 = vect_finish_stmt_generation (stmt_info, perm_stmt,
3713 gsi);
3684 } 3714 }
3685 else 3715 else
3686 /* If mask was NULL_TREE generate the requested 3716 /* If mask was NULL_TREE generate the requested
3687 identity transform. */ 3717 identity transform. */
3688 perm_stmt = SSA_NAME_DEF_STMT (first_vec); 3718 perm_stmt_info = vinfo->lookup_def (first_vec);
3689 3719
3690 /* Store the vector statement in NODE. */ 3720 /* Store the vector statement in NODE. */
3691 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++] = perm_stmt; 3721 SLP_TREE_VEC_STMTS (node)[vect_stmts_counter++]
3722 = perm_stmt_info;
3692 } 3723 }
3693
3694 index = 0;
3695 first_vec_index = -1;
3696 second_vec_index = -1;
3697 noop_p = true;
3698 } 3724 }
3725
3726 index = 0;
3727 first_vec_index = -1;
3728 second_vec_index = -1;
3729 noop_p = true;
3699 } 3730 }
3700 } 3731 }
3701 3732
3702 return true; 3733 return true;
3703 } 3734 }
3704 3735
3705
3706
3707 /* Vectorize SLP instance tree in postorder. */ 3736 /* Vectorize SLP instance tree in postorder. */
3708 3737
3709 static bool 3738 static void
3710 vect_schedule_slp_instance (slp_tree node, slp_instance instance) 3739 vect_schedule_slp_instance (slp_tree node, slp_instance instance,
3711 { 3740 scalar_stmts_to_slp_tree_map_t *bst_map)
3712 gimple *stmt; 3741 {
3713 bool grouped_store, is_store;
3714 gimple_stmt_iterator si; 3742 gimple_stmt_iterator si;
3715 stmt_vec_info stmt_info; 3743 stmt_vec_info stmt_info;
3716 unsigned int group_size; 3744 unsigned int group_size;
3717 tree vectype; 3745 tree vectype;
3718 int i, j; 3746 int i, j;
3719 slp_tree child; 3747 slp_tree child;
3720 3748
3721 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def) 3749 if (SLP_TREE_DEF_TYPE (node) != vect_internal_def)
3722 return false; 3750 return;
3723 3751
3752 /* See if we have already vectorized the same set of stmts and reuse their
3753 vectorized stmts. */
3754 if (slp_tree *leader = bst_map->get (SLP_TREE_SCALAR_STMTS (node)))
3755 {
3756 SLP_TREE_VEC_STMTS (node).safe_splice (SLP_TREE_VEC_STMTS (*leader));
3757 return;
3758 }
3759
3760 bst_map->put (SLP_TREE_SCALAR_STMTS (node).copy (), node);
3724 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) 3761 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3725 vect_schedule_slp_instance (child, instance); 3762 vect_schedule_slp_instance (child, instance, bst_map);
3726 3763
3727 /* Push SLP node def-type to stmts. */ 3764 /* Push SLP node def-type to stmts. */
3728 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) 3765 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3729 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) 3766 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3730 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt) 3767 {
3731 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = SLP_TREE_DEF_TYPE (child); 3768 stmt_vec_info child_stmt_info;
3732 3769 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
3733 stmt = SLP_TREE_SCALAR_STMTS (node)[0]; 3770 STMT_VINFO_DEF_TYPE (child_stmt_info) = SLP_TREE_DEF_TYPE (child);
3734 stmt_info = vinfo_for_stmt (stmt); 3771 }
3772
3773 stmt_info = SLP_TREE_SCALAR_STMTS (node)[0];
3735 3774
3736 /* VECTYPE is the type of the destination. */ 3775 /* VECTYPE is the type of the destination. */
3737 vectype = STMT_VINFO_VECTYPE (stmt_info); 3776 vectype = STMT_VINFO_VECTYPE (stmt_info);
3777 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3738 group_size = SLP_INSTANCE_GROUP_SIZE (instance); 3778 group_size = SLP_INSTANCE_GROUP_SIZE (instance);
3739 3779
3780 gcc_assert (SLP_TREE_NUMBER_OF_VEC_STMTS (node) != 0);
3740 if (!SLP_TREE_VEC_STMTS (node).exists ()) 3781 if (!SLP_TREE_VEC_STMTS (node).exists ())
3741 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node)); 3782 SLP_TREE_VEC_STMTS (node).create (SLP_TREE_NUMBER_OF_VEC_STMTS (node));
3742 3783
3743 if (dump_enabled_p ()) 3784 if (dump_enabled_p ())
3744 { 3785 dump_printf_loc (MSG_NOTE, vect_location,
3745 dump_printf_loc (MSG_NOTE,vect_location, 3786 "------>vectorizing SLP node starting from: %G",
3746 "------>vectorizing SLP node starting from: "); 3787 stmt_info->stmt);
3747 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
3748 }
3749 3788
3750 /* Vectorized stmts go before the last scalar stmt which is where 3789 /* Vectorized stmts go before the last scalar stmt which is where
3751 all uses are ready. */ 3790 all uses are ready. */
3752 si = gsi_for_stmt (vect_find_last_scalar_stmt_in_slp (node)); 3791 stmt_vec_info last_stmt_info = vect_find_last_scalar_stmt_in_slp (node);
3792 si = gsi_for_stmt (last_stmt_info->stmt);
3753 3793
3754 /* Mark the first element of the reduction chain as reduction to properly 3794 /* Mark the first element of the reduction chain as reduction to properly
3755 transform the node. In the analysis phase only the last element of the 3795 transform the node. In the analysis phase only the last element of the
3756 chain is marked as reduction. */ 3796 chain is marked as reduction. */
3757 if (GROUP_FIRST_ELEMENT (stmt_info) && !STMT_VINFO_GROUPED_ACCESS (stmt_info) 3797 if (!STMT_VINFO_GROUPED_ACCESS (stmt_info)
3758 && GROUP_FIRST_ELEMENT (stmt_info) == stmt) 3798 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)
3799 && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info)
3759 { 3800 {
3760 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def; 3801 STMT_VINFO_DEF_TYPE (stmt_info) = vect_reduction_def;
3761 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; 3802 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
3762 } 3803 }
3763 3804
3764 /* Handle two-operation SLP nodes by vectorizing the group with 3805 /* Handle two-operation SLP nodes by vectorizing the group with
3765 both operations and then performing a merge. */ 3806 both operations and then performing a merge. */
3766 if (SLP_TREE_TWO_OPERATORS (node)) 3807 if (SLP_TREE_TWO_OPERATORS (node))
3767 { 3808 {
3809 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
3768 enum tree_code code0 = gimple_assign_rhs_code (stmt); 3810 enum tree_code code0 = gimple_assign_rhs_code (stmt);
3769 enum tree_code ocode = ERROR_MARK; 3811 enum tree_code ocode = ERROR_MARK;
3770 gimple *ostmt; 3812 stmt_vec_info ostmt_info;
3771 auto_vec_perm_indices mask (group_size); 3813 vec_perm_builder mask (group_size, group_size, 1);
3772 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt) 3814 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, ostmt_info)
3773 if (gimple_assign_rhs_code (ostmt) != code0) 3815 {
3774 { 3816 gassign *ostmt = as_a <gassign *> (ostmt_info->stmt);
3775 mask.quick_push (1); 3817 if (gimple_assign_rhs_code (ostmt) != code0)
3776 ocode = gimple_assign_rhs_code (ostmt); 3818 {
3777 } 3819 mask.quick_push (1);
3778 else 3820 ocode = gimple_assign_rhs_code (ostmt);
3779 mask.quick_push (0); 3821 }
3822 else
3823 mask.quick_push (0);
3824 }
3780 if (ocode != ERROR_MARK) 3825 if (ocode != ERROR_MARK)
3781 { 3826 {
3782 vec<gimple *> v0; 3827 vec<stmt_vec_info> v0;
3783 vec<gimple *> v1; 3828 vec<stmt_vec_info> v1;
3784 unsigned j; 3829 unsigned j;
3785 tree tmask = NULL_TREE; 3830 tree tmask = NULL_TREE;
3786 vect_transform_stmt (stmt, &si, &grouped_store, node, instance); 3831 vect_transform_stmt (stmt_info, &si, node, instance);
3787 v0 = SLP_TREE_VEC_STMTS (node).copy (); 3832 v0 = SLP_TREE_VEC_STMTS (node).copy ();
3788 SLP_TREE_VEC_STMTS (node).truncate (0); 3833 SLP_TREE_VEC_STMTS (node).truncate (0);
3789 gimple_assign_set_rhs_code (stmt, ocode); 3834 gimple_assign_set_rhs_code (stmt, ocode);
3790 vect_transform_stmt (stmt, &si, &grouped_store, node, instance); 3835 vect_transform_stmt (stmt_info, &si, node, instance);
3791 gimple_assign_set_rhs_code (stmt, code0); 3836 gimple_assign_set_rhs_code (stmt, code0);
3792 v1 = SLP_TREE_VEC_STMTS (node).copy (); 3837 v1 = SLP_TREE_VEC_STMTS (node).copy ();
3793 SLP_TREE_VEC_STMTS (node).truncate (0); 3838 SLP_TREE_VEC_STMTS (node).truncate (0);
3794 tree meltype = build_nonstandard_integer_type 3839 tree meltype = build_nonstandard_integer_type
3795 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1); 3840 (GET_MODE_BITSIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype))), 1);
3796 tree mvectype = get_same_sized_vectype (meltype, vectype); 3841 tree mvectype = get_same_sized_vectype (meltype, vectype);
3797 unsigned k = 0, l; 3842 unsigned k = 0, l;
3798 for (j = 0; j < v0.length (); ++j) 3843 for (j = 0; j < v0.length (); ++j)
3799 { 3844 {
3800 unsigned int nunits = TYPE_VECTOR_SUBPARTS (vectype); 3845 /* Enforced by vect_build_slp_tree, which rejects variable-length
3801 auto_vec<tree, 32> melts (nunits); 3846 vectors for SLP_TREE_TWO_OPERATORS. */
3802 for (l = 0; l < nunits; ++l) 3847 unsigned int const_nunits = nunits.to_constant ();
3848 tree_vector_builder melts (mvectype, const_nunits, 1);
3849 for (l = 0; l < const_nunits; ++l)
3803 { 3850 {
3804 if (k >= group_size) 3851 if (k >= group_size)
3805 k = 0; 3852 k = 0;
3806 tree t = build_int_cst (meltype, mask[k++] * nunits + l); 3853 tree t = build_int_cst (meltype,
3854 mask[k++] * const_nunits + l);
3807 melts.quick_push (t); 3855 melts.quick_push (t);
3808 } 3856 }
3809 tmask = build_vector (mvectype, melts); 3857 tmask = melts.build ();
3810 3858
3811 /* ??? Not all targets support a VEC_PERM_EXPR with a 3859 /* ??? Not all targets support a VEC_PERM_EXPR with a
3812 constant mask that would translate to a vec_merge RTX 3860 constant mask that would translate to a vec_merge RTX
3813 (with their vec_perm_const_ok). We can either not 3861 (with their vec_perm_const_ok). We can either not
3814 vectorize in that case or let veclower do its job. 3862 vectorize in that case or let veclower do its job.
3816 plus/minus we'd eventually like to match targets 3864 plus/minus we'd eventually like to match targets
3817 vector addsub instructions. */ 3865 vector addsub instructions. */
3818 gimple *vstmt; 3866 gimple *vstmt;
3819 vstmt = gimple_build_assign (make_ssa_name (vectype), 3867 vstmt = gimple_build_assign (make_ssa_name (vectype),
3820 VEC_PERM_EXPR, 3868 VEC_PERM_EXPR,
3821 gimple_assign_lhs (v0[j]), 3869 gimple_assign_lhs (v0[j]->stmt),
3822 gimple_assign_lhs (v1[j]), tmask); 3870 gimple_assign_lhs (v1[j]->stmt),
3823 vect_finish_stmt_generation (stmt, vstmt, &si); 3871 tmask);
3824 SLP_TREE_VEC_STMTS (node).quick_push (vstmt); 3872 SLP_TREE_VEC_STMTS (node).quick_push
3873 (vect_finish_stmt_generation (stmt_info, vstmt, &si));
3825 } 3874 }
3826 v0.release (); 3875 v0.release ();
3827 v1.release (); 3876 v1.release ();
3828 return false; 3877 return;
3829 } 3878 }
3830 } 3879 }
3831 is_store = vect_transform_stmt (stmt, &si, &grouped_store, node, instance); 3880 vect_transform_stmt (stmt_info, &si, node, instance);
3832 3881
3833 /* Restore stmt def-types. */ 3882 /* Restore stmt def-types. */
3834 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) 3883 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3835 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def) 3884 if (SLP_TREE_DEF_TYPE (child) != vect_internal_def)
3836 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, stmt) 3885 {
3837 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) = vect_internal_def; 3886 stmt_vec_info child_stmt_info;
3838 3887 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (child), j, child_stmt_info)
3839 return is_store; 3888 STMT_VINFO_DEF_TYPE (child_stmt_info) = vect_internal_def;
3889 }
3840 } 3890 }
3841 3891
3842 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero. 3892 /* Replace scalar calls from SLP node NODE with setting of their lhs to zero.
3843 For loop vectorization this is done in vectorizable_call, but for SLP 3893 For loop vectorization this is done in vectorizable_call, but for SLP
3844 it needs to be deferred until end of vect_schedule_slp, because multiple 3894 it needs to be deferred until end of vect_schedule_slp, because multiple
3845 SLP instances may refer to the same scalar stmt. */ 3895 SLP instances may refer to the same scalar stmt. */
3846 3896
3847 static void 3897 static void
3848 vect_remove_slp_scalar_calls (slp_tree node) 3898 vect_remove_slp_scalar_calls (slp_tree node)
3849 { 3899 {
3850 gimple *stmt, *new_stmt; 3900 gimple *new_stmt;
3851 gimple_stmt_iterator gsi; 3901 gimple_stmt_iterator gsi;
3852 int i; 3902 int i;
3853 slp_tree child; 3903 slp_tree child;
3854 tree lhs; 3904 tree lhs;
3855 stmt_vec_info stmt_info; 3905 stmt_vec_info stmt_info;
3858 return; 3908 return;
3859 3909
3860 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child) 3910 FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
3861 vect_remove_slp_scalar_calls (child); 3911 vect_remove_slp_scalar_calls (child);
3862 3912
3863 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt) 3913 FOR_EACH_VEC_ELT (SLP_TREE_SCALAR_STMTS (node), i, stmt_info)
3864 { 3914 {
3865 if (!is_gimple_call (stmt) || gimple_bb (stmt) == NULL) 3915 gcall *stmt = dyn_cast <gcall *> (stmt_info->stmt);
3916 if (!stmt || gimple_bb (stmt) == NULL)
3866 continue; 3917 continue;
3867 stmt_info = vinfo_for_stmt (stmt); 3918 if (is_pattern_stmt_p (stmt_info)
3868 if (stmt_info == NULL
3869 || is_pattern_stmt_p (stmt_info)
3870 || !PURE_SLP_STMT (stmt_info)) 3919 || !PURE_SLP_STMT (stmt_info))
3871 continue; 3920 continue;
3872 lhs = gimple_call_lhs (stmt); 3921 lhs = gimple_call_lhs (stmt);
3873 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs))); 3922 new_stmt = gimple_build_assign (lhs, build_zero_cst (TREE_TYPE (lhs)));
3874 set_vinfo_for_stmt (new_stmt, stmt_info);
3875 set_vinfo_for_stmt (stmt, NULL);
3876 STMT_VINFO_STMT (stmt_info) = new_stmt;
3877 gsi = gsi_for_stmt (stmt); 3923 gsi = gsi_for_stmt (stmt);
3878 gsi_replace (&gsi, new_stmt, false); 3924 stmt_info->vinfo->replace_stmt (&gsi, stmt_info, new_stmt);
3879 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt; 3925 SSA_NAME_DEF_STMT (gimple_assign_lhs (new_stmt)) = new_stmt;
3880 } 3926 }
3881 } 3927 }
3882 3928
3883 /* Generate vector code for all SLP instances in the loop/basic block. */ 3929 /* Generate vector code for all SLP instances in the loop/basic block. */
3884 3930
3885 bool 3931 void
3886 vect_schedule_slp (vec_info *vinfo) 3932 vect_schedule_slp (vec_info *vinfo)
3887 { 3933 {
3888 vec<slp_instance> slp_instances; 3934 vec<slp_instance> slp_instances;
3889 slp_instance instance; 3935 slp_instance instance;
3890 unsigned int i; 3936 unsigned int i;
3891 bool is_store = false; 3937
3892 3938 scalar_stmts_to_slp_tree_map_t *bst_map
3939 = new scalar_stmts_to_slp_tree_map_t ();
3893 slp_instances = vinfo->slp_instances; 3940 slp_instances = vinfo->slp_instances;
3894 FOR_EACH_VEC_ELT (slp_instances, i, instance) 3941 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3895 { 3942 {
3896 /* Schedule the tree of INSTANCE. */ 3943 /* Schedule the tree of INSTANCE. */
3897 is_store = vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance), 3944 vect_schedule_slp_instance (SLP_INSTANCE_TREE (instance),
3898 instance); 3945 instance, bst_map);
3899 if (dump_enabled_p ()) 3946 if (dump_enabled_p ())
3900 dump_printf_loc (MSG_NOTE, vect_location, 3947 dump_printf_loc (MSG_NOTE, vect_location,
3901 "vectorizing stmts using SLP.\n"); 3948 "vectorizing stmts using SLP.\n");
3902 } 3949 }
3950 delete bst_map;
3903 3951
3904 FOR_EACH_VEC_ELT (slp_instances, i, instance) 3952 FOR_EACH_VEC_ELT (slp_instances, i, instance)
3905 { 3953 {
3906 slp_tree root = SLP_INSTANCE_TREE (instance); 3954 slp_tree root = SLP_INSTANCE_TREE (instance);
3907 gimple *store; 3955 stmt_vec_info store_info;
3908 unsigned int j; 3956 unsigned int j;
3909 gimple_stmt_iterator gsi;
3910 3957
3911 /* Remove scalar call stmts. Do not do this for basic-block 3958 /* Remove scalar call stmts. Do not do this for basic-block
3912 vectorization as not all uses may be vectorized. 3959 vectorization as not all uses may be vectorized.
3913 ??? Why should this be necessary? DCE should be able to 3960 ??? Why should this be necessary? DCE should be able to
3914 remove the stmts itself. 3961 remove the stmts itself.
3916 stmts starting from the SLP tree root if they have no 3963 stmts starting from the SLP tree root if they have no
3917 uses. */ 3964 uses. */
3918 if (is_a <loop_vec_info> (vinfo)) 3965 if (is_a <loop_vec_info> (vinfo))
3919 vect_remove_slp_scalar_calls (root); 3966 vect_remove_slp_scalar_calls (root);
3920 3967
3921 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store) 3968 for (j = 0; SLP_TREE_SCALAR_STMTS (root).iterate (j, &store_info)
3922 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++) 3969 && j < SLP_INSTANCE_GROUP_SIZE (instance); j++)
3923 { 3970 {
3924 if (!STMT_VINFO_DATA_REF (vinfo_for_stmt (store))) 3971 if (!STMT_VINFO_DATA_REF (store_info))
3925 break; 3972 break;
3926 3973
3927 if (is_pattern_stmt_p (vinfo_for_stmt (store))) 3974 store_info = vect_orig_stmt (store_info);
3928 store = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (store)); 3975 /* Free the attached stmt_vec_info and remove the stmt. */
3929 /* Free the attached stmt_vec_info and remove the stmt. */ 3976 vinfo->remove_stmt (store_info);
3930 gsi = gsi_for_stmt (store);
3931 unlink_stmt_vdef (store);
3932 gsi_remove (&gsi, true);
3933 release_defs (store);
3934 free_stmt_vec_info (store);
3935 } 3977 }
3936 } 3978 }
3937 3979 }
3938 return is_store;
3939 }