comparison gcc/tree-vect-loop.c @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
111:04ced10e8804 (old) vs. 131:84e7813d76e9 (new)
1 /* Loop Vectorization 1 /* Loop Vectorization
2 Copyright (C) 2003-2017 Free Software Foundation, Inc. 2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
3 Contributed by Dorit Naishlos <dorit@il.ibm.com> and 3 Contributed by Dorit Naishlos <dorit@il.ibm.com> and
4 Ira Rosen <irar@il.ibm.com> 4 Ira Rosen <irar@il.ibm.com>
5 5
6 This file is part of GCC. 6 This file is part of GCC.
7 7
48 #include "tree-vectorizer.h" 48 #include "tree-vectorizer.h"
49 #include "gimple-fold.h" 49 #include "gimple-fold.h"
50 #include "cgraph.h" 50 #include "cgraph.h"
51 #include "tree-cfg.h" 51 #include "tree-cfg.h"
52 #include "tree-if-conv.h" 52 #include "tree-if-conv.h"
53 #include "internal-fn.h"
54 #include "tree-vector-builder.h"
55 #include "vec-perm-indices.h"
56 #include "tree-eh.h"
53 57
54 /* Loop Vectorization Pass. 58 /* Loop Vectorization Pass.
55 59
56 This pass tries to vectorize loops. 60 This pass tries to vectorize loops.
57 61
149 http://gcc.gnu.org/projects/tree-ssa/vectorization.html 153 http://gcc.gnu.org/projects/tree-ssa/vectorization.html
150 */ 154 */
151 155
152 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *); 156 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *);
153 157
158 /* Subroutine of vect_determine_vf_for_stmt that handles only one
159 statement. VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE
160 may already be set for general statements (not just data refs). */
161
162 static opt_result
163 vect_determine_vf_for_stmt_1 (stmt_vec_info stmt_info,
164 bool vectype_maybe_set_p,
165 poly_uint64 *vf,
166 vec<stmt_vec_info > *mask_producers)
167 {
168 gimple *stmt = stmt_info->stmt;
169
170 if ((!STMT_VINFO_RELEVANT_P (stmt_info)
171 && !STMT_VINFO_LIVE_P (stmt_info))
172 || gimple_clobber_p (stmt))
173 {
174 if (dump_enabled_p ())
175 dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
176 return opt_result::success ();
177 }
178
179 tree stmt_vectype, nunits_vectype;
180 opt_result res = vect_get_vector_types_for_stmt (stmt_info, &stmt_vectype,
181 &nunits_vectype);
182 if (!res)
183 return res;
184
185 if (stmt_vectype)
186 {
187 if (STMT_VINFO_VECTYPE (stmt_info))
188 /* The only case when a vectype had been already set is for stmts
189 that contain a data ref, or for "pattern-stmts" (stmts generated
190 by the vectorizer to represent/replace a certain idiom). */
191 gcc_assert ((STMT_VINFO_DATA_REF (stmt_info)
192 || vectype_maybe_set_p)
193 && STMT_VINFO_VECTYPE (stmt_info) == stmt_vectype);
194 else if (stmt_vectype == boolean_type_node)
195 mask_producers->safe_push (stmt_info);
196 else
197 STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype;
198 }
199
200 if (nunits_vectype)
201 vect_update_max_nunits (vf, nunits_vectype);
202
203 return opt_result::success ();
204 }
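For illustration only (a standalone C sketch, not part of tree-vect-loop.c or this diff): the mask_producers deferral above exists because a boolean result by itself does not pin down a vector type; the natural mask width follows the compared operands. Both loops below produce the same _Bool result, but the first wants one mask lane per double and the second one mask lane per char.

#include <stddef.h>

/* Same boolean result, different natural mask types: the lane count of
   the comparison mask tracks the compared element type, not _Bool.  */
void
set_flags_double (_Bool *flag, const double *a, size_t n)
{
  for (size_t i = 0; i < n; i++)
    flag[i] = a[i] > 0.0;   /* as many mask lanes as double lanes */
}

void
set_flags_char (_Bool *flag, const signed char *a, size_t n)
{
  for (size_t i = 0; i < n; i++)
    flag[i] = a[i] > 0;     /* as many mask lanes as char lanes */
}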
205
206 /* Subroutine of vect_determine_vectorization_factor. Set the vector
207 types of STMT_INFO and all attached pattern statements and update
208 the vectorization factor VF accordingly. If some of the statements
209 produce a mask result whose vector type can only be calculated later,
210 add them to MASK_PRODUCERS. Return true on success or false if
211 something prevented vectorization. */
212
213 static opt_result
214 vect_determine_vf_for_stmt (stmt_vec_info stmt_info, poly_uint64 *vf,
215 vec<stmt_vec_info > *mask_producers)
216 {
217 vec_info *vinfo = stmt_info->vinfo;
218 if (dump_enabled_p ())
219 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G",
220 stmt_info->stmt);
221 opt_result res
222 = vect_determine_vf_for_stmt_1 (stmt_info, false, vf, mask_producers);
223 if (!res)
224 return res;
225
226 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
227 && STMT_VINFO_RELATED_STMT (stmt_info))
228 {
229 gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
231
232 /* If a pattern statement has def stmts, analyze them too. */
233 for (gimple_stmt_iterator si = gsi_start (pattern_def_seq);
234 !gsi_end_p (si); gsi_next (&si))
235 {
236 stmt_vec_info def_stmt_info = vinfo->lookup_stmt (gsi_stmt (si));
237 if (dump_enabled_p ())
238 dump_printf_loc (MSG_NOTE, vect_location,
239 "==> examining pattern def stmt: %G",
240 def_stmt_info->stmt);
241 res = vect_determine_vf_for_stmt_1 (def_stmt_info, true,
242 vf, mask_producers);
243 if (!res)
244 return res;
247 }
248
249 if (dump_enabled_p ())
250 dump_printf_loc (MSG_NOTE, vect_location,
251 "==> examining pattern statement: %G",
252 stmt_info->stmt);
253 res = vect_determine_vf_for_stmt_1 (stmt_info, true, vf, mask_producers);
254 if (!res)
255 return res;
256 }
257
258 return opt_result::success ();
259 }
260
154 /* Function vect_determine_vectorization_factor 261 /* Function vect_determine_vectorization_factor
155 262
156 Determine the vectorization factor (VF). VF is the number of data elements 263 Determine the vectorization factor (VF). VF is the number of data elements
157 that are operated upon in parallel in a single iteration of the vectorized 264 that are operated upon in parallel in a single iteration of the vectorized
158 loop. For example, when vectorizing a loop that operates on 4byte elements, 265 loop. For example, when vectorizing a loop that operates on 4byte elements,
174 for (i=0; i<N; i+=VF){ 281 for (i=0; i<N; i+=VF){
175 a[i:VF] = b[i:VF] + c[i:VF]; 282 a[i:VF] = b[i:VF] + c[i:VF];
176 } 283 }
177 */ 284 */
178 285
179 static bool 286 static opt_result
180 vect_determine_vectorization_factor (loop_vec_info loop_vinfo) 287 vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
181 { 288 {
182 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 289 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
183 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); 290 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
184 unsigned nbbs = loop->num_nodes; 291 unsigned nbbs = loop->num_nodes;
185 unsigned int vectorization_factor = 0; 292 poly_uint64 vectorization_factor = 1;
186 tree scalar_type = NULL_TREE; 293 tree scalar_type = NULL_TREE;
187 gphi *phi; 294 gphi *phi;
188 tree vectype; 295 tree vectype;
189 unsigned int nunits;
190 stmt_vec_info stmt_info; 296 stmt_vec_info stmt_info;
191 unsigned i; 297 unsigned i;
192 HOST_WIDE_INT dummy;
193 gimple *stmt, *pattern_stmt = NULL;
194 gimple_seq pattern_def_seq = NULL;
195 gimple_stmt_iterator pattern_def_si = gsi_none ();
196 bool analyze_pattern_stmt = false;
197 bool bool_result;
198 auto_vec<stmt_vec_info> mask_producers; 298 auto_vec<stmt_vec_info> mask_producers;
199 299
200 if (dump_enabled_p ()) 300 DUMP_VECT_SCOPE ("vect_determine_vectorization_factor");
201 dump_printf_loc (MSG_NOTE, vect_location,
202 "=== vect_determine_vectorization_factor ===\n");
203 301
204 for (i = 0; i < nbbs; i++) 302 for (i = 0; i < nbbs; i++)
205 { 303 {
206 basic_block bb = bbs[i]; 304 basic_block bb = bbs[i];
207 305
208 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); 306 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
209 gsi_next (&si)) 307 gsi_next (&si))
210 { 308 {
211 phi = si.phi (); 309 phi = si.phi ();
212 stmt_info = vinfo_for_stmt (phi); 310 stmt_info = loop_vinfo->lookup_stmt (phi);
213 if (dump_enabled_p ()) 311 if (dump_enabled_p ())
214 { 312 dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: %G",
215 dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: "); 313 phi);
216 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
217 }
218 314
219 gcc_assert (stmt_info); 315 gcc_assert (stmt_info);
220 316
221 if (STMT_VINFO_RELEVANT_P (stmt_info) 317 if (STMT_VINFO_RELEVANT_P (stmt_info)
222 || STMT_VINFO_LIVE_P (stmt_info)) 318 || STMT_VINFO_LIVE_P (stmt_info))
223 { 319 {
224 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info)); 320 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info));
225 scalar_type = TREE_TYPE (PHI_RESULT (phi)); 321 scalar_type = TREE_TYPE (PHI_RESULT (phi));
226 322
227 if (dump_enabled_p ()) 323 if (dump_enabled_p ())
228 { 324 dump_printf_loc (MSG_NOTE, vect_location,
229 dump_printf_loc (MSG_NOTE, vect_location, 325 "get vectype for scalar type: %T\n",
230 "get vectype for scalar type: "); 326 scalar_type);
231 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
232 dump_printf (MSG_NOTE, "\n");
233 }
234 327
235 vectype = get_vectype_for_scalar_type (scalar_type); 328 vectype = get_vectype_for_scalar_type (scalar_type);
236 if (!vectype) 329 if (!vectype)
237 { 330 return opt_result::failure_at (phi,
238 if (dump_enabled_p ()) 331 "not vectorized: unsupported "
239 { 332 "data-type %T\n",
240 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 333 scalar_type);
241 "not vectorized: unsupported "
242 "data-type ");
243 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
244 scalar_type);
245 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
246 }
247 return false;
248 }
249 STMT_VINFO_VECTYPE (stmt_info) = vectype; 334 STMT_VINFO_VECTYPE (stmt_info) = vectype;
335
336 if (dump_enabled_p ())
337 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n",
338 vectype);
250 339
251 if (dump_enabled_p ()) 340 if (dump_enabled_p ())
252 { 341 {
253 dump_printf_loc (MSG_NOTE, vect_location, "vectype: "); 342 dump_printf_loc (MSG_NOTE, vect_location, "nunits = ");
254 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype); 343 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (vectype));
255 dump_printf (MSG_NOTE, "\n");
256 }
257
258 nunits = TYPE_VECTOR_SUBPARTS (vectype);
259 if (dump_enabled_p ())
260 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n",
261 nunits);
262
263 if (!vectorization_factor
264 || (nunits > vectorization_factor))
265 vectorization_factor = nunits;
266 }
267 }
268
269 for (gimple_stmt_iterator si = gsi_start_bb (bb);
270 !gsi_end_p (si) || analyze_pattern_stmt;)
271 {
272 tree vf_vectype;
273
274 if (analyze_pattern_stmt)
275 stmt = pattern_stmt;
276 else
277 stmt = gsi_stmt (si);
278
279 stmt_info = vinfo_for_stmt (stmt);
280
281 if (dump_enabled_p ())
282 {
283 dump_printf_loc (MSG_NOTE, vect_location,
284 "==> examining statement: ");
285 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
286 }
287
288 gcc_assert (stmt_info);
289
290 /* Skip stmts which do not need to be vectorized. */
291 if ((!STMT_VINFO_RELEVANT_P (stmt_info)
292 && !STMT_VINFO_LIVE_P (stmt_info))
293 || gimple_clobber_p (stmt))
294 {
295 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
296 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
297 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
298 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
299 {
300 stmt = pattern_stmt;
301 stmt_info = vinfo_for_stmt (pattern_stmt);
302 if (dump_enabled_p ())
303 {
304 dump_printf_loc (MSG_NOTE, vect_location,
305 "==> examining pattern statement: ");
306 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
307 }
308 }
309 else
310 {
311 if (dump_enabled_p ())
312 dump_printf_loc (MSG_NOTE, vect_location, "skip.\n");
313 gsi_next (&si);
314 continue;
315 }
316 }
317 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
318 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
319 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
320 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
321 analyze_pattern_stmt = true;
322
323 /* If a pattern statement has def stmts, analyze them too. */
324 if (is_pattern_stmt_p (stmt_info))
325 {
326 if (pattern_def_seq == NULL)
327 {
328 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
329 pattern_def_si = gsi_start (pattern_def_seq);
330 }
331 else if (!gsi_end_p (pattern_def_si))
332 gsi_next (&pattern_def_si);
333 if (pattern_def_seq != NULL)
334 {
335 gimple *pattern_def_stmt = NULL;
336 stmt_vec_info pattern_def_stmt_info = NULL;
337
338 while (!gsi_end_p (pattern_def_si))
339 {
340 pattern_def_stmt = gsi_stmt (pattern_def_si);
341 pattern_def_stmt_info
342 = vinfo_for_stmt (pattern_def_stmt);
343 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
344 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
345 break;
346 gsi_next (&pattern_def_si);
347 }
348
349 if (!gsi_end_p (pattern_def_si))
350 {
351 if (dump_enabled_p ())
352 {
353 dump_printf_loc (MSG_NOTE, vect_location,
354 "==> examining pattern def stmt: ");
355 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
356 pattern_def_stmt, 0);
357 }
358
359 stmt = pattern_def_stmt;
360 stmt_info = pattern_def_stmt_info;
361 }
362 else
363 {
364 pattern_def_si = gsi_none ();
365 analyze_pattern_stmt = false;
366 }
367 }
368 else
369 analyze_pattern_stmt = false;
370 }
371
372 if (gimple_get_lhs (stmt) == NULL_TREE
373 /* MASK_STORE has no lhs, but is ok. */
374 && (!is_gimple_call (stmt)
375 || !gimple_call_internal_p (stmt)
376 || gimple_call_internal_fn (stmt) != IFN_MASK_STORE))
377 {
378 if (is_gimple_call (stmt))
379 {
380 /* Ignore calls with no lhs. These must be calls to
381 #pragma omp simd functions, and what vectorization factor
382 it really needs can't be determined until
383 vectorizable_simd_clone_call. */
384 if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
385 {
386 pattern_def_seq = NULL;
387 gsi_next (&si);
388 }
389 continue;
390 }
391 if (dump_enabled_p ())
392 {
393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
394 "not vectorized: irregular stmt.");
395 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
396 0);
397 }
398 return false;
399 }
400
401 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt))))
402 {
403 if (dump_enabled_p ())
404 {
405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
406 "not vectorized: vector stmt in loop:");
407 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0);
408 }
409 return false;
410 }
411
412 bool_result = false;
413
414 if (STMT_VINFO_VECTYPE (stmt_info))
415 {
416 /* The only case when a vectype had been already set is for stmts
417 that contain a dataref, or for "pattern-stmts" (stmts
418 generated by the vectorizer to represent/replace a certain
419 idiom). */
420 gcc_assert (STMT_VINFO_DATA_REF (stmt_info)
421 || is_pattern_stmt_p (stmt_info)
422 || !gsi_end_p (pattern_def_si));
423 vectype = STMT_VINFO_VECTYPE (stmt_info);
424 }
425 else
426 {
427 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info));
428 if (gimple_call_internal_p (stmt, IFN_MASK_STORE))
429 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3));
430 else
431 scalar_type = TREE_TYPE (gimple_get_lhs (stmt));
432
433 /* Bool ops don't participate in vectorization factor
434 computation. For comparison use compared types to
435 compute a factor. */
436 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type)
437 && is_gimple_assign (stmt)
438 && gimple_assign_rhs_code (stmt) != COND_EXPR)
439 {
440 if (STMT_VINFO_RELEVANT_P (stmt_info)
441 || STMT_VINFO_LIVE_P (stmt_info))
442 mask_producers.safe_push (stmt_info);
443 bool_result = true;
444
445 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt))
446 == tcc_comparison
447 && !VECT_SCALAR_BOOLEAN_TYPE_P
448 (TREE_TYPE (gimple_assign_rhs1 (stmt))))
449 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
450 else
451 {
452 if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
453 {
454 pattern_def_seq = NULL;
455 gsi_next (&si);
456 }
457 continue;
458 }
459 }
460
461 if (dump_enabled_p ())
462 {
463 dump_printf_loc (MSG_NOTE, vect_location,
464 "get vectype for scalar type: ");
465 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
466 dump_printf (MSG_NOTE, "\n");
467 }
468 vectype = get_vectype_for_scalar_type (scalar_type);
469 if (!vectype)
470 {
471 if (dump_enabled_p ())
472 {
473 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
474 "not vectorized: unsupported "
475 "data-type ");
476 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
477 scalar_type);
478 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
479 }
480 return false;
481 }
482
483 if (!bool_result)
484 STMT_VINFO_VECTYPE (stmt_info) = vectype;
485
486 if (dump_enabled_p ())
487 {
488 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
489 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype);
490 dump_printf (MSG_NOTE, "\n");
491 }
492 }
493
494 /* Don't try to compute VF out scalar types if we stmt
495 produces boolean vector. Use result vectype instead. */
496 if (VECTOR_BOOLEAN_TYPE_P (vectype))
497 vf_vectype = vectype;
498 else
499 {
500 /* The vectorization factor is according to the smallest
501 scalar type (or the largest vector size, but we only
502 support one vector size per loop). */
503 if (!bool_result)
504 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy,
505 &dummy);
506 if (dump_enabled_p ())
507 {
508 dump_printf_loc (MSG_NOTE, vect_location,
509 "get vectype for scalar type: ");
510 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type);
511 dump_printf (MSG_NOTE, "\n"); 344 dump_printf (MSG_NOTE, "\n");
512 } 345 }
513 vf_vectype = get_vectype_for_scalar_type (scalar_type); 346
347 vect_update_max_nunits (&vectorization_factor, vectype);
514 } 348 }
515 if (!vf_vectype) 349 }
516 { 350
517 if (dump_enabled_p ()) 351 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
518 { 352 gsi_next (&si))
519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 353 {
520 "not vectorized: unsupported data-type "); 354 stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
521 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, 355 opt_result res
522 scalar_type); 356 = vect_determine_vf_for_stmt (stmt_info, &vectorization_factor,
523 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); 357 &mask_producers);
524 } 358 if (!res)
525 return false; 359 return res;
526 }
527
528 if ((GET_MODE_SIZE (TYPE_MODE (vectype))
529 != GET_MODE_SIZE (TYPE_MODE (vf_vectype))))
530 {
531 if (dump_enabled_p ())
532 {
533 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
534 "not vectorized: different sized vector "
535 "types in statement, ");
536 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
537 vectype);
538 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
539 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
540 vf_vectype);
541 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
542 }
543 return false;
544 }
545
546 if (dump_enabled_p ())
547 {
548 dump_printf_loc (MSG_NOTE, vect_location, "vectype: ");
549 dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype);
550 dump_printf (MSG_NOTE, "\n");
551 }
552
553 nunits = TYPE_VECTOR_SUBPARTS (vf_vectype);
554 if (dump_enabled_p ())
555 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits);
556 if (!vectorization_factor
557 || (nunits > vectorization_factor))
558 vectorization_factor = nunits;
559
560 if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si))
561 {
562 pattern_def_seq = NULL;
563 gsi_next (&si);
564 }
565 } 360 }
566 } 361 }
567 362
568 /* TODO: Analyze cost. Decide if worth while to vectorize. */ 363 /* TODO: Analyze cost. Decide if worth while to vectorize. */
569 if (dump_enabled_p ()) 364 if (dump_enabled_p ())
570 dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n", 365 {
571 vectorization_factor); 366 dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = ");
572 if (vectorization_factor <= 1) 367 dump_dec (MSG_NOTE, vectorization_factor);
573 { 368 dump_printf (MSG_NOTE, "\n");
574 if (dump_enabled_p ()) 369 }
575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 370
576 "not vectorized: unsupported data-type\n"); 371 if (known_le (vectorization_factor, 1U))
577 return false; 372 return opt_result::failure_at (vect_location,
578 } 373 "not vectorized: unsupported data-type\n");
579 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; 374 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
580 375
581 for (i = 0; i < mask_producers.length (); i++) 376 for (i = 0; i < mask_producers.length (); i++)
582 { 377 {
583 tree mask_type = NULL; 378 stmt_info = mask_producers[i];
584 379 opt_tree mask_type = vect_get_mask_type_for_stmt (stmt_info);
585 stmt = STMT_VINFO_STMT (mask_producers[i]);
586
587 if (is_gimple_assign (stmt)
588 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison
589 && !VECT_SCALAR_BOOLEAN_TYPE_P
590 (TREE_TYPE (gimple_assign_rhs1 (stmt))))
591 {
592 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt));
593 mask_type = get_mask_type_for_scalar_type (scalar_type);
594
595 if (!mask_type)
596 {
597 if (dump_enabled_p ())
598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
599 "not vectorized: unsupported mask\n");
600 return false;
601 }
602 }
603 else
604 {
605 tree rhs;
606 ssa_op_iter iter;
607 gimple *def_stmt;
608 enum vect_def_type dt;
609
610 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE)
611 {
612 if (!vect_is_simple_use (rhs, mask_producers[i]->vinfo,
613 &def_stmt, &dt, &vectype))
614 {
615 if (dump_enabled_p ())
616 {
617 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
618 "not vectorized: can't compute mask type "
619 "for statement, ");
620 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
621 0);
622 }
623 return false;
624 }
625
626 /* No vectype probably means external definition.
627 Allow it in case there is another operand which
628 allows to determine mask type. */
629 if (!vectype)
630 continue;
631
632 if (!mask_type)
633 mask_type = vectype;
634 else if (TYPE_VECTOR_SUBPARTS (mask_type)
635 != TYPE_VECTOR_SUBPARTS (vectype))
636 {
637 if (dump_enabled_p ())
638 {
639 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
640 "not vectorized: different sized masks "
641 "types in statement, ");
642 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
643 mask_type);
644 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
645 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
646 vectype);
647 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
648 }
649 return false;
650 }
651 else if (VECTOR_BOOLEAN_TYPE_P (mask_type)
652 != VECTOR_BOOLEAN_TYPE_P (vectype))
653 {
654 if (dump_enabled_p ())
655 {
656 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
657 "not vectorized: mixed mask and "
658 "nonmask vector types in statement, ");
659 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
660 mask_type);
661 dump_printf (MSG_MISSED_OPTIMIZATION, " and ");
662 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM,
663 vectype);
664 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
665 }
666 return false;
667 }
668 }
669
670 /* We may compare boolean value loaded as vector of integers.
671 Fix mask_type in such case. */
672 if (mask_type
673 && !VECTOR_BOOLEAN_TYPE_P (mask_type)
674 && gimple_code (stmt) == GIMPLE_ASSIGN
675 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison)
676 mask_type = build_same_sized_truth_vector_type (mask_type);
677 }
678
679 /* No mask_type should mean loop invariant predicate.
680 This is probably a subject for optimization in
681 if-conversion. */
682 if (!mask_type) 380 if (!mask_type)
683 { 381 return opt_result::propagate_failure (mask_type);
684 if (dump_enabled_p ()) 382 STMT_VINFO_VECTYPE (stmt_info) = mask_type;
685 { 383 }
686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 384
687 "not vectorized: can't compute mask type " 385 return opt_result::success ();
688 "for statement, ");
689 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt,
690 0);
691 }
692 return false;
693 }
694
695 STMT_VINFO_VECTYPE (mask_producers[i]) = mask_type;
696 }
697
698 return true;
699 } 386 }
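As a standalone illustration (not part of this file), assuming a hypothetical target with 16-byte vectors: the loop below mixes 2-byte and 4-byte elements, so shorts pack 8 per vector and ints pack 4 per vector. The vectorization factor is taken from the smallest scalar type, giving VF = 8; each vector iteration then consumes one short vector and two int vectors.

#include <stddef.h>

/* With 16-byte vectors: 8 shorts or 4 ints per vector, so VF == 8 and
   the int side of the computation needs two vectors per iteration.  */
void
widen_add (int *dst, const short *src, size_t n)
{
  for (size_t i = 0; i < n; i++)
    dst[i] += src[i];
}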
700 387
701 388
702 /* Function vect_is_simple_iv_evolution. 389 /* Function vect_is_simple_iv_evolution.
703 390
725 412
726 step_expr = evolution_part; 413 step_expr = evolution_part;
727 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb)); 414 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb));
728 415
729 if (dump_enabled_p ()) 416 if (dump_enabled_p ())
730 { 417 dump_printf_loc (MSG_NOTE, vect_location, "step: %T, init: %T\n",
731 dump_printf_loc (MSG_NOTE, vect_location, "step: "); 418 step_expr, init_expr);
732 dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr);
733 dump_printf (MSG_NOTE, ", init: ");
734 dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr);
735 dump_printf (MSG_NOTE, "\n");
736 }
737 419
738 *init = init_expr; 420 *init = init_expr;
739 *step = step_expr; 421 *step = step_expr;
740 422
741 if (TREE_CODE (step_expr) != INTEGER_CST 423 if (TREE_CODE (step_expr) != INTEGER_CST
755 } 437 }
756 438
757 return true; 439 return true;
758 } 440 }
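As a standalone illustration (not taken from this file) of the form vect_is_simple_iv_evolution accepts: an induction variable whose per-iteration update is a loop-invariant additive step, i.e. a scalar evolution {init, +, step}.

#include <stddef.h>

/* "p" has scalar evolution {3, +, 2} in the loop: init = 3, step = 2.
   An update like "p *= 2" would not qualify, because its evolution is
   multiplicative rather than a constant additive step.  */
long
sum_every_other (const long *a, size_t n)
{
  long sum = 0;
  for (size_t p = 3; p < n; p += 2)
    sum += a[p];
  return sum;
}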
759 441
442 /* Return true if PHI, described by STMT_INFO, is the inner PHI in
443 what we are assuming is a double reduction. For example, given
444 a structure like this:
445
446 outer1:
447 x_1 = PHI <x_4(outer2), ...>;
448 ...
449
450 inner:
451 x_2 = PHI <x_1(outer1), ...>;
452 ...
453 x_3 = ...;
454 ...
455
456 outer2:
457 x_4 = PHI <x_3(inner)>;
458 ...
459
460 outer loop analysis would treat x_1 as a double reduction phi and
461 this function would then return true for x_2. */
462
463 static bool
464 vect_inner_phi_in_double_reduction_p (stmt_vec_info stmt_info, gphi *phi)
465 {
466 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
467 use_operand_p use_p;
468 ssa_op_iter op_iter;
469 FOR_EACH_PHI_ARG (use_p, phi, op_iter, SSA_OP_USE)
470 if (stmt_vec_info def_info = loop_vinfo->lookup_def (USE_FROM_PTR (use_p)))
471 if (STMT_VINFO_DEF_TYPE (def_info) == vect_double_reduction_def)
472 return true;
473 return false;
474 }
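A standalone source-level loop (illustrative, not part of this file) that produces the x_1 .. x_4 structure sketched in the comment above: the scalar sum is carried around both loops, so the inner loop's PHI for the sum is exactly the kind of statement this predicate returns true for.

/* "x" is live across both loops: outer-loop analysis sees a double
   reduction PHI (x_1/x_4), and the inner loop's PHI for x is the
   inner PHI (x_2) that vect_inner_phi_in_double_reduction_p detects.  */
int
sum_matrix (const int a[64][64])
{
  int x = 0;
  for (int i = 0; i < 64; i++)
    for (int j = 0; j < 64; j++)
      x = x + a[i][j];
  return x;
}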
475
760 /* Function vect_analyze_scalar_cycles_1. 476 /* Function vect_analyze_scalar_cycles_1.
761 477
762 Examine the cross iteration def-use cycles of scalar variables 478 Examine the cross iteration def-use cycles of scalar variables
763 in LOOP. LOOP_VINFO represents the loop that is now being 479 in LOOP. LOOP_VINFO represents the loop that is now being
764 considered for vectorization (can be LOOP, or an outer-loop 480 considered for vectorization (can be LOOP, or an outer-loop
767 static void 483 static void
768 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop) 484 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop)
769 { 485 {
770 basic_block bb = loop->header; 486 basic_block bb = loop->header;
771 tree init, step; 487 tree init, step;
772 auto_vec<gimple *, 64> worklist; 488 auto_vec<stmt_vec_info, 64> worklist;
773 gphi_iterator gsi; 489 gphi_iterator gsi;
774 bool double_reduc; 490 bool double_reduc;
775 491
776 if (dump_enabled_p ()) 492 DUMP_VECT_SCOPE ("vect_analyze_scalar_cycles");
777 dump_printf_loc (MSG_NOTE, vect_location,
778 "=== vect_analyze_scalar_cycles ===\n");
779 493
780 /* First - identify all inductions. Reduction detection assumes that all the 494 /* First - identify all inductions. Reduction detection assumes that all the
781 inductions have been identified, therefore, this order must not be 495 inductions have been identified, therefore, this order must not be
782 changed. */ 496 changed. */
783 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 497 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
784 { 498 {
785 gphi *phi = gsi.phi (); 499 gphi *phi = gsi.phi ();
786 tree access_fn = NULL; 500 tree access_fn = NULL;
787 tree def = PHI_RESULT (phi); 501 tree def = PHI_RESULT (phi);
788 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi); 502 stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (phi);
789 503
790 if (dump_enabled_p ()) 504 if (dump_enabled_p ())
791 { 505 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G", phi);
792 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
793 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
794 }
795 506
796 /* Skip virtual phi's. The data dependences that are associated with 507 /* Skip virtual phi's. The data dependences that are associated with
797 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ 508 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */
798 if (virtual_operand_p (def)) 509 if (virtual_operand_p (def))
799 continue; 510 continue;
804 access_fn = analyze_scalar_evolution (loop, def); 515 access_fn = analyze_scalar_evolution (loop, def);
805 if (access_fn) 516 if (access_fn)
806 { 517 {
807 STRIP_NOPS (access_fn); 518 STRIP_NOPS (access_fn);
808 if (dump_enabled_p ()) 519 if (dump_enabled_p ())
809 { 520 dump_printf_loc (MSG_NOTE, vect_location,
810 dump_printf_loc (MSG_NOTE, vect_location, 521 "Access function of PHI: %T\n", access_fn);
811 "Access function of PHI: ");
812 dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn);
813 dump_printf (MSG_NOTE, "\n");
814 }
815 STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo) 522 STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
816 = initial_condition_in_loop_num (access_fn, loop->num); 523 = initial_condition_in_loop_num (access_fn, loop->num);
817 STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) 524 STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo)
818 = evolution_part_in_loop_num (access_fn, loop->num); 525 = evolution_part_in_loop_num (access_fn, loop->num);
819 } 526 }
820 527
821 if (!access_fn 528 if (!access_fn
529 || vect_inner_phi_in_double_reduction_p (stmt_vinfo, phi)
822 || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step) 530 || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step)
823 || (LOOP_VINFO_LOOP (loop_vinfo) != loop 531 || (LOOP_VINFO_LOOP (loop_vinfo) != loop
824 && TREE_CODE (step) != INTEGER_CST)) 532 && TREE_CODE (step) != INTEGER_CST))
825 { 533 {
826 worklist.safe_push (phi); 534 worklist.safe_push (stmt_vinfo);
827 continue; 535 continue;
828 } 536 }
829 537
830 gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo) 538 gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo)
831 != NULL_TREE); 539 != NULL_TREE);
838 546
839 547
840 /* Second - identify all reductions and nested cycles. */ 548 /* Second - identify all reductions and nested cycles. */
841 while (worklist.length () > 0) 549 while (worklist.length () > 0)
842 { 550 {
843 gimple *phi = worklist.pop (); 551 stmt_vec_info stmt_vinfo = worklist.pop ();
552 gphi *phi = as_a <gphi *> (stmt_vinfo->stmt);
844 tree def = PHI_RESULT (phi); 553 tree def = PHI_RESULT (phi);
845 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi);
846 gimple *reduc_stmt;
847 554
848 if (dump_enabled_p ()) 555 if (dump_enabled_p ())
849 { 556 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G", phi);
850 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: ");
851 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
852 }
853 557
854 gcc_assert (!virtual_operand_p (def) 558 gcc_assert (!virtual_operand_p (def)
855 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type); 559 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type);
856 560
857 reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, 561 stmt_vec_info reduc_stmt_info
858 &double_reduc, false); 562 = vect_force_simple_reduction (loop_vinfo, stmt_vinfo,
859 if (reduc_stmt) 563 &double_reduc, false);
564 if (reduc_stmt_info)
860 { 565 {
861 if (double_reduc) 566 if (double_reduc)
862 { 567 {
863 if (dump_enabled_p ()) 568 if (dump_enabled_p ())
864 dump_printf_loc (MSG_NOTE, vect_location, 569 dump_printf_loc (MSG_NOTE, vect_location,
865 "Detected double reduction.\n"); 570 "Detected double reduction.\n");
866 571
867 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def; 572 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def;
868 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) = 573 STMT_VINFO_DEF_TYPE (reduc_stmt_info)
869 vect_double_reduction_def; 574 = vect_double_reduction_def;
870 } 575 }
871 else 576 else
872 { 577 {
873 if (loop != LOOP_VINFO_LOOP (loop_vinfo)) 578 if (loop != LOOP_VINFO_LOOP (loop_vinfo))
874 { 579 {
875 if (dump_enabled_p ()) 580 if (dump_enabled_p ())
876 dump_printf_loc (MSG_NOTE, vect_location, 581 dump_printf_loc (MSG_NOTE, vect_location,
877 "Detected vectorizable nested cycle.\n"); 582 "Detected vectorizable nested cycle.\n");
878 583
879 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle; 584 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle;
880 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) = 585 STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_nested_cycle;
881 vect_nested_cycle;
882 } 586 }
883 else 587 else
884 { 588 {
885 if (dump_enabled_p ()) 589 if (dump_enabled_p ())
886 dump_printf_loc (MSG_NOTE, vect_location, 590 dump_printf_loc (MSG_NOTE, vect_location,
887 "Detected reduction.\n"); 591 "Detected reduction.\n");
888 592
889 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def; 593 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def;
890 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) = 594 STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_reduction_def;
891 vect_reduction_def;
892 /* Store the reduction cycles for possible vectorization in 595 /* Store the reduction cycles for possible vectorization in
893 loop-aware SLP if it was not detected as reduction 596 loop-aware SLP if it was not detected as reduction
894 chain. */ 597 chain. */
895 if (! GROUP_FIRST_ELEMENT (vinfo_for_stmt (reduc_stmt))) 598 if (! REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info))
896 LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt); 599 LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push
600 (reduc_stmt_info);
897 } 601 }
898 } 602 }
899 } 603 }
900 else 604 else
901 if (dump_enabled_p ()) 605 if (dump_enabled_p ())
944 648
945 if (loop->inner) 649 if (loop->inner)
946 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner); 650 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner);
947 } 651 }
948 652
949 /* Transfer group and reduction information from STMT to its pattern stmt. */ 653 /* Transfer group and reduction information from STMT_INFO to its
654 pattern stmt. */
950 655
951 static void 656 static void
952 vect_fixup_reduc_chain (gimple *stmt) 657 vect_fixup_reduc_chain (stmt_vec_info stmt_info)
953 { 658 {
954 gimple *firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)); 659 stmt_vec_info firstp = STMT_VINFO_RELATED_STMT (stmt_info);
955 gimple *stmtp; 660 stmt_vec_info stmtp;
956 gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp)) 661 gcc_assert (!REDUC_GROUP_FIRST_ELEMENT (firstp)
957 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))); 662 && REDUC_GROUP_FIRST_ELEMENT (stmt_info));
958 GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt)); 663 REDUC_GROUP_SIZE (firstp) = REDUC_GROUP_SIZE (stmt_info);
959 do 664 do
960 { 665 {
961 stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)); 666 stmtp = STMT_VINFO_RELATED_STMT (stmt_info);
962 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp; 667 REDUC_GROUP_FIRST_ELEMENT (stmtp) = firstp;
963 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)); 668 stmt_info = REDUC_GROUP_NEXT_ELEMENT (stmt_info);
964 if (stmt) 669 if (stmt_info)
965 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp)) 670 REDUC_GROUP_NEXT_ELEMENT (stmtp)
966 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)); 671 = STMT_VINFO_RELATED_STMT (stmt_info);
967 } 672 }
968 while (stmt); 673 while (stmt_info);
969 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def; 674 STMT_VINFO_DEF_TYPE (stmtp) = vect_reduction_def;
970 } 675 }
971 676
972 /* Fixup scalar cycles that now have their stmts detected as patterns. */ 677 /* Fixup scalar cycles that now have their stmts detected as patterns. */
973 678
974 static void 679 static void
975 vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo) 680 vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo)
976 { 681 {
977 gimple *first; 682 stmt_vec_info first;
978 unsigned i; 683 unsigned i;
979 684
980 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first) 685 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first)
981 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first))) 686 if (STMT_VINFO_IN_PATTERN_P (first))
982 { 687 {
983 gimple *next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first)); 688 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first);
984 while (next) 689 while (next)
985 { 690 {
986 if (! STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))) 691 if (! STMT_VINFO_IN_PATTERN_P (next))
987 break; 692 break;
988 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next)); 693 next = REDUC_GROUP_NEXT_ELEMENT (next);
989 } 694 }
990 /* If not all stmt in the chain are patterns try to handle 695 /* If not all stmt in the chain are patterns try to handle
991 the chain without patterns. */ 696 the chain without patterns. */
992 if (! next) 697 if (! next)
993 { 698 {
994 vect_fixup_reduc_chain (first); 699 vect_fixup_reduc_chain (first);
995 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i] 700 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i]
996 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first)); 701 = STMT_VINFO_RELATED_STMT (first);
997 } 702 }
998 } 703 }
999 } 704 }
1000 705
1001 /* Function vect_get_loop_niters. 706 /* Function vect_get_loop_niters.
1018 gcond *cond = get_loop_exit_condition (loop); 723 gcond *cond = get_loop_exit_condition (loop);
1019 724
1020 *assumptions = boolean_true_node; 725 *assumptions = boolean_true_node;
1021 *number_of_iterationsm1 = chrec_dont_know; 726 *number_of_iterationsm1 = chrec_dont_know;
1022 *number_of_iterations = chrec_dont_know; 727 *number_of_iterations = chrec_dont_know;
1023 if (dump_enabled_p ()) 728 DUMP_VECT_SCOPE ("get_loop_niters");
1024 dump_printf_loc (MSG_NOTE, vect_location,
1025 "=== get_loop_niters ===\n");
1026 729
1027 if (!exit) 730 if (!exit)
1028 return cond; 731 return cond;
1029 732
1030 niter = chrec_dont_know; 733 niter = chrec_dont_know;
1053 fold_build1 (TRUTH_NOT_EXPR, 756 fold_build1 (TRUTH_NOT_EXPR,
1054 boolean_type_node, 757 boolean_type_node,
1055 may_be_zero)); 758 may_be_zero));
1056 else 759 else
1057 niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero, 760 niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero,
1058 build_int_cst (TREE_TYPE (niter), 0), niter); 761 build_int_cst (TREE_TYPE (niter), 0),
762 rewrite_to_non_trapping_overflow (niter));
1059 763
1060 may_be_zero = NULL_TREE; 764 may_be_zero = NULL_TREE;
1061 } 765 }
1062 else if (integer_nonzerop (may_be_zero)) 766 else if (integer_nonzerop (may_be_zero))
1063 { 767 {
1099 803
1100 804
1101 /* Create and initialize a new loop_vec_info struct for LOOP_IN, as well as 805 /* Create and initialize a new loop_vec_info struct for LOOP_IN, as well as
1102 stmt_vec_info structs for all the stmts in LOOP_IN. */ 806 stmt_vec_info structs for all the stmts in LOOP_IN. */
1103 807
1104 _loop_vec_info::_loop_vec_info (struct loop *loop_in) 808 _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared)
1105 : vec_info (vec_info::loop, init_cost (loop_in)), 809 : vec_info (vec_info::loop, init_cost (loop_in), shared),
1106 loop (loop_in), 810 loop (loop_in),
1107 bbs (XCNEWVEC (basic_block, loop->num_nodes)), 811 bbs (XCNEWVEC (basic_block, loop->num_nodes)),
1108 num_itersm1 (NULL_TREE), 812 num_itersm1 (NULL_TREE),
1109 num_iters (NULL_TREE), 813 num_iters (NULL_TREE),
1110 num_iters_unchanged (NULL_TREE), 814 num_iters_unchanged (NULL_TREE),
1111 num_iters_assumptions (NULL_TREE), 815 num_iters_assumptions (NULL_TREE),
1112 th (0), 816 th (0),
817 versioning_threshold (0),
1113 vectorization_factor (0), 818 vectorization_factor (0),
1114 max_vectorization_factor (0), 819 max_vectorization_factor (0),
820 mask_skip_niters (NULL_TREE),
821 mask_compare_type (NULL_TREE),
1115 unaligned_dr (NULL), 822 unaligned_dr (NULL),
1116 peeling_for_alignment (0), 823 peeling_for_alignment (0),
1117 ptr_mask (0), 824 ptr_mask (0),
825 ivexpr_map (NULL),
1118 slp_unrolling_factor (1), 826 slp_unrolling_factor (1),
1119 single_scalar_iteration_cost (0), 827 single_scalar_iteration_cost (0),
1120 vectorizable (false), 828 vectorizable (false),
829 can_fully_mask_p (true),
830 fully_masked_p (false),
1121 peeling_for_gaps (false), 831 peeling_for_gaps (false),
1122 peeling_for_niter (false), 832 peeling_for_niter (false),
1123 operands_swapped (false), 833 operands_swapped (false),
1124 no_data_dependencies (false), 834 no_data_dependencies (false),
1125 has_mask_store (false), 835 has_mask_store (false),
1126 scalar_loop (NULL), 836 scalar_loop (NULL),
1127 orig_loop_info (NULL) 837 orig_loop_info (NULL)
1128 { 838 {
1129 /* Create/Update stmt_info for all stmts in the loop. */
1130 basic_block *body = get_loop_body (loop);
1131 for (unsigned int i = 0; i < loop->num_nodes; i++)
1132 {
1133 basic_block bb = body[i];
1134 gimple_stmt_iterator si;
1135
1136 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1137 {
1138 gimple *phi = gsi_stmt (si);
1139 gimple_set_uid (phi, 0);
1140 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, this));
1141 }
1142
1143 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1144 {
1145 gimple *stmt = gsi_stmt (si);
1146 gimple_set_uid (stmt, 0);
1147 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this));
1148 }
1149 }
1150 free (body);
1151
1152 /* CHECKME: We want to visit all BBs before their successors (except for 839 /* CHECKME: We want to visit all BBs before their successors (except for
1153 latch blocks, for which this assertion wouldn't hold). In the simple 840 latch blocks, for which this assertion wouldn't hold). In the simple
1154 case of the loop forms we allow, a dfs order of the BBs would the same 841 case of the loop forms we allow, a dfs order of the BBs would the same
1155 as reversed postorder traversal, so we are safe. */ 842 as reversed postorder traversal, so we are safe. */
1156 843
1157 unsigned int nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p, 844 unsigned int nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p,
1158 bbs, loop->num_nodes, loop); 845 bbs, loop->num_nodes, loop);
1159 gcc_assert (nbbs == loop->num_nodes); 846 gcc_assert (nbbs == loop->num_nodes);
847
848 for (unsigned int i = 0; i < nbbs; i++)
849 {
850 basic_block bb = bbs[i];
851 gimple_stmt_iterator si;
852
853 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
854 {
855 gimple *phi = gsi_stmt (si);
856 gimple_set_uid (phi, 0);
857 add_stmt (phi);
858 }
859
860 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
861 {
862 gimple *stmt = gsi_stmt (si);
863 gimple_set_uid (stmt, 0);
864 add_stmt (stmt);
865 }
866 }
1160 } 867 }
1161 868
869 /* Free all levels of MASKS. */
870
871 void
872 release_vec_loop_masks (vec_loop_masks *masks)
873 {
874 rgroup_masks *rgm;
875 unsigned int i;
876 FOR_EACH_VEC_ELT (*masks, i, rgm)
877 rgm->masks.release ();
878 masks->release ();
879 }
1162 880
1163 /* Free all memory used by the _loop_vec_info, as well as all the 881 /* Free all memory used by the _loop_vec_info, as well as all the
1164 stmt_vec_info structs of all the stmts in the loop. */ 882 stmt_vec_info structs of all the stmts in the loop. */
1165 883
1166 _loop_vec_info::~_loop_vec_info () 884 _loop_vec_info::~_loop_vec_info ()
1171 889
1172 nbbs = loop->num_nodes; 890 nbbs = loop->num_nodes;
1173 for (j = 0; j < nbbs; j++) 891 for (j = 0; j < nbbs; j++)
1174 { 892 {
1175 basic_block bb = bbs[j]; 893 basic_block bb = bbs[j];
1176 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si))
1177 free_stmt_vec_info (gsi_stmt (si));
1178
1179 for (si = gsi_start_bb (bb); !gsi_end_p (si); ) 894 for (si = gsi_start_bb (bb); !gsi_end_p (si); )
1180 { 895 {
1181 gimple *stmt = gsi_stmt (si); 896 gimple *stmt = gsi_stmt (si);
1182 897
1183 /* We may have broken canonical form by moving a constant 898 /* We may have broken canonical form by moving a constant
1213 gimple_assign_rhs3_ptr (stmt)); 928 gimple_assign_rhs3_ptr (stmt));
1214 } 929 }
1215 } 930 }
1216 } 931 }
1217 } 932 }
1218
1219 /* Free stmt_vec_info. */
1220 free_stmt_vec_info (stmt);
1221 gsi_next (&si); 933 gsi_next (&si);
1222 } 934 }
1223 } 935 }
1224 936
1225 free (bbs); 937 free (bbs);
1226 938
939 release_vec_loop_masks (&masks);
940 delete ivexpr_map;
941
1227 loop->aux = NULL; 942 loop->aux = NULL;
1228 } 943 }
1229 944
945 /* Return an invariant or register for EXPR and emit necessary
946 computations in the LOOP_VINFO loop preheader. */
947
948 tree
949 cse_and_gimplify_to_preheader (loop_vec_info loop_vinfo, tree expr)
950 {
951 if (is_gimple_reg (expr)
952 || is_gimple_min_invariant (expr))
953 return expr;
954
955 if (! loop_vinfo->ivexpr_map)
956 loop_vinfo->ivexpr_map = new hash_map<tree_operand_hash, tree>;
957 tree &cached = loop_vinfo->ivexpr_map->get_or_insert (expr);
958 if (! cached)
959 {
960 gimple_seq stmts = NULL;
961 cached = force_gimple_operand (unshare_expr (expr),
962 &stmts, true, NULL_TREE);
963 if (stmts)
964 {
965 edge e = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
966 gsi_insert_seq_on_edge_immediate (e, stmts);
967 }
968 }
969 return cached;
970 }
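The effect of the helper above, shown on plain C as an illustration only (the vectorizer applies it to expressions it builds internally, not to user code): an invariant expression is gimplified once on the preheader edge, and later requests for the same tree reuse the cached SSA name via ivexpr_map instead of emitting it again.

/* Before: the invariant expression appears at every use site.  */
void
before (int *a, int n, int scale, int bias)
{
  for (int i = 0; i < n; i++)
    a[i] += scale * bias + 1;
}

/* After: computed once in the preheader, every use shares "inv".  */
void
after (int *a, int n, int scale, int bias)
{
  int inv = scale * bias + 1;
  for (int i = 0; i < n; i++)
    a[i] += inv;
}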
971
972 /* Return true if we can use CMP_TYPE as the comparison type to produce
973 all masks required to mask LOOP_VINFO. */
974
975 static bool
976 can_produce_all_loop_masks_p (loop_vec_info loop_vinfo, tree cmp_type)
977 {
978 rgroup_masks *rgm;
979 unsigned int i;
980 FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm)
981 if (rgm->mask_type != NULL_TREE
982 && !direct_internal_fn_supported_p (IFN_WHILE_ULT,
983 cmp_type, rgm->mask_type,
984 OPTIMIZE_FOR_SPEED))
985 return false;
986 return true;
987 }
988
989 /* Calculate the maximum number of scalars per iteration for every
990 rgroup in LOOP_VINFO. */
991
992 static unsigned int
993 vect_get_max_nscalars_per_iter (loop_vec_info loop_vinfo)
994 {
995 unsigned int res = 1;
996 unsigned int i;
997 rgroup_masks *rgm;
998 FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm)
999 res = MAX (res, rgm->max_nscalars_per_iter);
1000 return res;
1001 }
1002
1003 /* Each statement in LOOP_VINFO can be masked where necessary. Check
1004 whether we can actually generate the masks required. Return true if so,
1005 storing the type of the scalar IV in LOOP_VINFO_MASK_COMPARE_TYPE. */
1006
1007 static bool
1008 vect_verify_full_masking (loop_vec_info loop_vinfo)
1009 {
1010 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1011 unsigned int min_ni_width;
1012
1013 /* Use a normal loop if there are no statements that need masking.
1014 This only happens in rare degenerate cases: it means that the loop
1015 has no loads, no stores, and no live-out values. */
1016 if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ())
1017 return false;
1018
1019 /* Get the maximum number of iterations that is representable
1020 in the counter type. */
1021 tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo));
1022 widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1;
1023
1024 /* Get a more refined estimate for the number of iterations. */
1025 widest_int max_back_edges;
1026 if (max_loop_iterations (loop, &max_back_edges))
1027 max_ni = wi::smin (max_ni, max_back_edges + 1);
1028
1029 /* Account for rgroup masks, in which each bit is replicated N times. */
1030 max_ni *= vect_get_max_nscalars_per_iter (loop_vinfo);
1031
1032 /* Work out how many bits we need to represent the limit. */
1033 min_ni_width = wi::min_precision (max_ni, UNSIGNED);
1034
1035 /* Find a scalar mode for which WHILE_ULT is supported. */
1036 opt_scalar_int_mode cmp_mode_iter;
1037 tree cmp_type = NULL_TREE;
1038 FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT)
1039 {
1040 unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode_iter.require ());
1041 if (cmp_bits >= min_ni_width
1042 && targetm.scalar_mode_supported_p (cmp_mode_iter.require ()))
1043 {
1044 tree this_type = build_nonstandard_integer_type (cmp_bits, true);
1045 if (this_type
1046 && can_produce_all_loop_masks_p (loop_vinfo, this_type))
1047 {
1048 /* Although we could stop as soon as we find a valid mode,
1049 it's often better to continue until we hit Pmode, since the
1050 operands to the WHILE are more likely to be reusable in
1051 address calculations. */
1052 cmp_type = this_type;
1053 if (cmp_bits >= GET_MODE_BITSIZE (Pmode))
1054 break;
1055 }
1056 }
1057 }
1058
1059 if (!cmp_type)
1060 return false;
1061
1062 LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo) = cmp_type;
1063 return true;
1064 }
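A worked example with assumed numbers (not taken from the file): if the niter type is 32-bit, max_loop_iterations refines the bound to 1000 back edges (1001 iterations), and the largest rgroup handles 2 scalars per iteration, then the mask IV must be able to count to 2002, which needs 11 bits; any integer mode of at least 11 bits whose WHILE_ULT is supported will do, with wider modes up to Pmode preferred. The standalone sketch below mirrors just the precision computation.

#include <stdio.h>

/* Minimal unsigned precision needed for LIMIT, playing the role of
   wi::min_precision (max_ni, UNSIGNED) in the function above.  */
static unsigned int
min_unsigned_precision (unsigned long long limit)
{
  return limit ? 64u - (unsigned int) __builtin_clzll (limit) : 1u;
}

int
main (void)
{
  unsigned long long max_ni = 1000ull + 1;  /* refined iteration bound   */
  max_ni *= 2;                              /* max scalars per iteration */
  printf ("need a >= %u-bit comparison type\n",
          min_unsigned_precision (max_ni)); /* prints 11 */
  return 0;
}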
1230 1065
1231 /* Calculate the cost of one scalar iteration of the loop. */ 1066 /* Calculate the cost of one scalar iteration of the loop. */
1232 static void 1067 static void
1233 vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo) 1068 vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo)
1234 { 1069 {
1235 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1070 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1236 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); 1071 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1237 int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0; 1072 int nbbs = loop->num_nodes, factor;
1238 int innerloop_iters, i; 1073 int innerloop_iters, i;
1239 1074
1240 /* Count statements in scalar loop. Using this as scalar cost for a single 1075 DUMP_VECT_SCOPE ("vect_compute_single_scalar_iteration_cost");
1241 iteration for now. 1076
1242 1077 /* Gather costs for statements in the scalar loop. */
1243 TODO: Add outer loop support.
1244
1245 TODO: Consider assigning different costs to different scalar
1246 statements. */
1247 1078
1248 /* FORNOW. */ 1079 /* FORNOW. */
1249 innerloop_iters = 1; 1080 innerloop_iters = 1;
1250 if (loop->inner) 1081 if (loop->inner)
1251 innerloop_iters = 50; /* FIXME */ 1082 innerloop_iters = 50; /* FIXME */
1261 factor = 1; 1092 factor = 1;
1262 1093
1263 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) 1094 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
1264 { 1095 {
1265 gimple *stmt = gsi_stmt (si); 1096 gimple *stmt = gsi_stmt (si);
1266 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1097 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt);
1267 1098
1268 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt)) 1099 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt))
1269 continue; 1100 continue;
1270 1101
1271 /* Skip stmts that are not vectorized inside the loop. */ 1102 /* Skip stmts that are not vectorized inside the loop. */
1285 kind = scalar_store; 1116 kind = scalar_store;
1286 } 1117 }
1287 else 1118 else
1288 kind = scalar_stmt; 1119 kind = scalar_stmt;
1289 1120
1290 scalar_single_iter_cost 1121 record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1291 += record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), 1122 factor, kind, stmt_info, 0, vect_prologue);
1292 factor, kind, stmt_info, 0, vect_prologue);
1293 } 1123 }
1294 } 1124 }
1295 LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo) 1125
1296 = scalar_single_iter_cost; 1126 /* Now accumulate cost. */
1127 void *target_cost_data = init_cost (loop);
1128 stmt_info_for_cost *si;
1129 int j;
1130 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
1131 j, si)
1132 (void) add_stmt_cost (target_cost_data, si->count,
1133 si->kind, si->stmt_info, si->misalign,
1134 vect_body);
1135 unsigned dummy, body_cost = 0;
1136 finish_cost (target_cost_data, &dummy, &body_cost, &dummy);
1137 destroy_cost_data (target_cost_data);
1138 LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo) = body_cost;
1297 } 1139 }
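A loose standalone sketch of the accounting above (hypothetical types and weights; the real code goes through record_stmt_cost and the target's add_stmt_cost hook): every scalar statement contributes its kind's cost, and statements inside an inner loop are weighted by the FORNOW factor of 50.

enum stmt_kind { SCALAR_LOAD, SCALAR_STORE, SCALAR_STMT };

struct scalar_stmt { enum stmt_kind kind; int in_inner_loop; };

static int
kind_cost (enum stmt_kind k)
{
  return k == SCALAR_STMT ? 1 : 2;  /* assumed per-kind weights */
}

/* Sum of per-statement costs; inner-loop statements count 50 times.  */
static int
single_scalar_iteration_cost (const struct scalar_stmt *stmts, int n)
{
  int cost = 0;
  for (int i = 0; i < n; i++)
    cost += kind_cost (stmts[i].kind) * (stmts[i].in_inner_loop ? 50 : 1);
  return cost;
}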
1298 1140
1299 1141
1300 /* Function vect_analyze_loop_form_1. 1142 /* Function vect_analyze_loop_form_1.
1301 1143
1304 - the loop has a single entry and exit 1146 - the loop has a single entry and exit
1305 - the loop exit condition is simple enough 1147 - the loop exit condition is simple enough
1306 - the number of iterations can be analyzed, i.e, a countable loop. The 1148 - the number of iterations can be analyzed, i.e, a countable loop. The
1307 niter could be analyzed under some assumptions. */ 1149 niter could be analyzed under some assumptions. */
1308 1150
1309 bool 1151 opt_result
1310 vect_analyze_loop_form_1 (struct loop *loop, gcond **loop_cond, 1152 vect_analyze_loop_form_1 (struct loop *loop, gcond **loop_cond,
1311 tree *assumptions, tree *number_of_iterationsm1, 1153 tree *assumptions, tree *number_of_iterationsm1,
1312 tree *number_of_iterations, gcond **inner_loop_cond) 1154 tree *number_of_iterations, gcond **inner_loop_cond)
1313 { 1155 {
1314 if (dump_enabled_p ()) 1156 DUMP_VECT_SCOPE ("vect_analyze_loop_form");
1315 dump_printf_loc (MSG_NOTE, vect_location,
1316 "=== vect_analyze_loop_form ===\n");
1317 1157
1318 /* Different restrictions apply when we are considering an inner-most loop, 1158 /* Different restrictions apply when we are considering an inner-most loop,
1319 vs. an outer (nested) loop. 1159 vs. an outer (nested) loop.
1320 (FORNOW. May want to relax some of these restrictions in the future). */ 1160 (FORNOW. May want to relax some of these restrictions in the future). */
1321 1161
1332 | +--> latch --+ 1172 | +--> latch --+
1333 | 1173 |
1334 (exit-bb) */ 1174 (exit-bb) */
1335 1175
1336 if (loop->num_nodes != 2) 1176 if (loop->num_nodes != 2)
1337 { 1177 return opt_result::failure_at (vect_location,
1338 if (dump_enabled_p ()) 1178 "not vectorized:"
1339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1179 " control flow in loop.\n");
1340 "not vectorized: control flow in loop.\n");
1341 return false;
1342 }
1343 1180
1344 if (empty_block_p (loop->header)) 1181 if (empty_block_p (loop->header))
1345 { 1182 return opt_result::failure_at (vect_location,
1346 if (dump_enabled_p ()) 1183 "not vectorized: empty loop.\n");
1347 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1348 "not vectorized: empty loop.\n");
1349 return false;
1350 }
1351 } 1184 }
1352 else 1185 else
1353 { 1186 {
1354 struct loop *innerloop = loop->inner; 1187 struct loop *innerloop = loop->inner;
1355 edge entryedge; 1188 edge entryedge;
1370 1203
1371 The inner-loop has the properties expected of inner-most loops 1204 The inner-loop has the properties expected of inner-most loops
1372 as described above. */ 1205 as described above. */
1373 1206
1374 if ((loop->inner)->inner || (loop->inner)->next) 1207 if ((loop->inner)->inner || (loop->inner)->next)
1375 { 1208 return opt_result::failure_at (vect_location,
1376 if (dump_enabled_p ()) 1209 "not vectorized:"
1377 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1210 " multiple nested loops.\n");
1378 "not vectorized: multiple nested loops.\n");
1379 return false;
1380 }
1381 1211
1382 if (loop->num_nodes != 5) 1212 if (loop->num_nodes != 5)
1383 { 1213 return opt_result::failure_at (vect_location,
1384 if (dump_enabled_p ()) 1214 "not vectorized:"
1385 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1215 " control flow in loop.\n");
1386 "not vectorized: control flow in loop.\n");
1387 return false;
1388 }
1389 1216
1390 entryedge = loop_preheader_edge (innerloop); 1217 entryedge = loop_preheader_edge (innerloop);
1391 if (entryedge->src != loop->header 1218 if (entryedge->src != loop->header
1392 || !single_exit (innerloop) 1219 || !single_exit (innerloop)
1393 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src) 1220 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src)
1221 return opt_result::failure_at (vect_location,
1222 "not vectorized:"
1223 " unsupported outerloop form.\n");
1224
1225 /* Analyze the inner-loop. */
1226 tree inner_niterm1, inner_niter, inner_assumptions;
1227 opt_result res
1228 = vect_analyze_loop_form_1 (loop->inner, inner_loop_cond,
1229 &inner_assumptions, &inner_niterm1,
1230 &inner_niter, NULL);
1231 if (!res)
1394 { 1232 {
1395 if (dump_enabled_p ()) 1233 if (dump_enabled_p ())
1396 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1234 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1397 "not vectorized: unsupported outerloop form.\n");
1398 return false;
1399 }
1400
1401 /* Analyze the inner-loop. */
1402 tree inner_niterm1, inner_niter, inner_assumptions;
1403 if (! vect_analyze_loop_form_1 (loop->inner, inner_loop_cond,
1404 &inner_assumptions, &inner_niterm1,
1405 &inner_niter, NULL)
1406 /* Don't support analyzing niter under assumptions for inner
1407 loop. */
1408 || !integer_onep (inner_assumptions))
1409 {
1410 if (dump_enabled_p ())
1411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1412 "not vectorized: Bad inner loop.\n"); 1235 "not vectorized: Bad inner loop.\n");
1413 return false; 1236 return res;
1414 } 1237 }
1238
1239 /* Don't support analyzing niter under assumptions for inner
1240 loop. */
1241 if (!integer_onep (inner_assumptions))
1242 return opt_result::failure_at (vect_location,
1243 "not vectorized: Bad inner loop.\n");
1415 1244
1416 if (!expr_invariant_in_loop_p (loop, inner_niter)) 1245 if (!expr_invariant_in_loop_p (loop, inner_niter))
1417 { 1246 return opt_result::failure_at (vect_location,
1418 if (dump_enabled_p ()) 1247 "not vectorized: inner-loop count not"
1419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1248 " invariant.\n");
1420 "not vectorized: inner-loop count not"
1421 " invariant.\n");
1422 return false;
1423 }
1424 1249
1425 if (dump_enabled_p ()) 1250 if (dump_enabled_p ())
1426 dump_printf_loc (MSG_NOTE, vect_location, 1251 dump_printf_loc (MSG_NOTE, vect_location,
1427 "Considering outer-loop vectorization.\n"); 1252 "Considering outer-loop vectorization.\n");
1428 } 1253 }
1429 1254
1430 if (!single_exit (loop) 1255 if (!single_exit (loop))
1431 || EDGE_COUNT (loop->header->preds) != 2) 1256 return opt_result::failure_at (vect_location,
1432 { 1257 "not vectorized: multiple exits.\n");
1433 if (dump_enabled_p ()) 1258 if (EDGE_COUNT (loop->header->preds) != 2)
1434 { 1259 return opt_result::failure_at (vect_location,
1435 if (!single_exit (loop)) 1260 "not vectorized:"
1436 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1261 " too many incoming edges.\n");
1437 "not vectorized: multiple exits.\n");
1438 else if (EDGE_COUNT (loop->header->preds) != 2)
1439 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1440 "not vectorized: too many incoming edges.\n");
1441 }
1442 return false;
1443 }
1444 1262
1445 /* We assume that the loop exit condition is at the end of the loop. i.e, 1263 /* We assume that the loop exit condition is at the end of the loop. i.e,
1446 that the loop is represented as a do-while (with a proper if-guard 1264 that the loop is represented as a do-while (with a proper if-guard
1447 before the loop if needed), where the loop header contains all the 1265 before the loop if needed), where the loop header contains all the
1448 executable statements, and the latch is empty. */ 1266 executable statements, and the latch is empty. */
1449 if (!empty_block_p (loop->latch) 1267 if (!empty_block_p (loop->latch)
1450 || !gimple_seq_empty_p (phi_nodes (loop->latch))) 1268 || !gimple_seq_empty_p (phi_nodes (loop->latch)))
1451 { 1269 return opt_result::failure_at (vect_location,
1452 if (dump_enabled_p ()) 1270 "not vectorized: latch block not empty.\n");
1453 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1454 "not vectorized: latch block not empty.\n");
1455 return false;
1456 }
1457 1271
1458 /* Make sure the exit is not abnormal. */ 1272 /* Make sure the exit is not abnormal. */
1459 edge e = single_exit (loop); 1273 edge e = single_exit (loop);
1460 if (e->flags & EDGE_ABNORMAL) 1274 if (e->flags & EDGE_ABNORMAL)
1461 { 1275 return opt_result::failure_at (vect_location,
1462 if (dump_enabled_p ()) 1276 "not vectorized:"
1463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1277 " abnormal loop exit edge.\n");
1464 "not vectorized: abnormal loop exit edge.\n");
1465 return false;
1466 }
1467 1278
1468 *loop_cond = vect_get_loop_niters (loop, assumptions, number_of_iterations, 1279 *loop_cond = vect_get_loop_niters (loop, assumptions, number_of_iterations,
1469 number_of_iterationsm1); 1280 number_of_iterationsm1);
1470 if (!*loop_cond) 1281 if (!*loop_cond)
1471 { 1282 return opt_result::failure_at
1472 if (dump_enabled_p ()) 1283 (vect_location,
1473 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1284 "not vectorized: complicated exit condition.\n");
1474 "not vectorized: complicated exit condition.\n");
1475 return false;
1476 }
1477 1285
1478 if (integer_zerop (*assumptions) 1286 if (integer_zerop (*assumptions)
1479 || !*number_of_iterations 1287 || !*number_of_iterations
1480 || chrec_contains_undetermined (*number_of_iterations)) 1288 || chrec_contains_undetermined (*number_of_iterations))
1481 { 1289 return opt_result::failure_at
1482 if (dump_enabled_p ()) 1290 (*loop_cond,
1483 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1291 "not vectorized: number of iterations cannot be computed.\n");
1484 "not vectorized: number of iterations cannot be "
1485 "computed.\n");
1486 return false;
1487 }
1488 1292
1489 if (integer_zerop (*number_of_iterations)) 1293 if (integer_zerop (*number_of_iterations))
1490 { 1294 return opt_result::failure_at
1491 if (dump_enabled_p ()) 1295 (*loop_cond,
1492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1296 "not vectorized: number of iterations = 0.\n");
1493 "not vectorized: number of iterations = 0.\n"); 1297
1494 return false; 1298 return opt_result::success ();
1495 }
1496
1497 return true;
1498 } 1299 }
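The checks in vect_analyze_loop_form_1 above only constrain the shape of the loop: a single, non-abnormal exit, exactly two header predecessors, an empty latch, and an iteration count that the niter analysis can compute. As a rough illustration (hypothetical user code, not part of this change), the first loop below typically reaches the vectorizer in the expected do-while shape, while the second is rejected by the single_exit () test with "not vectorized: multiple exits":

    void
    f (int *a, int *b, int c, int n)
    {
      for (int i = 0; i < n; i++)
        a[i] = b[i] + c;      /* accepted: one exit, work in the header, empty latch */
    }

    void
    g (int *a, int *b, int c, int n)
    {
      for (int i = 0; i < n; i++)
        {
          if (b[i] < 0)
            break;            /* second exit: single_exit () is NULL, loop is rejected */
          a[i] = b[i] + c;
        }
    }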
1499 1300
1500 /* Analyze LOOP form and return a loop_vec_info if it is of suitable form. */ 1301 /* Analyze LOOP form and return a loop_vec_info if it is of suitable form. */
1501 1302
1502 loop_vec_info 1303 opt_loop_vec_info
1503 vect_analyze_loop_form (struct loop *loop) 1304 vect_analyze_loop_form (struct loop *loop, vec_info_shared *shared)
1504 { 1305 {
1505 tree assumptions, number_of_iterations, number_of_iterationsm1; 1306 tree assumptions, number_of_iterations, number_of_iterationsm1;
1506 gcond *loop_cond, *inner_loop_cond = NULL; 1307 gcond *loop_cond, *inner_loop_cond = NULL;
1507 1308
1508 if (! vect_analyze_loop_form_1 (loop, &loop_cond, 1309 opt_result res
1509 &assumptions, &number_of_iterationsm1, 1310 = vect_analyze_loop_form_1 (loop, &loop_cond,
1510 &number_of_iterations, &inner_loop_cond)) 1311 &assumptions, &number_of_iterationsm1,
1511 return NULL; 1312 &number_of_iterations, &inner_loop_cond);
1512 1313 if (!res)
1513 loop_vec_info loop_vinfo = new _loop_vec_info (loop); 1314 return opt_loop_vec_info::propagate_failure (res);
1315
1316 loop_vec_info loop_vinfo = new _loop_vec_info (loop, shared);
1514 LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1; 1317 LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1;
1515 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; 1318 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations;
1516 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations; 1319 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations;
1517 if (!integer_onep (assumptions)) 1320 if (!integer_onep (assumptions))
1518 { 1321 {
1537 dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations); 1340 dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations);
1538 dump_printf (MSG_NOTE, "\n"); 1341 dump_printf (MSG_NOTE, "\n");
1539 } 1342 }
1540 } 1343 }
1541 1344
1542 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type; 1345 stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (loop_cond);
1346 STMT_VINFO_TYPE (loop_cond_info) = loop_exit_ctrl_vec_info_type;
1543 if (inner_loop_cond) 1347 if (inner_loop_cond)
1544 STMT_VINFO_TYPE (vinfo_for_stmt (inner_loop_cond)) 1348 {
1545 = loop_exit_ctrl_vec_info_type; 1349 stmt_vec_info inner_loop_cond_info
1350 = loop_vinfo->lookup_stmt (inner_loop_cond);
1351 STMT_VINFO_TYPE (inner_loop_cond_info) = loop_exit_ctrl_vec_info_type;
1352 }
1546 1353
1547 gcc_assert (!loop->aux); 1354 gcc_assert (!loop->aux);
1548 loop->aux = loop_vinfo; 1355 loop->aux = loop_vinfo;
1549 return loop_vinfo; 1356 return opt_loop_vec_info::success (loop_vinfo);
1550 } 1357 }
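Both functions above now report failures through the opt_result / opt_loop_vec_info wrappers instead of returning false after an explicit dump_printf_loc. A minimal sketch of the idiom, using a hypothetical helper name but only calls that already appear in this hunk:

    static opt_result
    example_check (struct loop *loop)   /* hypothetical helper, for illustration */
    {
      if (!single_exit (loop))
        return opt_result::failure_at (vect_location,
                                       "not vectorized: multiple exits.\n");
      return opt_result::success ();
    }

    /* A caller can forward the recorded reason instead of re-dumping it.  */
    opt_result res = example_check (loop);
    if (!res)
      return opt_loop_vec_info::propagate_failure (res);

When dumping is enabled the failed result carries an opt_problem describing why vectorization stopped, so callers propagate the reason rather than duplicating dump calls at every level.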
1551 1358
1552 1359
1553 1360
1554 /* Scan the loop stmts and dependent on whether there are any (non-)SLP 1361 /* Scan the loop stmts and dependent on whether there are any (non-)SLP
1558 vect_update_vf_for_slp (loop_vec_info loop_vinfo) 1365 vect_update_vf_for_slp (loop_vec_info loop_vinfo)
1559 { 1366 {
1560 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1367 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1561 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); 1368 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1562 int nbbs = loop->num_nodes; 1369 int nbbs = loop->num_nodes;
1563 unsigned int vectorization_factor; 1370 poly_uint64 vectorization_factor;
1564 int i; 1371 int i;
1565 1372
1566 if (dump_enabled_p ()) 1373 DUMP_VECT_SCOPE ("vect_update_vf_for_slp");
1567 dump_printf_loc (MSG_NOTE, vect_location,
1568 "=== vect_update_vf_for_slp ===\n");
1569 1374
1570 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 1375 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1571 gcc_assert (vectorization_factor != 0); 1376 gcc_assert (known_ne (vectorization_factor, 0U));
1572 1377
1573 /* If all the stmts in the loop can be SLPed, we perform only SLP, and 1378 /* If all the stmts in the loop can be SLPed, we perform only SLP, and
1574 vectorization factor of the loop is the unrolling factor required by 1379 vectorization factor of the loop is the unrolling factor required by
1575 the SLP instances. If that unrolling factor is 1, we say, that we 1380 the SLP instances. If that unrolling factor is 1, we say, that we
1576 perform pure SLP on loop - cross iteration parallelism is not 1381 perform pure SLP on loop - cross iteration parallelism is not
1580 { 1385 {
1581 basic_block bb = bbs[i]; 1386 basic_block bb = bbs[i];
1582 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); 1387 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1583 gsi_next (&si)) 1388 gsi_next (&si))
1584 { 1389 {
1585 gimple *stmt = gsi_stmt (si); 1390 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
1586 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); 1391 stmt_info = vect_stmt_to_vectorize (stmt_info);
1587 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
1588 && STMT_VINFO_RELATED_STMT (stmt_info))
1589 {
1590 stmt = STMT_VINFO_RELATED_STMT (stmt_info);
1591 stmt_info = vinfo_for_stmt (stmt);
1592 }
1593 if ((STMT_VINFO_RELEVANT_P (stmt_info) 1392 if ((STMT_VINFO_RELEVANT_P (stmt_info)
1594 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) 1393 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info)))
1595 && !PURE_SLP_STMT (stmt_info)) 1394 && !PURE_SLP_STMT (stmt_info))
1596 /* STMT needs both SLP and loop-based vectorization. */ 1395 /* STMT needs both SLP and loop-based vectorization. */
1597 only_slp_in_loop = false; 1396 only_slp_in_loop = false;
1606 } 1405 }
1607 else 1406 else
1608 { 1407 {
1609 dump_printf_loc (MSG_NOTE, vect_location, 1408 dump_printf_loc (MSG_NOTE, vect_location,
1610 "Loop contains SLP and non-SLP stmts\n"); 1409 "Loop contains SLP and non-SLP stmts\n");
1410 /* Both the vectorization factor and unroll factor have the form
1411 current_vector_size * X for some rational X, so they must have
1412 a common multiple. */
1611 vectorization_factor 1413 vectorization_factor
1612 = least_common_multiple (vectorization_factor, 1414 = force_common_multiple (vectorization_factor,
1613 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)); 1415 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo));
1614 } 1416 }
1615 1417
1616 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; 1418 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor;
1617 if (dump_enabled_p ()) 1419 if (dump_enabled_p ())
1618 dump_printf_loc (MSG_NOTE, vect_location, 1420 {
1619 "Updating vectorization factor to %d\n", 1421 dump_printf_loc (MSG_NOTE, vect_location,
1620 vectorization_factor); 1422 "Updating vectorization factor to ");
1423 dump_dec (MSG_NOTE, vectorization_factor);
1424 dump_printf (MSG_NOTE, ".\n");
1425 }
1621 } 1426 }
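As a worked example with hypothetical numbers: if the loop-level analysis chose a vectorization factor of 2 while the SLP instances need an unrolling factor of 4, force_common_multiple returns 4 and the dump above reports "Updating vectorization factor to 4". In the pure-SLP case described by the comment above, the vectorization factor instead becomes the SLP unrolling factor directly, and an unrolling factor of 1 means no cross-iteration parallelism is exploited at all.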
1622 1427
1428 /* Return true if STMT_INFO describes a double reduction phi and if
1429 the other phi in the reduction is also relevant for vectorization.
1430 This rejects cases such as:
1431
1432 outer1:
1433 x_1 = PHI <x_3(outer2), ...>;
1434 ...
1435
1436 inner:
1437 x_2 = ...;
1438 ...
1439
1440 outer2:
1441 x_3 = PHI <x_2(inner)>;
1442
1443 if nothing in x_2 or elsewhere makes x_1 relevant. */
1444
1445 static bool
1446 vect_active_double_reduction_p (stmt_vec_info stmt_info)
1447 {
1448 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def)
1449 return false;
1450
1451 return STMT_VINFO_RELEVANT_P (STMT_VINFO_REDUC_DEF (stmt_info));
1452 }
1453
1623 /* Function vect_analyze_loop_operations. 1454 /* Function vect_analyze_loop_operations.
1624 1455
1625 Scan the loop stmts and make sure they are all vectorizable. */ 1456 Scan the loop stmts and make sure they are all vectorizable. */
1626 1457
1627 static bool 1458 static opt_result
1628 vect_analyze_loop_operations (loop_vec_info loop_vinfo) 1459 vect_analyze_loop_operations (loop_vec_info loop_vinfo)
1629 { 1460 {
1630 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 1461 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1631 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); 1462 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1632 int nbbs = loop->num_nodes; 1463 int nbbs = loop->num_nodes;
1633 int i; 1464 int i;
1634 stmt_vec_info stmt_info; 1465 stmt_vec_info stmt_info;
1635 bool need_to_vectorize = false; 1466 bool need_to_vectorize = false;
1636 bool ok; 1467 bool ok;
1637 1468
1638 if (dump_enabled_p ()) 1469 DUMP_VECT_SCOPE ("vect_analyze_loop_operations");
1639 dump_printf_loc (MSG_NOTE, vect_location, 1470
1640 "=== vect_analyze_loop_operations ===\n"); 1471 stmt_vector_for_cost cost_vec;
1472 cost_vec.create (2);
1641 1473
1642 for (i = 0; i < nbbs; i++) 1474 for (i = 0; i < nbbs; i++)
1643 { 1475 {
1644 basic_block bb = bbs[i]; 1476 basic_block bb = bbs[i];
1645 1477
1647 gsi_next (&si)) 1479 gsi_next (&si))
1648 { 1480 {
1649 gphi *phi = si.phi (); 1481 gphi *phi = si.phi ();
1650 ok = true; 1482 ok = true;
1651 1483
1652 stmt_info = vinfo_for_stmt (phi); 1484 stmt_info = loop_vinfo->lookup_stmt (phi);
1653 if (dump_enabled_p ()) 1485 if (dump_enabled_p ())
1654 { 1486 dump_printf_loc (MSG_NOTE, vect_location, "examining phi: %G", phi);
1655 dump_printf_loc (MSG_NOTE, vect_location, "examining phi: ");
1656 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
1657 }
1658 if (virtual_operand_p (gimple_phi_result (phi))) 1487 if (virtual_operand_p (gimple_phi_result (phi)))
1659 continue; 1488 continue;
1660 1489
1661 /* Inner-loop loop-closed exit phi in outer-loop vectorization 1490 /* Inner-loop loop-closed exit phi in outer-loop vectorization
1662 (i.e., a phi in the tail of the outer-loop). */ 1491 (i.e., a phi in the tail of the outer-loop). */
1665 /* FORNOW: we currently don't support the case that these phis 1494 /* FORNOW: we currently don't support the case that these phis
1666 are not used in the outerloop (unless it is double reduction, 1495 are not used in the outerloop (unless it is double reduction,
1667 i.e., this phi is vect_reduction_def), cause this case 1496 i.e., this phi is vect_reduction_def), cause this case
1668 requires to actually do something here. */ 1497 requires to actually do something here. */
1669 if (STMT_VINFO_LIVE_P (stmt_info) 1498 if (STMT_VINFO_LIVE_P (stmt_info)
1670 && STMT_VINFO_DEF_TYPE (stmt_info) 1499 && !vect_active_double_reduction_p (stmt_info))
1671 != vect_double_reduction_def) 1500 return opt_result::failure_at (phi,
1672 { 1501 "Unsupported loop-closed phi"
1673 if (dump_enabled_p ()) 1502 " in outer-loop.\n");
1674 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1675 "Unsupported loop-closed phi in "
1676 "outer-loop.\n");
1677 return false;
1678 }
1679 1503
1680 /* If PHI is used in the outer loop, we check that its operand 1504 /* If PHI is used in the outer loop, we check that its operand
1681 is defined in the inner loop. */ 1505 is defined in the inner loop. */
1682 if (STMT_VINFO_RELEVANT_P (stmt_info)) 1506 if (STMT_VINFO_RELEVANT_P (stmt_info))
1683 { 1507 {
1684 tree phi_op; 1508 tree phi_op;
1685 gimple *op_def_stmt;
1686 1509
1687 if (gimple_phi_num_args (phi) != 1) 1510 if (gimple_phi_num_args (phi) != 1)
1688 return false; 1511 return opt_result::failure_at (phi, "unsupported phi");
1689 1512
1690 phi_op = PHI_ARG_DEF (phi, 0); 1513 phi_op = PHI_ARG_DEF (phi, 0);
1691 if (TREE_CODE (phi_op) != SSA_NAME) 1514 stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op);
1692 return false; 1515 if (!op_def_info)
1693 1516 return opt_result::failure_at (phi, "unsupported phi");
1694 op_def_stmt = SSA_NAME_DEF_STMT (phi_op); 1517
1695 if (gimple_nop_p (op_def_stmt) 1518 if (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer
1696 || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt)) 1519 && (STMT_VINFO_RELEVANT (op_def_info)
1697 || !vinfo_for_stmt (op_def_stmt)) 1520 != vect_used_in_outer_by_reduction))
1698 return false; 1521 return opt_result::failure_at (phi, "unsupported phi");
1699
1700 if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1701 != vect_used_in_outer
1702 && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt))
1703 != vect_used_in_outer_by_reduction)
1704 return false;
1705 } 1522 }
1706 1523
1707 continue; 1524 continue;
1708 } 1525 }
1709 1526
1710 gcc_assert (stmt_info); 1527 gcc_assert (stmt_info);
1711 1528
1712 if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope 1529 if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope
1713 || STMT_VINFO_LIVE_P (stmt_info)) 1530 || STMT_VINFO_LIVE_P (stmt_info))
1714 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def) 1531 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
1715 { 1532 /* A scalar-dependence cycle that we don't support. */
1716 /* A scalar-dependence cycle that we don't support. */ 1533 return opt_result::failure_at (phi,
1717 if (dump_enabled_p ()) 1534 "not vectorized:"
1718 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1535 " scalar dependence cycle.\n");
1719 "not vectorized: scalar dependence cycle.\n");
1720 return false;
1721 }
1722 1536
1723 if (STMT_VINFO_RELEVANT_P (stmt_info)) 1537 if (STMT_VINFO_RELEVANT_P (stmt_info))
1724 { 1538 {
1725 need_to_vectorize = true; 1539 need_to_vectorize = true;
1726 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def 1540 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
1727 && ! PURE_SLP_STMT (stmt_info)) 1541 && ! PURE_SLP_STMT (stmt_info))
1728 ok = vectorizable_induction (phi, NULL, NULL, NULL); 1542 ok = vectorizable_induction (stmt_info, NULL, NULL, NULL,
1543 &cost_vec);
1729 else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def 1544 else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
1730 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) 1545 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
1731 && ! PURE_SLP_STMT (stmt_info)) 1546 && ! PURE_SLP_STMT (stmt_info))
1732 ok = vectorizable_reduction (phi, NULL, NULL, NULL, NULL); 1547 ok = vectorizable_reduction (stmt_info, NULL, NULL, NULL, NULL,
1548 &cost_vec);
1733 } 1549 }
1734 1550
1735 if (ok && STMT_VINFO_LIVE_P (stmt_info)) 1551 /* SLP PHIs are tested by vect_slp_analyze_node_operations. */
1736 ok = vectorizable_live_operation (phi, NULL, NULL, -1, NULL); 1552 if (ok
1553 && STMT_VINFO_LIVE_P (stmt_info)
1554 && !PURE_SLP_STMT (stmt_info))
1555 ok = vectorizable_live_operation (stmt_info, NULL, NULL, -1, NULL,
1556 &cost_vec);
1737 1557
1738 if (!ok) 1558 if (!ok)
1739 { 1559 return opt_result::failure_at (phi,
1740 if (dump_enabled_p ()) 1560 "not vectorized: relevant phi not "
1741 { 1561 "supported: %G",
1742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1562 static_cast <gimple *> (phi));
1743 "not vectorized: relevant phi not "
1744 "supported: ");
1745 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0);
1746 }
1747 return false;
1748 }
1749 } 1563 }
1750 1564
1751 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); 1565 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1752 gsi_next (&si)) 1566 gsi_next (&si))
1753 { 1567 {
1754 gimple *stmt = gsi_stmt (si); 1568 gimple *stmt = gsi_stmt (si);
1755 if (!gimple_clobber_p (stmt) 1569 if (!gimple_clobber_p (stmt))
1756 && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL, NULL)) 1570 {
1757 return false; 1571 opt_result res
1572 = vect_analyze_stmt (loop_vinfo->lookup_stmt (stmt),
1573 &need_to_vectorize,
1574 NULL, NULL, &cost_vec);
1575 if (!res)
1576 return res;
1577 }
1758 } 1578 }
1759 } /* bbs */ 1579 } /* bbs */
1580
1581 add_stmt_costs (loop_vinfo->target_cost_data, &cost_vec);
1582 cost_vec.release ();
1760 1583
1761 /* All operations in the loop are either irrelevant (deal with loop 1584 /* All operations in the loop are either irrelevant (deal with loop
1762 control, or dead), or only used outside the loop and can be moved 1585 control, or dead), or only used outside the loop and can be moved
1763 out of the loop (e.g. invariants, inductions). The loop can be 1586 out of the loop (e.g. invariants, inductions). The loop can be
1764 optimized away by scalar optimizations. We're better off not 1587 optimized away by scalar optimizations. We're better off not
1766 if (!need_to_vectorize) 1589 if (!need_to_vectorize)
1767 { 1590 {
1768 if (dump_enabled_p ()) 1591 if (dump_enabled_p ())
1769 dump_printf_loc (MSG_NOTE, vect_location, 1592 dump_printf_loc (MSG_NOTE, vect_location,
1770 "All the computation can be taken out of the loop.\n"); 1593 "All the computation can be taken out of the loop.\n");
1594 return opt_result::failure_at
1595 (vect_location,
1596 "not vectorized: redundant loop. no profit to vectorize.\n");
1597 }
1598
1599 return opt_result::success ();
1600 }
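One structural change in the new vect_analyze_loop_operations is visible above: per-statement costs are recorded into a local stmt_vector_for_cost during the scan and handed to the target in one batch only after every phi and statement has been checked. Condensed to its skeleton (no new calls, just the shape of the code above):

    stmt_vector_for_cost cost_vec;
    cost_vec.create (2);
    /* vect_analyze_stmt, vectorizable_induction, vectorizable_reduction and
       vectorizable_live_operation append their costs to cost_vec here.  */
    add_stmt_costs (loop_vinfo->target_cost_data, &cost_vec);
    cost_vec.release ();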
1601
1602 /* Analyze the cost of the loop described by LOOP_VINFO. Decide if it
1603 is worthwhile to vectorize. Return 1 if definitely yes, 0 if
1604 definitely no, or -1 if it's worth retrying. */
1605
1606 static int
1607 vect_analyze_loop_costing (loop_vec_info loop_vinfo)
1608 {
1609 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
1610 unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
1611
1612 /* Only fully-masked loops can have iteration counts less than the
1613 vectorization factor. */
1614 if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
1615 {
1616 HOST_WIDE_INT max_niter;
1617
1618 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
1619 max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo);
1620 else
1621 max_niter = max_stmt_executions_int (loop);
1622
1623 if (max_niter != -1
1624 && (unsigned HOST_WIDE_INT) max_niter < assumed_vf)
1625 {
1626 if (dump_enabled_p ())
1627 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1628 "not vectorized: iteration count smaller than "
1629 "vectorization factor.\n");
1630 return 0;
1631 }
1632 }
1633
1634 int min_profitable_iters, min_profitable_estimate;
1635 vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
1636 &min_profitable_estimate);
1637
1638 if (min_profitable_iters < 0)
1639 {
1771 if (dump_enabled_p ()) 1640 if (dump_enabled_p ())
1772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1641 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1773 "not vectorized: redundant loop. no profit to " 1642 "not vectorized: vectorization not profitable.\n");
1774 "vectorize.\n");
1775 return false;
1776 }
1777
1778 return true;
1779 }
1780
1781
1782 /* Function vect_analyze_loop_2.
1783
1784 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1785 for it. The different analyses will record information in the
1786 loop_vec_info struct. */
1787 static bool
1788 vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal)
1789 {
1790 bool ok;
1791 int max_vf = MAX_VECTORIZATION_FACTOR;
1792 int min_vf = 2;
1793 unsigned int n_stmts = 0;
1794
1795 /* The first group of checks is independent of the vector size. */
1796 fatal = true;
1797
1798 /* Find all data references in the loop (which correspond to vdefs/vuses)
1799 and analyze their evolution in the loop. */
1800
1801 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
1802
1803 loop_p loop = LOOP_VINFO_LOOP (loop_vinfo);
1804 if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo)))
1805 {
1806 if (dump_enabled_p ()) 1643 if (dump_enabled_p ())
1807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1808 "not vectorized: loop nest containing two " 1645 "not vectorized: vector version will never be "
1809 "or more consecutive inner loops cannot be " 1646 "profitable.\n");
1810 "vectorized\n"); 1647 return -1;
1811 return false; 1648 }
1812 } 1649
1813 1650 int min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND)
1651 * assumed_vf);
1652
1653 /* Use the cost model only if it is more conservative than user specified
1654 threshold. */
1655 unsigned int th = (unsigned) MAX (min_scalar_loop_bound,
1656 min_profitable_iters);
1657
1658 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;
1659
1660 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1661 && LOOP_VINFO_INT_NITERS (loop_vinfo) < th)
1662 {
1663 if (dump_enabled_p ())
1664 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1665 "not vectorized: vectorization not profitable.\n");
1666 if (dump_enabled_p ())
1667 dump_printf_loc (MSG_NOTE, vect_location,
1668 "not vectorized: iteration count smaller than user "
1669 "specified loop bound parameter or minimum profitable "
1670 "iterations (whichever is more conservative).\n");
1671 return 0;
1672 }
1673
1674 HOST_WIDE_INT estimated_niter = estimated_stmt_executions_int (loop);
1675 if (estimated_niter == -1)
1676 estimated_niter = likely_max_stmt_executions_int (loop);
1677 if (estimated_niter != -1
1678 && ((unsigned HOST_WIDE_INT) estimated_niter
1679 < MAX (th, (unsigned) min_profitable_estimate)))
1680 {
1681 if (dump_enabled_p ())
1682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1683 "not vectorized: estimated iteration count too "
1684 "small.\n");
1685 if (dump_enabled_p ())
1686 dump_printf_loc (MSG_NOTE, vect_location,
1687 "not vectorized: estimated iteration count smaller "
1688 "than specified loop bound parameter or minimum "
1689 "profitable iterations (whichever is more "
1690 "conservative).\n");
1691 return -1;
1692 }
1693
1694 return 1;
1695 }
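To make the thresholds concrete with hypothetical numbers: suppose assumed_vf is 4, the cost model returns min_profitable_iters = 10 and min_profitable_estimate = 12, and --param min-vect-loop-bound is at its usual default of 0 (an assumption here). Then min_scalar_loop_bound = 0 * 4 = 0 and th = MAX (0, 10) = 10. A loop with a known iteration count of 8 fails the "< th" test and the function returns 0 (definitely not worthwhile), while a loop whose count is merely estimated at 8 fails the MAX (10, 12) comparison and returns -1, which the caller treats as worth retrying.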
1696
1697 static opt_result
1698 vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs,
1699 vec<data_reference_p> *datarefs,
1700 unsigned int *n_stmts)
1701 {
1702 *n_stmts = 0;
1814 for (unsigned i = 0; i < loop->num_nodes; i++) 1703 for (unsigned i = 0; i < loop->num_nodes; i++)
1815 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); 1704 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
1816 !gsi_end_p (gsi); gsi_next (&gsi)) 1705 !gsi_end_p (gsi); gsi_next (&gsi))
1817 { 1706 {
1818 gimple *stmt = gsi_stmt (gsi); 1707 gimple *stmt = gsi_stmt (gsi);
1819 if (is_gimple_debug (stmt)) 1708 if (is_gimple_debug (stmt))
1820 continue; 1709 continue;
1821 ++n_stmts; 1710 ++(*n_stmts);
1822 if (!find_data_references_in_stmt (loop, stmt, 1711 opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs);
1823 &LOOP_VINFO_DATAREFS (loop_vinfo))) 1712 if (!res)
1824 { 1713 {
1825 if (is_gimple_call (stmt) && loop->safelen) 1714 if (is_gimple_call (stmt) && loop->safelen)
1826 { 1715 {
1827 tree fndecl = gimple_call_fndecl (stmt), op; 1716 tree fndecl = gimple_call_fndecl (stmt), op;
1828 if (fndecl != NULL_TREE) 1717 if (fndecl != NULL_TREE)
1850 && get_base_address (op))))) 1739 && get_base_address (op)))))
1851 continue; 1740 continue;
1852 } 1741 }
1853 } 1742 }
1854 } 1743 }
1855 if (dump_enabled_p ()) 1744 return res;
1856 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1857 "not vectorized: loop contains function "
1858 "calls or data references that cannot "
1859 "be analyzed\n");
1860 return false;
1861 } 1745 }
1746 /* If dependence analysis will give up due to the limit on the
1747 number of datarefs stop here and fail fatally. */
1748 if (datarefs->length ()
1749 > (unsigned)PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
1750 return opt_result::failure_at (stmt, "exceeded param "
1751 "loop-max-datarefs-for-datadeps\n");
1862 } 1752 }
1753 return opt_result::success ();
1754 }
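For illustration (hypothetical source, not from this change), a statement such as the one below contributes two data references per iteration, the load from b[i] and the store to a[i], while the scalars c and i contribute none; the statement still bumps *n_stmts either way:

    for (int i = 0; i < n; i++)
      a[i] = b[i] * c;   /* one load dataref (b[i]) and one store dataref (a[i]) */

Once datarefs->length () exceeds --param loop-max-datarefs-for-datadeps, the walk above fails fatally rather than let dependence analysis give up later on an oversized set.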
1755
1756 /* Function vect_analyze_loop_2.
1757
1758 Apply a set of analyses on LOOP, and create a loop_vec_info struct
1759 for it. The different analyses will record information in the
1760 loop_vec_info struct. */
1761 static opt_result
1762 vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts)
1763 {
1764 opt_result ok = opt_result::success ();
1765 int res;
1766 unsigned int max_vf = MAX_VECTORIZATION_FACTOR;
1767 poly_uint64 min_vf = 2;
1768
1769 /* The first group of checks is independent of the vector size. */
1770 fatal = true;
1771
1772 /* Find all data references in the loop (which correspond to vdefs/vuses)
1773 and analyze their evolution in the loop. */
1774
1775 loop_p loop = LOOP_VINFO_LOOP (loop_vinfo);
1776
1777 /* Gather the data references and count stmts in the loop. */
1778 if (!LOOP_VINFO_DATAREFS (loop_vinfo).exists ())
1779 {
1780 opt_result res
1781 = vect_get_datarefs_in_loop (loop, LOOP_VINFO_BBS (loop_vinfo),
1782 &LOOP_VINFO_DATAREFS (loop_vinfo),
1783 n_stmts);
1784 if (!res)
1785 {
1786 if (dump_enabled_p ())
1787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1788 "not vectorized: loop contains function "
1789 "calls or data references that cannot "
1790 "be analyzed\n");
1791 return res;
1792 }
1793 loop_vinfo->shared->save_datarefs ();
1794 }
1795 else
1796 loop_vinfo->shared->check_datarefs ();
1863 1797
1864 /* Analyze the data references and also adjust the minimal 1798 /* Analyze the data references and also adjust the minimal
1865 vectorization factor according to the loads and stores. */ 1799 vectorization factor according to the loads and stores. */
1866 1800
1867 ok = vect_analyze_data_refs (loop_vinfo, &min_vf); 1801 ok = vect_analyze_data_refs (loop_vinfo, &min_vf);
1868 if (!ok) 1802 if (!ok)
1869 { 1803 {
1870 if (dump_enabled_p ()) 1804 if (dump_enabled_p ())
1871 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1872 "bad data references.\n"); 1806 "bad data references.\n");
1873 return false; 1807 return ok;
1874 } 1808 }
1875 1809
1876 /* Classify all cross-iteration scalar data-flow cycles. 1810 /* Classify all cross-iteration scalar data-flow cycles.
1877 Cross-iteration cycles caused by virtual phis are analyzed separately. */ 1811 Cross-iteration cycles caused by virtual phis are analyzed separately. */
1878 vect_analyze_scalar_cycles (loop_vinfo); 1812 vect_analyze_scalar_cycles (loop_vinfo);
1888 if (!ok) 1822 if (!ok)
1889 { 1823 {
1890 if (dump_enabled_p ()) 1824 if (dump_enabled_p ())
1891 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1825 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1892 "bad data access.\n"); 1826 "bad data access.\n");
1893 return false; 1827 return ok;
1894 } 1828 }
1895 1829
1896 /* Data-flow analysis to detect stmts that do not need to be vectorized. */ 1830 /* Data-flow analysis to detect stmts that do not need to be vectorized. */
1897 1831
1898 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo); 1832 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo);
1899 if (!ok) 1833 if (!ok)
1900 { 1834 {
1901 if (dump_enabled_p ()) 1835 if (dump_enabled_p ())
1902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1836 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1903 "unexpected pattern.\n"); 1837 "unexpected pattern.\n");
1904 return false; 1838 return ok;
1905 } 1839 }
1906 1840
1907 /* While the rest of the analysis below depends on it in some way. */ 1841 /* While the rest of the analysis below depends on it in some way. */
1908 fatal = false; 1842 fatal = false;
1909 1843
1911 and adjust the maximum vectorization factor according to 1845 and adjust the maximum vectorization factor according to
1912 the dependences. 1846 the dependences.
1913 FORNOW: fail at the first data dependence that we encounter. */ 1847 FORNOW: fail at the first data dependence that we encounter. */
1914 1848
1915 ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf); 1849 ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf);
1916 if (!ok 1850 if (!ok)
1917 || max_vf < min_vf)
1918 { 1851 {
1919 if (dump_enabled_p ()) 1852 if (dump_enabled_p ())
1920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1853 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1921 "bad data dependence.\n"); 1854 "bad data dependence.\n");
1922 return false; 1855 return ok;
1923 } 1856 }
1857 if (max_vf != MAX_VECTORIZATION_FACTOR
1858 && maybe_lt (max_vf, min_vf))
1859 return opt_result::failure_at (vect_location, "bad data dependence.\n");
1924 LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo) = max_vf; 1860 LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo) = max_vf;
1925 1861
1926 ok = vect_determine_vectorization_factor (loop_vinfo); 1862 ok = vect_determine_vectorization_factor (loop_vinfo);
1927 if (!ok) 1863 if (!ok)
1928 { 1864 {
1929 if (dump_enabled_p ()) 1865 if (dump_enabled_p ())
1930 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1866 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1931 "can't determine vectorization factor.\n"); 1867 "can't determine vectorization factor.\n");
1932 return false; 1868 return ok;
1933 } 1869 }
1934 if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo)) 1870 if (max_vf != MAX_VECTORIZATION_FACTOR
1935 { 1871 && maybe_lt (max_vf, LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
1936 if (dump_enabled_p ()) 1872 return opt_result::failure_at (vect_location, "bad data dependence.\n");
1937 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1938 "bad data dependence.\n");
1939 return false;
1940 }
1941 1873
1942 /* Compute the scalar iteration cost. */ 1874 /* Compute the scalar iteration cost. */
1943 vect_compute_single_scalar_iteration_cost (loop_vinfo); 1875 vect_compute_single_scalar_iteration_cost (loop_vinfo);
1944 1876
1945 int saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 1877 poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1946 HOST_WIDE_INT estimated_niter;
1947 unsigned th; 1878 unsigned th;
1948 int min_scalar_loop_bound;
1949 1879
1950 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */ 1880 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */
1951 ok = vect_analyze_slp (loop_vinfo, n_stmts); 1881 ok = vect_analyze_slp (loop_vinfo, *n_stmts);
1952 if (!ok) 1882 if (!ok)
1953 return false; 1883 return ok;
1954 1884
1955 /* If there are any SLP instances mark them as pure_slp. */ 1885 /* If there are any SLP instances mark them as pure_slp. */
1956 bool slp = vect_make_slp_decision (loop_vinfo); 1886 bool slp = vect_make_slp_decision (loop_vinfo);
1957 if (slp) 1887 if (slp)
1958 { 1888 {
1961 1891
1962 /* Update the vectorization factor based on the SLP decision. */ 1892 /* Update the vectorization factor based on the SLP decision. */
1963 vect_update_vf_for_slp (loop_vinfo); 1893 vect_update_vf_for_slp (loop_vinfo);
1964 } 1894 }
1965 1895
1896 bool saved_can_fully_mask_p = LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo);
1897
1898 /* We don't expect to have to roll back to anything other than an empty
1899 set of rgroups. */
1900 gcc_assert (LOOP_VINFO_MASKS (loop_vinfo).is_empty ());
1901
1966 /* This is the point where we can re-start analysis with SLP forced off. */ 1902 /* This is the point where we can re-start analysis with SLP forced off. */
1967 start_over: 1903 start_over:
1968 1904
1969 /* Now the vectorization factor is final. */ 1905 /* Now the vectorization factor is final. */
1970 unsigned vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 1906 poly_uint64 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
1971 gcc_assert (vectorization_factor != 0); 1907 gcc_assert (known_ne (vectorization_factor, 0U));
1972 1908
1973 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ()) 1909 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ())
1974 dump_printf_loc (MSG_NOTE, vect_location, 1910 {
1975 "vectorization_factor = %d, niters = " 1911 dump_printf_loc (MSG_NOTE, vect_location,
1976 HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor, 1912 "vectorization_factor = ");
1977 LOOP_VINFO_INT_NITERS (loop_vinfo)); 1913 dump_dec (MSG_NOTE, vectorization_factor);
1914 dump_printf (MSG_NOTE, ", niters = %wd\n",
1915 LOOP_VINFO_INT_NITERS (loop_vinfo));
1916 }
1978 1917
1979 HOST_WIDE_INT max_niter 1918 HOST_WIDE_INT max_niter
1980 = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo)); 1919 = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
1981 if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
1982 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor))
1983 || (max_niter != -1
1984 && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor))
1985 {
1986 if (dump_enabled_p ())
1987 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
1988 "not vectorized: iteration count smaller than "
1989 "vectorization factor.\n");
1990 return false;
1991 }
1992 1920
1993 /* Analyze the alignment of the data-refs in the loop. 1921 /* Analyze the alignment of the data-refs in the loop.
1994 Fail if a data reference is found that cannot be vectorized. */ 1922 Fail if a data reference is found that cannot be vectorized. */
1995 1923
1996 ok = vect_analyze_data_refs_alignment (loop_vinfo); 1924 ok = vect_analyze_data_refs_alignment (loop_vinfo);
1997 if (!ok) 1925 if (!ok)
1998 { 1926 {
1999 if (dump_enabled_p ()) 1927 if (dump_enabled_p ())
2000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1928 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2001 "bad data alignment.\n"); 1929 "bad data alignment.\n");
2002 return false; 1930 return ok;
2003 } 1931 }
2004 1932
2005 /* Prune the list of ddrs to be tested at run-time by versioning for alias. 1933 /* Prune the list of ddrs to be tested at run-time by versioning for alias.
2006 It is important to call pruning after vect_analyze_data_ref_accesses, 1934 It is important to call pruning after vect_analyze_data_ref_accesses,
2007 since we use grouping information gathered by interleaving analysis. */ 1935 since we use grouping information gathered by interleaving analysis. */
2008 ok = vect_prune_runtime_alias_test_list (loop_vinfo); 1936 ok = vect_prune_runtime_alias_test_list (loop_vinfo);
2009 if (!ok) 1937 if (!ok)
2010 return false; 1938 return ok;
2011 1939
2012 /* Do not invoke vect_enhance_data_refs_alignment for eplilogue 1940 /* Do not invoke vect_enhance_data_refs_alignment for epilogue
2013 vectorization. */ 1941 vectorization, since we do not want to add extra peeling or
1942 add versioning for alignment. */
2014 if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)) 1943 if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
2015 {
2016 /* This pass will decide on using loop versioning and/or loop peeling in 1944 /* This pass will decide on using loop versioning and/or loop peeling in
2017 order to enhance the alignment of data references in the loop. */ 1945 order to enhance the alignment of data references in the loop. */
2018 ok = vect_enhance_data_refs_alignment (loop_vinfo); 1946 ok = vect_enhance_data_refs_alignment (loop_vinfo);
2019 if (!ok) 1947 else
2020 { 1948 ok = vect_verify_datarefs_alignment (loop_vinfo);
2021 if (dump_enabled_p ()) 1949 if (!ok)
2022 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1950 return ok;
2023 "bad data alignment.\n");
2024 return false;
2025 }
2026 }
2027 1951
2028 if (slp) 1952 if (slp)
2029 { 1953 {
2030 /* Analyze operations in the SLP instances. Note this may 1954 /* Analyze operations in the SLP instances. Note this may
2031 remove unsupported SLP instances which makes the above 1955 remove unsupported SLP instances which makes the above
2032 SLP kind detection invalid. */ 1956 SLP kind detection invalid. */
2033 unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length (); 1957 unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length ();
2034 vect_slp_analyze_operations (loop_vinfo); 1958 vect_slp_analyze_operations (loop_vinfo);
2035 if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size) 1959 if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size)
2036 goto again; 1960 {
1961 ok = opt_result::failure_at (vect_location,
1962 "unsupported SLP instances\n");
1963 goto again;
1964 }
2037 } 1965 }
2038 1966
2039 /* Scan all the remaining operations in the loop that are not subject 1967 /* Scan all the remaining operations in the loop that are not subject
2040 to SLP and make sure they are vectorizable. */ 1968 to SLP and make sure they are vectorizable. */
2041 ok = vect_analyze_loop_operations (loop_vinfo); 1969 ok = vect_analyze_loop_operations (loop_vinfo);
2042 if (!ok) 1970 if (!ok)
2043 { 1971 {
2044 if (dump_enabled_p ()) 1972 if (dump_enabled_p ())
2045 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 1973 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2046 "bad operation or unsupported loop bound.\n"); 1974 "bad operation or unsupported loop bound.\n");
2047 return false; 1975 return ok;
1976 }
1977
1978 /* Decide whether to use a fully-masked loop for this vectorization
1979 factor. */
1980 LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
1981 = (LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)
1982 && vect_verify_full_masking (loop_vinfo));
1983 if (dump_enabled_p ())
1984 {
1985 if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
1986 dump_printf_loc (MSG_NOTE, vect_location,
1987 "using a fully-masked loop.\n");
1988 else
1989 dump_printf_loc (MSG_NOTE, vect_location,
1990 "not using a fully-masked loop.\n");
2048 } 1991 }
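As a rough illustration with hypothetical numbers: with a vectorization factor of 4 and 10 scalar iterations, a fully-masked loop executes three vector iterations whose masks enable 4, 4 and then 2 lanes, so the vector loop consumes every iteration itself; an unmasked loop would execute two full vector iterations and leave 2 iterations to an epilogue. This is why the fully-masked case below clears PEELING_FOR_NITER and why vect_analyze_loop_costing only lets fully-masked loops have iteration counts below the vectorization factor.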
2049 1992
2050 /* If epilog loop is required because of data accesses with gaps, 1993 /* If epilog loop is required because of data accesses with gaps,
2051 one additional iteration needs to be peeled. Check if there is 1994 one additional iteration needs to be peeled. Check if there is
2052 enough iterations for vectorization. */ 1995 enough iterations for vectorization. */
2053 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) 1996 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2054 && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) 1997 && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2055 { 1998 && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
2056 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 1999 {
2000 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2057 tree scalar_niters = LOOP_VINFO_NITERSM1 (loop_vinfo); 2001 tree scalar_niters = LOOP_VINFO_NITERSM1 (loop_vinfo);
2058 2002
2059 if (wi::to_widest (scalar_niters) < vf) 2003 if (known_lt (wi::to_widest (scalar_niters), vf))
2060 { 2004 return opt_result::failure_at (vect_location,
2061 if (dump_enabled_p ()) 2005 "loop has no enough iterations to"
2062 dump_printf_loc (MSG_NOTE, vect_location, 2006 " support peeling for gaps.\n");
2063 "loop has no enough iterations to support" 2007 }
2064 " peeling for gaps.\n"); 2008
2065 return false; 2009 /* Check the costings of the loop make vectorizing worthwhile. */
2066 } 2010 res = vect_analyze_loop_costing (loop_vinfo);
2067 } 2011 if (res < 0)
2068 2012 {
2069 /* Analyze cost. Decide if worth while to vectorize. */ 2013 ok = opt_result::failure_at (vect_location,
2070 int min_profitable_estimate, min_profitable_iters; 2014 "Loop costings may not be worthwhile.\n");
2071 vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters,
2072 &min_profitable_estimate);
2073
2074 if (min_profitable_iters < 0)
2075 {
2076 if (dump_enabled_p ())
2077 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2078 "not vectorized: vectorization not profitable.\n");
2079 if (dump_enabled_p ())
2080 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2081 "not vectorized: vector version will never be "
2082 "profitable.\n");
2083 goto again; 2015 goto again;
2084 } 2016 }
2085 2017 if (!res)
2086 min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND) 2018 return opt_result::failure_at (vect_location,
2087 * vectorization_factor); 2019 "Loop costings not worthwhile.\n");
2088
2089 /* Use the cost model only if it is more conservative than user specified
2090 threshold. */
2091 th = (unsigned) MAX (min_scalar_loop_bound, min_profitable_iters);
2092
2093 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th;
2094
2095 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2096 && LOOP_VINFO_INT_NITERS (loop_vinfo) < th)
2097 {
2098 if (dump_enabled_p ())
2099 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2100 "not vectorized: vectorization not profitable.\n");
2101 if (dump_enabled_p ())
2102 dump_printf_loc (MSG_NOTE, vect_location,
2103 "not vectorized: iteration count smaller than user "
2104 "specified loop bound parameter or minimum profitable "
2105 "iterations (whichever is more conservative).\n");
2106 goto again;
2107 }
2108
2109 estimated_niter
2110 = estimated_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo));
2111 if (estimated_niter == -1)
2112 estimated_niter = max_niter;
2113 if (estimated_niter != -1
2114 && ((unsigned HOST_WIDE_INT) estimated_niter
2115 < MAX (th, (unsigned) min_profitable_estimate)))
2116 {
2117 if (dump_enabled_p ())
2118 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2119 "not vectorized: estimated iteration count too "
2120 "small.\n");
2121 if (dump_enabled_p ())
2122 dump_printf_loc (MSG_NOTE, vect_location,
2123 "not vectorized: estimated iteration count smaller "
2124 "than specified loop bound parameter or minimum "
2125 "profitable iterations (whichever is more "
2126 "conservative).\n");
2127 goto again;
2128 }
2129 2020
2130 /* Decide whether we need to create an epilogue loop to handle 2021 /* Decide whether we need to create an epilogue loop to handle
2131 remaining scalar iterations. */ 2022 remaining scalar iterations. */
2132 th = ((LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) 2023 th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
2133 / LOOP_VINFO_VECT_FACTOR (loop_vinfo)) 2024
2134 * LOOP_VINFO_VECT_FACTOR (loop_vinfo)); 2025 unsigned HOST_WIDE_INT const_vf;
2135 2026 if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
2136 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) 2027 /* The main loop handles all iterations. */
2137 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0) 2028 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
2138 { 2029 else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
2139 if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo) 2030 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0)
2140 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)) 2031 {
2141 < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))) 2032 /* Work out the (constant) number of iterations that need to be
2033 peeled for reasons other than niters. */
2034 unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
2035 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2036 peel_niter += 1;
2037 if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter,
2038 LOOP_VINFO_VECT_FACTOR (loop_vinfo)))
2142 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true; 2039 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
2143 } 2040 }
2144 else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) 2041 else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)
2145 || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)) 2042 /* ??? When peeling for gaps but not alignment, we could
2146 < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)) 2043 try to check whether the (variable) niters is known to be
2147 /* In case of versioning, check if the maximum number of 2044 VF * N + 1. That's something of a niche case though. */
2148 iterations is greater than th. If they are identical, 2045 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2149 the epilogue is unnecessary. */ 2046 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf)
2047 || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo))
2048 < (unsigned) exact_log2 (const_vf))
2049 /* In case of versioning, check if the maximum number of
2050 iterations is greater than th. If they are identical,
2051 the epilogue is unnecessary. */
2150 && (!LOOP_REQUIRES_VERSIONING (loop_vinfo) 2052 && (!LOOP_REQUIRES_VERSIONING (loop_vinfo)
2151 || (unsigned HOST_WIDE_INT) max_niter > th))) 2053 || ((unsigned HOST_WIDE_INT) max_niter
2054 > (th / const_vf) * const_vf))))
2152 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true; 2055 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true;
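Concretely, with hypothetical values: a known iteration count of 20, a vectorization factor of 4, 3 iterations peeled for alignment and no peeling for gaps gives 20 - 3 = 17, which is not a multiple of 4, so PEELING_FOR_NITER is set and an epilogue loop will pick up the leftover iterations; with 19 known iterations the remaining 16 divide evenly and no epilogue is needed. When the iteration count is not a compile-time constant, the else branch above is necessarily more conservative, falling back to alignment and gap peeling, the constancy of the vectorization factor, tree_ctz of the symbolic count, and the versioning threshold.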
2153 2056
2154 /* If an epilogue loop is required make sure we can create one. */ 2057 /* If an epilogue loop is required make sure we can create one. */
2155 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) 2058 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)
2156 || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)) 2059 || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
2160 if (!vect_can_advance_ivs_p (loop_vinfo) 2063 if (!vect_can_advance_ivs_p (loop_vinfo)
2161 || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo), 2064 || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo),
2162 single_exit (LOOP_VINFO_LOOP 2065 single_exit (LOOP_VINFO_LOOP
2163 (loop_vinfo)))) 2066 (loop_vinfo))))
2164 { 2067 {
2165 if (dump_enabled_p ()) 2068 ok = opt_result::failure_at (vect_location,
2166 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2069 "not vectorized: can't create required "
2167 "not vectorized: can't create required " 2070 "epilog loop\n");
2168 "epilog loop\n");
2169 goto again; 2071 goto again;
2170 } 2072 }
2171 } 2073 }
2172 2074
2173 /* During peeling, we need to check if number of loop iterations is 2075 /* During peeling, we need to check if number of loop iterations is
2174 enough for both peeled prolog loop and vector loop. This check 2076 enough for both peeled prolog loop and vector loop. This check
2175 can be merged along with threshold check of loop versioning, so 2077 can be merged along with threshold check of loop versioning, so
2176 increase threshold for this case if necessary. */ 2078 increase threshold for this case if necessary. */
2177 if (LOOP_REQUIRES_VERSIONING (loop_vinfo) 2079 if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
2178 && (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) 2080 {
2179 || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))) 2081 poly_uint64 niters_th = 0;
2180 { 2082
2181 unsigned niters_th; 2083 if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
2182 2084 {
2183 /* Niters for peeled prolog loop. */ 2085 /* Niters for peeled prolog loop. */
2184 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0) 2086 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0)
2185 { 2087 {
2186 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); 2088 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
2187 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))); 2089 tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt);
2188 2090 niters_th += TYPE_VECTOR_SUBPARTS (vectype) - 1;
2189 niters_th = TYPE_VECTOR_SUBPARTS (vectype) - 1; 2091 }
2190 } 2092 else
2191 else 2093 niters_th += LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
2192 niters_th = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); 2094 }
2193 2095
2194 /* Niters for at least one iteration of vectorized loop. */ 2096 /* Niters for at least one iteration of vectorized loop. */
2195 niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo); 2097 if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
2098 niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo);
2196 /* One additional iteration because of peeling for gap. */ 2099 /* One additional iteration because of peeling for gap. */
2197 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) 2100 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
2198 niters_th++; 2101 niters_th += 1;
2199 if (LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) < niters_th) 2102 LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = niters_th;
2200 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = niters_th; 2103 }
2201 } 2104
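For example, with hypothetical values and no loop masks used for alignment: 3 prologue iterations peeled for alignment, a vectorization factor of 4 and peeling for gaps gives niters_th = 3 + 4 + 1 = 8, which becomes the versioning threshold guarding the vectorized copy. When the prologue peel count is unknown (recorded as a negative value), the code above uses TYPE_VECTOR_SUBPARTS (vectype) - 1 as the worst case, and a fully-masked loop omits the "+ VF" term because a masked loop can run with fewer than VF iterations.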
2202 2105 gcc_assert (known_eq (vectorization_factor,
2203 gcc_assert (vectorization_factor 2106 LOOP_VINFO_VECT_FACTOR (loop_vinfo)));
2204 == (unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo));
2205 2107
2206 /* Ok to vectorize! */ 2108 /* Ok to vectorize! */
2207 return true; 2109 return opt_result::success ();
2208 2110
2209 again: 2111 again:
2112 /* Ensure that "ok" is false (with an opt_problem if dumping is enabled). */
2113 gcc_assert (!ok);
2114
2210 /* Try again with SLP forced off but if we didn't do any SLP there is 2115 /* Try again with SLP forced off but if we didn't do any SLP there is
2211 no point in re-trying. */ 2116 no point in re-trying. */
2212 if (!slp) 2117 if (!slp)
2213 return false; 2118 return ok;
2214 2119
2215 /* If there are reduction chains re-trying will fail anyway. */ 2120 /* If there are reduction chains re-trying will fail anyway. */
2216 if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ()) 2121 if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ())
2217 return false; 2122 return ok;
2218 2123
2219 /* Likewise if the grouped loads or stores in the SLP cannot be handled 2124 /* Likewise if the grouped loads or stores in the SLP cannot be handled
2220 via interleaving or lane instructions. */ 2125 via interleaving or lane instructions. */
2221 slp_instance instance; 2126 slp_instance instance;
2222 slp_tree node; 2127 slp_tree node;
2223 unsigned i, j; 2128 unsigned i, j;
2224 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance) 2129 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
2225 { 2130 {
2226 stmt_vec_info vinfo; 2131 stmt_vec_info vinfo;
2227 vinfo = vinfo_for_stmt 2132 vinfo = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0];
2228 (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0]);
2229 if (! STMT_VINFO_GROUPED_ACCESS (vinfo)) 2133 if (! STMT_VINFO_GROUPED_ACCESS (vinfo))
2230 continue; 2134 continue;
2231 vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo)); 2135 vinfo = DR_GROUP_FIRST_ELEMENT (vinfo);
2232 unsigned int size = STMT_VINFO_GROUP_SIZE (vinfo); 2136 unsigned int size = DR_GROUP_SIZE (vinfo);
2233 tree vectype = STMT_VINFO_VECTYPE (vinfo); 2137 tree vectype = STMT_VINFO_VECTYPE (vinfo);
2234 if (! vect_store_lanes_supported (vectype, size) 2138 if (! vect_store_lanes_supported (vectype, size, false)
2235 && ! vect_grouped_store_supported (vectype, size)) 2139 && ! known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U)
2236 return false; 2140 && ! vect_grouped_store_supported (vectype, size))
2141 return opt_result::failure_at (vinfo->stmt,
2142 "unsupported grouped store\n");
2237 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node) 2143 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node)
2238 { 2144 {
2239 vinfo = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]); 2145 vinfo = SLP_TREE_SCALAR_STMTS (node)[0];
2240 vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo)); 2146 vinfo = DR_GROUP_FIRST_ELEMENT (vinfo);
2241 bool single_element_p = !STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo); 2147 bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo);
2242 size = STMT_VINFO_GROUP_SIZE (vinfo); 2148 size = DR_GROUP_SIZE (vinfo);
2243 vectype = STMT_VINFO_VECTYPE (vinfo); 2149 vectype = STMT_VINFO_VECTYPE (vinfo);
2244 if (! vect_load_lanes_supported (vectype, size) 2150 if (! vect_load_lanes_supported (vectype, size, false)
2245 && ! vect_grouped_load_supported (vectype, single_element_p, 2151 && ! vect_grouped_load_supported (vectype, single_element_p,
2246 size)) 2152 size))
2247 return false; 2153 return opt_result::failure_at (vinfo->stmt,
2154 "unsupported grouped load\n");
2248 } 2155 }
2249 } 2156 }
2250 2157
2251 if (dump_enabled_p ()) 2158 if (dump_enabled_p ())
2252 dump_printf_loc (MSG_NOTE, vect_location, 2159 dump_printf_loc (MSG_NOTE, vect_location,
2256 slp = false; 2163 slp = false;
2257 /* Restore vectorization factor as it were without SLP. */ 2164 /* Restore vectorization factor as it were without SLP. */
2258 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor; 2165 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor;
2259 /* Free the SLP instances. */ 2166 /* Free the SLP instances. */
2260 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance) 2167 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance)
2261 vect_free_slp_instance (instance); 2168 vect_free_slp_instance (instance, false);
2262 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release (); 2169 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
2263 /* Reset SLP type to loop_vect on all stmts. */ 2170 /* Reset SLP type to loop_vect on all stmts. */
2264 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i) 2171 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i)
2265 { 2172 {
2266 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i]; 2173 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i];
2267 for (gimple_stmt_iterator si = gsi_start_phis (bb); 2174 for (gimple_stmt_iterator si = gsi_start_phis (bb);
2268 !gsi_end_p (si); gsi_next (&si)) 2175 !gsi_end_p (si); gsi_next (&si))
2269 { 2176 {
2270 stmt_vec_info stmt_info = vinfo_for_stmt (gsi_stmt (si)); 2177 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
2271 STMT_SLP_TYPE (stmt_info) = loop_vect; 2178 STMT_SLP_TYPE (stmt_info) = loop_vect;
2272 } 2179 }
2273 for (gimple_stmt_iterator si = gsi_start_bb (bb); 2180 for (gimple_stmt_iterator si = gsi_start_bb (bb);
2274 !gsi_end_p (si); gsi_next (&si)) 2181 !gsi_end_p (si); gsi_next (&si))
2275 { 2182 {
2276 stmt_vec_info stmt_info = vinfo_for_stmt (gsi_stmt (si)); 2183 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si));
2277 STMT_SLP_TYPE (stmt_info) = loop_vect; 2184 STMT_SLP_TYPE (stmt_info) = loop_vect;
2278 if (STMT_VINFO_IN_PATTERN_P (stmt_info)) 2185 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
2279 { 2186 {
2280 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); 2187 gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
2188 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
2281 STMT_SLP_TYPE (stmt_info) = loop_vect; 2189 STMT_SLP_TYPE (stmt_info) = loop_vect;
2282 for (gimple_stmt_iterator pi 2190 for (gimple_stmt_iterator pi = gsi_start (pattern_def_seq);
2283 = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info));
2284 !gsi_end_p (pi); gsi_next (&pi)) 2191 !gsi_end_p (pi); gsi_next (&pi))
2285 { 2192 STMT_SLP_TYPE (loop_vinfo->lookup_stmt (gsi_stmt (pi)))
2286 gimple *pstmt = gsi_stmt (pi); 2193 = loop_vect;
2287 STMT_SLP_TYPE (vinfo_for_stmt (pstmt)) = loop_vect;
2288 }
2289 } 2194 }
2290 } 2195 }
2291 } 2196 }
2292 /* Free optimized alias test DDRS. */ 2197 /* Free optimized alias test DDRS. */
2198 LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).truncate (0);
2293 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release (); 2199 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release ();
2294 LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release (); 2200 LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release ();
2295 /* Reset target cost data. */ 2201 /* Reset target cost data. */
2296 destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)); 2202 destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo));
2297 LOOP_VINFO_TARGET_COST_DATA (loop_vinfo) 2203 LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)
2298 = init_cost (LOOP_VINFO_LOOP (loop_vinfo)); 2204 = init_cost (LOOP_VINFO_LOOP (loop_vinfo));
2205 /* Reset accumulated rgroup information. */
2206 release_vec_loop_masks (&LOOP_VINFO_MASKS (loop_vinfo));
2299 /* Reset assorted flags. */ 2207 /* Reset assorted flags. */
2300 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false; 2208 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false;
2301 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false; 2209 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false;
2302 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0; 2210 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0;
2211 LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0;
2212 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = saved_can_fully_mask_p;
2303 2213
2304 goto start_over; 2214 goto start_over;
2305 } 2215 }
2306 2216
2307 /* Function vect_analyze_loop. 2217 /* Function vect_analyze_loop.
2308 2218
2309 Apply a set of analyses on LOOP, and create a loop_vec_info struct 2219 Apply a set of analyses on LOOP, and create a loop_vec_info struct
2310 for it. The different analyses will record information in the 2220 for it. The different analyses will record information in the
2311 loop_vec_info struct. If ORIG_LOOP_VINFO is not NULL epilogue must 2221 loop_vec_info struct. If ORIG_LOOP_VINFO is not NULL epilogue must
2312 be vectorized. */ 2222 be vectorized. */
2313 loop_vec_info 2223 opt_loop_vec_info
2314 vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo) 2224 vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo,
2225 vec_info_shared *shared)
2315 { 2226 {
2316 loop_vec_info loop_vinfo; 2227 auto_vector_sizes vector_sizes;
2317 unsigned int vector_sizes;
2318 2228
2319 /* Autodetect first vector size we try. */ 2229 /* Autodetect first vector size we try. */
2320 current_vector_size = 0; 2230 current_vector_size = 0;
2321 vector_sizes = targetm.vectorize.autovectorize_vector_sizes (); 2231 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
2322 2232 unsigned int next_size = 0;
2323 if (dump_enabled_p ()) 2233
2324 dump_printf_loc (MSG_NOTE, vect_location, 2234 DUMP_VECT_SCOPE ("analyze_loop_nest");
2325 "===== analyze_loop_nest =====\n");
2326 2235
2327 if (loop_outer (loop) 2236 if (loop_outer (loop)
2328 && loop_vec_info_for_loop (loop_outer (loop)) 2237 && loop_vec_info_for_loop (loop_outer (loop))
2329 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop)))) 2238 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop))))
2330 { 2239 return opt_loop_vec_info::failure_at (vect_location,
2331 if (dump_enabled_p ()) 2240 "outer-loop already vectorized.\n");
2332 dump_printf_loc (MSG_NOTE, vect_location, 2241
2333 "outer-loop already vectorized.\n"); 2242 if (!find_loop_nest (loop, &shared->loop_nest))
2334 return NULL; 2243 return opt_loop_vec_info::failure_at
2335 } 2244 (vect_location,
2336 2245 "not vectorized: loop nest containing two or more consecutive inner"
2246 " loops cannot be vectorized\n");
2247
2248 unsigned n_stmts = 0;
2249 poly_uint64 autodetected_vector_size = 0;
2337 while (1) 2250 while (1)
2338 { 2251 {
2339 /* Check the CFG characteristics of the loop (nesting, entry/exit). */ 2252 /* Check the CFG characteristics of the loop (nesting, entry/exit). */
2340 loop_vinfo = vect_analyze_loop_form (loop); 2253 opt_loop_vec_info loop_vinfo
2254 = vect_analyze_loop_form (loop, shared);
2341 if (!loop_vinfo) 2255 if (!loop_vinfo)
2342 { 2256 {
2343 if (dump_enabled_p ()) 2257 if (dump_enabled_p ())
2344 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2258 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2345 "bad loop form.\n"); 2259 "bad loop form.\n");
2346 return NULL; 2260 return loop_vinfo;
2347 } 2261 }
2348 2262
2349 bool fatal = false; 2263 bool fatal = false;
2350 2264
2351 if (orig_loop_vinfo) 2265 if (orig_loop_vinfo)
2352 LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = orig_loop_vinfo; 2266 LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = orig_loop_vinfo;
2353 2267
2354 if (vect_analyze_loop_2 (loop_vinfo, fatal)) 2268 opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal, &n_stmts);
2269 if (res)
2355 { 2270 {
2356 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; 2271 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1;
2357 2272
2358 return loop_vinfo; 2273 return loop_vinfo;
2359 } 2274 }
2360 2275
2361 delete loop_vinfo; 2276 delete loop_vinfo;
2362 2277
2363 vector_sizes &= ~current_vector_size; 2278 if (next_size == 0)
2279 autodetected_vector_size = current_vector_size;
2280
2281 if (next_size < vector_sizes.length ()
2282 && known_eq (vector_sizes[next_size], autodetected_vector_size))
2283 next_size += 1;
2284
2364 if (fatal 2285 if (fatal
2365 || vector_sizes == 0 2286 || next_size == vector_sizes.length ()
2366 || current_vector_size == 0) 2287 || known_eq (current_vector_size, 0U))
2367 return NULL; 2288 return opt_loop_vec_info::propagate_failure (res);
2368 2289
2369 /* Try the next biggest vector size. */ 2290 /* Try the next biggest vector size. */
2370 current_vector_size = 1 << floor_log2 (vector_sizes); 2291 current_vector_size = vector_sizes[next_size++];
2371 if (dump_enabled_p ()) 2292 if (dump_enabled_p ())
2372 dump_printf_loc (MSG_NOTE, vect_location, 2293 {
2373 "***** Re-trying analysis with " 2294 dump_printf_loc (MSG_NOTE, vect_location,
2374 "vector size %d\n", current_vector_size); 2295 "***** Re-trying analysis with "
2296 "vector size ");
2297 dump_dec (MSG_NOTE, current_vector_size);
2298 dump_printf (MSG_NOTE, "\n");
2299 }
2375 } 2300 }
2376 } 2301 }
2377 2302
2378 2303 /* Return true if there is an in-order reduction function for CODE, storing
2379 /* Function reduction_code_for_scalar_code 2304 it in *REDUC_FN if so. */
2305
2306 static bool
2307 fold_left_reduction_fn (tree_code code, internal_fn *reduc_fn)
2308 {
2309 switch (code)
2310 {
2311 case PLUS_EXPR:
2312 *reduc_fn = IFN_FOLD_LEFT_PLUS;
2313 return true;
2314
2315 default:
2316 return false;
2317 }
2318 }
2319
2320 /* Function reduction_fn_for_scalar_code
2380 2321
2381 Input: 2322 Input:
2382 CODE - tree_code of a reduction operation. 2323 CODE - tree_code of a reduction operation.
2383 2324
2384 Output: 2325 Output:
2385 REDUC_CODE - the corresponding tree-code to be used to reduce the 2326 REDUC_FN - the corresponding internal function to be used to reduce the
2386 vector of partial results into a single scalar result, or ERROR_MARK 2327 vector of partial results into a single scalar result, or IFN_LAST
2387 if the operation is a supported reduction operation, but does not have 2328 if the operation is a supported reduction operation, but does not have
2388 such a tree-code. 2329 such an internal function.
2389 2330
2390 Return FALSE if CODE currently cannot be vectorized as a reduction. */ 2331 Return FALSE if CODE currently cannot be vectorized as a reduction. */
2391 2332
2392 static bool 2333 static bool
2393 reduction_code_for_scalar_code (enum tree_code code, 2334 reduction_fn_for_scalar_code (enum tree_code code, internal_fn *reduc_fn)
2394 enum tree_code *reduc_code)
2395 { 2335 {
2396 switch (code) 2336 switch (code)
2397 { 2337 {
2398 case MAX_EXPR: 2338 case MAX_EXPR:
2399 *reduc_code = REDUC_MAX_EXPR; 2339 *reduc_fn = IFN_REDUC_MAX;
2400 return true; 2340 return true;
2401 2341
2402 case MIN_EXPR: 2342 case MIN_EXPR:
2403 *reduc_code = REDUC_MIN_EXPR; 2343 *reduc_fn = IFN_REDUC_MIN;
2404 return true; 2344 return true;
2405 2345
2406 case PLUS_EXPR: 2346 case PLUS_EXPR:
2407 *reduc_code = REDUC_PLUS_EXPR; 2347 *reduc_fn = IFN_REDUC_PLUS;
2408 return true; 2348 return true;
2349
2350 case BIT_AND_EXPR:
2351 *reduc_fn = IFN_REDUC_AND;
2352 return true;
2353
2354 case BIT_IOR_EXPR:
2355 *reduc_fn = IFN_REDUC_IOR;
2356 return true;
2357
2358 case BIT_XOR_EXPR:
2359 *reduc_fn = IFN_REDUC_XOR;
2360 return true;
2409 2361
2410 case MULT_EXPR: 2362 case MULT_EXPR:
2411 case MINUS_EXPR: 2363 case MINUS_EXPR:
2412 case BIT_IOR_EXPR: 2364 *reduc_fn = IFN_LAST;
2413 case BIT_XOR_EXPR:
2414 case BIT_AND_EXPR:
2415 *reduc_code = ERROR_MARK;
2416 return true; 2365 return true;
2417 2366
2418 default: 2367 default:
2419 return false; 2368 return false;
2420 } 2369 }
2421 } 2370 }
2422 2371
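As an aside for readers of this hunk: each IFN_REDUC_* function returned above collapses one vector of partial results into a scalar. A minimal sketch of how a caller might use the mapping, and of the value IFN_REDUC_PLUS is assumed to compute (hypothetical code, not part of this change):

  internal_fn reduc_fn;
  if (reduction_fn_for_scalar_code (PLUS_EXPR, &reduc_fn)
      && reduc_fn != IFN_LAST)
    {
      /* reduc_fn is IFN_REDUC_PLUS: for a vector of partial sums
	 { p0, p1, p2, p3 } the target is expected to produce the
	 scalar p0 + p1 + p2 + p3 as a single reduction operation.  */
    }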
2372 /* If there is a neutral value X such that SLP reduction NODE would not
2373 be affected by the introduction of additional X elements, return that X,
2374 otherwise return null. CODE is the code of the reduction. REDUC_CHAIN
2375 is true if the SLP statements perform a single reduction, false if each
2376 statement performs an independent reduction. */
2377
2378 static tree
2379 neutral_op_for_slp_reduction (slp_tree slp_node, tree_code code,
2380 bool reduc_chain)
2381 {
2382 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
2383 stmt_vec_info stmt_vinfo = stmts[0];
2384 tree vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
2385 tree scalar_type = TREE_TYPE (vector_type);
2386 struct loop *loop = gimple_bb (stmt_vinfo->stmt)->loop_father;
2387 gcc_assert (loop);
2388
2389 switch (code)
2390 {
2391 case WIDEN_SUM_EXPR:
2392 case DOT_PROD_EXPR:
2393 case SAD_EXPR:
2394 case PLUS_EXPR:
2395 case MINUS_EXPR:
2396 case BIT_IOR_EXPR:
2397 case BIT_XOR_EXPR:
2398 return build_zero_cst (scalar_type);
2399
2400 case MULT_EXPR:
2401 return build_one_cst (scalar_type);
2402
2403 case BIT_AND_EXPR:
2404 return build_all_ones_cst (scalar_type);
2405
2406 case MAX_EXPR:
2407 case MIN_EXPR:
2408 /* For MIN/MAX the initial values are neutral. A reduction chain
2409 has only a single initial value, so that value is neutral for
2410 all statements. */
2411 if (reduc_chain)
2412 return PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt,
2413 loop_preheader_edge (loop));
2414 return NULL_TREE;
2415
2416 default:
2417 return NULL_TREE;
2418 }
2419 }
2423 2420
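To illustrate the notion of a neutral value with assumed numbers (not taken from the patch): padding a PLUS_EXPR SLP reduction with extra zeros, or a MIN/MAX reduction chain with extra copies of its initial value, cannot change the final result, which is what lets the vectorizer widen the group to a full vector.

  int sum6 = a[0] + a[1] + a[2] + a[3] + a[4] + a[5];
  int sum8 = a[0] + a[1] + a[2] + a[3] + a[4] + a[5] + 0 + 0;  /* == sum6 */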
2424 /* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement 2421 /* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement
2425 STMT is printed with a message MSG. */ 2422 STMT is printed with a message MSG. */
2426 2423
2427 static void 2424 static void
2428 report_vect_op (dump_flags_t msg_type, gimple *stmt, const char *msg) 2425 report_vect_op (dump_flags_t msg_type, gimple *stmt, const char *msg)
2429 { 2426 {
2430 dump_printf_loc (msg_type, vect_location, "%s", msg); 2427 dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt);
2431 dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0);
2432 } 2428 }
2433 2429
2430 /* DEF_STMT_INFO occurs in a loop that contains a potential reduction
2431 operation. Return true if the results of DEF_STMT_INFO are something
2432 that can be accumulated by such a reduction. */
2433
2434 static bool
2435 vect_valid_reduction_input_p (stmt_vec_info def_stmt_info)
2436 {
2437 return (is_gimple_assign (def_stmt_info->stmt)
2438 || is_gimple_call (def_stmt_info->stmt)
2439 || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_induction_def
2440 || (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI
2441 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def
2442 && !is_loop_header_bb_p (gimple_bb (def_stmt_info->stmt))));
2443 }
2434 2444
2435 /* Detect SLP reduction of the form: 2445 /* Detect SLP reduction of the form:
2436 2446
2437 #a1 = phi <a5, a0> 2447 #a1 = phi <a5, a0>
2438 a2 = operation (a1) 2448 a2 = operation (a1)
2453 gimple *first_stmt) 2463 gimple *first_stmt)
2454 { 2464 {
2455 struct loop *loop = (gimple_bb (phi))->loop_father; 2465 struct loop *loop = (gimple_bb (phi))->loop_father;
2456 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); 2466 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2457 enum tree_code code; 2467 enum tree_code code;
2458 gimple *current_stmt = NULL, *loop_use_stmt = NULL, *first, *next_stmt; 2468 gimple *loop_use_stmt = NULL;
2459 stmt_vec_info use_stmt_info, current_stmt_info; 2469 stmt_vec_info use_stmt_info, current_stmt_info = NULL;
2460 tree lhs; 2470 tree lhs;
2461 imm_use_iterator imm_iter; 2471 imm_use_iterator imm_iter;
2462 use_operand_p use_p; 2472 use_operand_p use_p;
2463 int nloop_uses, size = 0, n_out_of_loop_uses; 2473 int nloop_uses, size = 0, n_out_of_loop_uses;
2464 bool found = false; 2474 bool found = false;
2515 || code != gimple_assign_rhs_code (loop_use_stmt) 2525 || code != gimple_assign_rhs_code (loop_use_stmt)
2516 || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt))) 2526 || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt)))
2517 return false; 2527 return false;
2518 2528
2519 /* Insert USE_STMT into reduction chain. */ 2529 /* Insert USE_STMT into reduction chain. */
2520 use_stmt_info = vinfo_for_stmt (loop_use_stmt); 2530 use_stmt_info = loop_info->lookup_stmt (loop_use_stmt);
2521 if (current_stmt) 2531 if (current_stmt_info)
2522 { 2532 {
2523 current_stmt_info = vinfo_for_stmt (current_stmt); 2533 REDUC_GROUP_NEXT_ELEMENT (current_stmt_info) = use_stmt_info;
2524 GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt; 2534 REDUC_GROUP_FIRST_ELEMENT (use_stmt_info)
2525 GROUP_FIRST_ELEMENT (use_stmt_info) 2535 = REDUC_GROUP_FIRST_ELEMENT (current_stmt_info);
2526 = GROUP_FIRST_ELEMENT (current_stmt_info);
2527 } 2536 }
2528 else 2537 else
2529 GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt; 2538 REDUC_GROUP_FIRST_ELEMENT (use_stmt_info) = use_stmt_info;
2530 2539
2531 lhs = gimple_assign_lhs (loop_use_stmt); 2540 lhs = gimple_assign_lhs (loop_use_stmt);
2532 current_stmt = loop_use_stmt; 2541 current_stmt_info = use_stmt_info;
2533 size++; 2542 size++;
2534 } 2543 }
2535 2544
2536 if (!found || loop_use_stmt != phi || size < 2) 2545 if (!found || loop_use_stmt != phi || size < 2)
2537 return false; 2546 return false;
2538 2547
2539 /* Swap the operands, if needed, to make the reduction operand be the second 2548 /* Swap the operands, if needed, to make the reduction operand be the second
2540 operand. */ 2549 operand. */
2541 lhs = PHI_RESULT (phi); 2550 lhs = PHI_RESULT (phi);
2542 next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt)); 2551 stmt_vec_info next_stmt_info = REDUC_GROUP_FIRST_ELEMENT (current_stmt_info);
2543 while (next_stmt) 2552 while (next_stmt_info)
2544 { 2553 {
2554 gassign *next_stmt = as_a <gassign *> (next_stmt_info->stmt);
2545 if (gimple_assign_rhs2 (next_stmt) == lhs) 2555 if (gimple_assign_rhs2 (next_stmt) == lhs)
2546 { 2556 {
2547 tree op = gimple_assign_rhs1 (next_stmt); 2557 tree op = gimple_assign_rhs1 (next_stmt);
2548 gimple *def_stmt = NULL; 2558 stmt_vec_info def_stmt_info = loop_info->lookup_def (op);
2549
2550 if (TREE_CODE (op) == SSA_NAME)
2551 def_stmt = SSA_NAME_DEF_STMT (op);
2552 2559
2553 /* Check that the other def is either defined in the loop 2560 /* Check that the other def is either defined in the loop
2554 ("vect_internal_def"), or it's an induction (defined by a 2561 ("vect_internal_def"), or it's an induction (defined by a
2555 loop-header phi-node). */ 2562 loop-header phi-node). */
2556 if (def_stmt 2563 if (def_stmt_info
2557 && gimple_bb (def_stmt) 2564 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
2558 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) 2565 && vect_valid_reduction_input_p (def_stmt_info))
2559 && (is_gimple_assign (def_stmt)
2560 || is_gimple_call (def_stmt)
2561 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2562 == vect_induction_def
2563 || (gimple_code (def_stmt) == GIMPLE_PHI
2564 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2565 == vect_internal_def
2566 && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2567 { 2566 {
2568 lhs = gimple_assign_lhs (next_stmt); 2567 lhs = gimple_assign_lhs (next_stmt);
2569 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); 2568 next_stmt_info = REDUC_GROUP_NEXT_ELEMENT (next_stmt_info);
2570 continue; 2569 continue;
2571 } 2570 }
2572 2571
2573 return false; 2572 return false;
2574 } 2573 }
2575 else 2574 else
2576 { 2575 {
2577 tree op = gimple_assign_rhs2 (next_stmt); 2576 tree op = gimple_assign_rhs2 (next_stmt);
2578 gimple *def_stmt = NULL; 2577 stmt_vec_info def_stmt_info = loop_info->lookup_def (op);
2579
2580 if (TREE_CODE (op) == SSA_NAME)
2581 def_stmt = SSA_NAME_DEF_STMT (op);
2582 2578
2583 /* Check that the other def is either defined in the loop 2579 /* Check that the other def is either defined in the loop
2584 ("vect_internal_def"), or it's an induction (defined by a 2580 ("vect_internal_def"), or it's an induction (defined by a
2585 loop-header phi-node). */ 2581 loop-header phi-node). */
2586 if (def_stmt 2582 if (def_stmt_info
2587 && gimple_bb (def_stmt) 2583 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))
2588 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) 2584 && vect_valid_reduction_input_p (def_stmt_info))
2589 && (is_gimple_assign (def_stmt)
2590 || is_gimple_call (def_stmt)
2591 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2592 == vect_induction_def
2593 || (gimple_code (def_stmt) == GIMPLE_PHI
2594 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
2595 == vect_internal_def
2596 && !is_loop_header_bb_p (gimple_bb (def_stmt)))))
2597 { 2585 {
2598 if (dump_enabled_p ()) 2586 if (dump_enabled_p ())
2599 { 2587 dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: %G",
2600 dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: "); 2588 next_stmt);
2601 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0);
2602 }
2603 2589
2604 swap_ssa_operands (next_stmt, 2590 swap_ssa_operands (next_stmt,
2605 gimple_assign_rhs1_ptr (next_stmt), 2591 gimple_assign_rhs1_ptr (next_stmt),
2606 gimple_assign_rhs2_ptr (next_stmt)); 2592 gimple_assign_rhs2_ptr (next_stmt));
2607 update_stmt (next_stmt); 2593 update_stmt (next_stmt);
2612 else 2598 else
2613 return false; 2599 return false;
2614 } 2600 }
2615 2601
2616 lhs = gimple_assign_lhs (next_stmt); 2602 lhs = gimple_assign_lhs (next_stmt);
2617 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); 2603 next_stmt_info = REDUC_GROUP_NEXT_ELEMENT (next_stmt_info);
2618 } 2604 }
2619 2605
2620 /* Save the chain for further analysis in SLP detection. */ 2606 /* Save the chain for further analysis in SLP detection. */
2621 first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt)); 2607 stmt_vec_info first_stmt_info
2622 LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first); 2608 = REDUC_GROUP_FIRST_ELEMENT (current_stmt_info);
2623 GROUP_SIZE (vinfo_for_stmt (first)) = size; 2609 LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first_stmt_info);
2610 REDUC_GROUP_SIZE (first_stmt_info) = size;
2624 2611
2625 return true; 2612 return true;
2613 }
2614
2615 /* Return true if we need an in-order reduction for operation CODE
2616 on type TYPE. NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer
2617 overflow must wrap. */
2618
2619 static bool
2620 needs_fold_left_reduction_p (tree type, tree_code code,
2621 bool need_wrapping_integral_overflow)
2622 {
2623 /* CHECKME: check for !flag_finite_math_only too? */
2624 if (SCALAR_FLOAT_TYPE_P (type))
2625 switch (code)
2626 {
2627 case MIN_EXPR:
2628 case MAX_EXPR:
2629 return false;
2630
2631 default:
2632 return !flag_associative_math;
2633 }
2634
2635 if (INTEGRAL_TYPE_P (type))
2636 {
2637 if (!operation_no_trapping_overflow (type, code))
2638 return true;
2639 if (need_wrapping_integral_overflow
2640 && !TYPE_OVERFLOW_WRAPS (type)
2641 && operation_can_overflow (code))
2642 return true;
2643 return false;
2644 }
2645
2646 if (SAT_FIXED_POINT_TYPE_P (type))
2647 return true;
2648
2649 return false;
2650 }
2651
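A small self-contained illustration (not part of the patch) of why a float PLUS_EXPR reduction falls into the in-order FOLD_LEFT_REDUCTION case unless -fassociative-math is given: reassociating the additions can change the rounded result.

  #include <stdio.h>

  int
  main (void)
  {
    float a[4] = { 1.0e8f, 1.0f, 1.0f, -1.0e8f };
    /* Strict left-to-right order, as the scalar loop computes it.  */
    float in_order = ((a[0] + a[1]) + a[2]) + a[3];  /* 0.0f: the 1.0f's are absorbed */
    /* Pairwise order, as a reassociating vectorizer might compute it.  */
    float reassoc = (a[0] + a[3]) + (a[1] + a[2]);   /* 2.0f */
    printf ("%g %g\n", in_order, reassoc);
    return 0;
  }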
2652 /* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and
2653 reduction operation CODE has a handled computation expression. */
2654
2655 bool
2656 check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi,
2657 tree loop_arg, enum tree_code code)
2658 {
2659 auto_vec<std::pair<ssa_op_iter, use_operand_p> > path;
2660 auto_bitmap visited;
2661 tree lookfor = PHI_RESULT (phi);
2662 ssa_op_iter curri;
2663 use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE);
2664 while (USE_FROM_PTR (curr) != loop_arg)
2665 curr = op_iter_next_use (&curri);
2666 curri.i = curri.numops;
2667 do
2668 {
2669 path.safe_push (std::make_pair (curri, curr));
2670 tree use = USE_FROM_PTR (curr);
2671 if (use == lookfor)
2672 break;
2673 gimple *def = SSA_NAME_DEF_STMT (use);
2674 if (gimple_nop_p (def)
2675 || ! flow_bb_inside_loop_p (loop, gimple_bb (def)))
2676 {
2677 pop:
2678 do
2679 {
2680 std::pair<ssa_op_iter, use_operand_p> x = path.pop ();
2681 curri = x.first;
2682 curr = x.second;
2683 do
2684 curr = op_iter_next_use (&curri);
2685 /* Skip already visited or non-SSA operands (from iterating
2686 over PHI args). */
2687 while (curr != NULL_USE_OPERAND_P
2688 && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
2689 || ! bitmap_set_bit (visited,
2690 SSA_NAME_VERSION
2691 (USE_FROM_PTR (curr)))));
2692 }
2693 while (curr == NULL_USE_OPERAND_P && ! path.is_empty ());
2694 if (curr == NULL_USE_OPERAND_P)
2695 break;
2696 }
2697 else
2698 {
2699 if (gimple_code (def) == GIMPLE_PHI)
2700 curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE);
2701 else
2702 curr = op_iter_init_use (&curri, def, SSA_OP_USE);
2703 while (curr != NULL_USE_OPERAND_P
2704 && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
2705 || ! bitmap_set_bit (visited,
2706 SSA_NAME_VERSION
2707 (USE_FROM_PTR (curr)))))
2708 curr = op_iter_next_use (&curri);
2709 if (curr == NULL_USE_OPERAND_P)
2710 goto pop;
2711 }
2712 }
2713 while (1);
2714 if (dump_file && (dump_flags & TDF_DETAILS))
2715 {
2716 dump_printf_loc (MSG_NOTE, loc, "reduction path: ");
2717 unsigned i;
2718 std::pair<ssa_op_iter, use_operand_p> *x;
2719 FOR_EACH_VEC_ELT (path, i, x)
2720 dump_printf (MSG_NOTE, "%T ", USE_FROM_PTR (x->second));
2721 dump_printf (MSG_NOTE, "\n");
2722 }
2723
2724 /* Check whether the reduction path detected is valid. */
2725 bool fail = path.length () == 0;
2726 bool neg = false;
2727 for (unsigned i = 1; i < path.length (); ++i)
2728 {
2729 gimple *use_stmt = USE_STMT (path[i].second);
2730 tree op = USE_FROM_PTR (path[i].second);
2731 if (! has_single_use (op)
2732 || ! is_gimple_assign (use_stmt))
2733 {
2734 fail = true;
2735 break;
2736 }
2737 if (gimple_assign_rhs_code (use_stmt) != code)
2738 {
2739 if (code == PLUS_EXPR
2740 && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
2741 {
2742 /* Track whether we negate the reduction value each iteration. */
2743 if (gimple_assign_rhs2 (use_stmt) == op)
2744 neg = ! neg;
2745 }
2746 else
2747 {
2748 fail = true;
2749 break;
2750 }
2751 }
2752 }
2753 return ! fail && ! neg;
2626 } 2754 }
2627 2755
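For orientation, a sketch of the kind of chain check_reduction_path accepts, with hypothetical GIMPLE-like names and assuming CODE is PLUS_EXPR: starting from the latch argument s_4 it walks the single-use definitions back to the PHI result, requiring every statement on the way to use PLUS_EXPR (or a MINUS_EXPR whose sign flips cancel out).

  s_1 = PHI <s_init(preheader), s_4(latch)>
  s_2 = s_1 + a[i];
  s_3 = s_2 + b[i];
  s_4 = s_3 + c[i];   /* path s_4 -> s_3 -> s_2 -> s_1: accepted */

If any intermediate value had a second use, or one of the statements used a different code, the path would be rejected and the cycle not treated as a reduction.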
2628 2756
2629 /* Function vect_is_simple_reduction 2757 /* Function vect_is_simple_reduction
2630 2758
2668 if (a[i] < val) 2796 if (a[i] < val)
2669 ret_val = a[i]; 2797 ret_val = a[i];
2670 2798
2671 */ 2799 */
2672 2800
2673 static gimple * 2801 static stmt_vec_info
2674 vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi, 2802 vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
2675 bool *double_reduc, 2803 bool *double_reduc,
2676 bool need_wrapping_integral_overflow, 2804 bool need_wrapping_integral_overflow,
2677 enum vect_reduction_type *v_reduc_type) 2805 enum vect_reduction_type *v_reduc_type)
2678 { 2806 {
2807 gphi *phi = as_a <gphi *> (phi_info->stmt);
2679 struct loop *loop = (gimple_bb (phi))->loop_father; 2808 struct loop *loop = (gimple_bb (phi))->loop_father;
2680 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); 2809 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info);
2681 gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL; 2810 gimple *phi_use_stmt = NULL;
2682 enum tree_code orig_code, code; 2811 enum tree_code orig_code, code;
2683 tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE; 2812 tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE;
2684 tree type; 2813 tree type;
2685 int nloop_uses; 2814 int nloop_uses;
2686 tree name; 2815 tree name;
2729 edge latch_e = loop_latch_edge (loop); 2858 edge latch_e = loop_latch_edge (loop);
2730 tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); 2859 tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e);
2731 if (TREE_CODE (loop_arg) != SSA_NAME) 2860 if (TREE_CODE (loop_arg) != SSA_NAME)
2732 { 2861 {
2733 if (dump_enabled_p ()) 2862 if (dump_enabled_p ())
2734 { 2863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2735 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2864 "reduction: not ssa_name: %T\n", loop_arg);
2736 "reduction: not ssa_name: ");
2737 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg);
2738 dump_printf (MSG_MISSED_OPTIMIZATION, "\n");
2739 }
2740 return NULL; 2865 return NULL;
2741 } 2866 }
2742 2867
2743 def_stmt = SSA_NAME_DEF_STMT (loop_arg); 2868 stmt_vec_info def_stmt_info = loop_info->lookup_def (loop_arg);
2744 if (is_gimple_assign (def_stmt)) 2869 if (!def_stmt_info
2870 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)))
2871 return NULL;
2872
2873 if (gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt))
2745 { 2874 {
2746 name = gimple_assign_lhs (def_stmt); 2875 name = gimple_assign_lhs (def_stmt);
2747 phi_def = false; 2876 phi_def = false;
2748 } 2877 }
2749 else if (gimple_code (def_stmt) == GIMPLE_PHI) 2878 else if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt))
2750 { 2879 {
2751 name = PHI_RESULT (def_stmt); 2880 name = PHI_RESULT (def_stmt);
2752 phi_def = true; 2881 phi_def = true;
2753 } 2882 }
2754 else 2883 else
2755 { 2884 {
2756 if (dump_enabled_p ()) 2885 if (dump_enabled_p ())
2757 { 2886 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
2758 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 2887 "reduction: unhandled reduction operation: %G",
2759 "reduction: unhandled reduction operation: "); 2888 def_stmt_info->stmt);
2760 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def_stmt, 0);
2761 }
2762 return NULL; 2889 return NULL;
2763 } 2890 }
2764
2765 if (! flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
2766 return NULL;
2767 2891
2768 nloop_uses = 0; 2892 nloop_uses = 0;
2769 auto_vec<gphi *, 3> lcphis; 2893 auto_vec<gphi *, 3> lcphis;
2770 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name) 2894 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name)
2771 { 2895 {
2788 2912
2789 /* If DEF_STMT is a phi node itself, we expect it to have a single argument 2913 /* If DEF_STMT is a phi node itself, we expect it to have a single argument
2790 defined in the inner loop. */ 2914 defined in the inner loop. */
2791 if (phi_def) 2915 if (phi_def)
2792 { 2916 {
2917 gphi *def_stmt = as_a <gphi *> (def_stmt_info->stmt);
2793 op1 = PHI_ARG_DEF (def_stmt, 0); 2918 op1 = PHI_ARG_DEF (def_stmt, 0);
2794 2919
2795 if (gimple_phi_num_args (def_stmt) != 1 2920 if (gimple_phi_num_args (def_stmt) != 1
2796 || TREE_CODE (op1) != SSA_NAME) 2921 || TREE_CODE (op1) != SSA_NAME)
2797 { 2922 {
2800 "unsupported phi node definition.\n"); 2925 "unsupported phi node definition.\n");
2801 2926
2802 return NULL; 2927 return NULL;
2803 } 2928 }
2804 2929
2805 def1 = SSA_NAME_DEF_STMT (op1); 2930 gimple *def1 = SSA_NAME_DEF_STMT (op1);
2806 if (gimple_bb (def1) 2931 if (gimple_bb (def1)
2807 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) 2932 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
2808 && loop->inner 2933 && loop->inner
2809 && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1)) 2934 && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1))
2810 && is_gimple_assign (def1) 2935 && is_gimple_assign (def1)
2936 && is_a <gphi *> (phi_use_stmt)
2811 && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt))) 2937 && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt)))
2812 { 2938 {
2813 if (dump_enabled_p ()) 2939 if (dump_enabled_p ())
2814 report_vect_op (MSG_NOTE, def_stmt, 2940 report_vect_op (MSG_NOTE, def_stmt,
2815 "detected double reduction: "); 2941 "detected double reduction: ");
2816 2942
2817 *double_reduc = true; 2943 *double_reduc = true;
2818 return def_stmt; 2944 return def_stmt_info;
2819 } 2945 }
2820 2946
2821 return NULL; 2947 return NULL;
2822 } 2948 }
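The block above is what fires for a "double reduction": an outer-loop reduction PHI whose latch value is defined by a single-argument PHI (the inner loop's exit PHI), with the actual accumulation happening in the inner loop. Roughly, in illustrative C (not taken from the patch):

  int sum = 0;
  for (int i = 0; i < n; i++)      /* outer loop: reduction PHI for sum   */
    for (int j = 0; j < m; j++)    /* inner loop: the actual accumulation */
      sum += a[i][j];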
2823 2949
2839 if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt))) 2965 if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt)))
2840 check_reduction = true; 2966 check_reduction = true;
2841 } 2967 }
2842 } 2968 }
2843 2969
2970 gassign *def_stmt = as_a <gassign *> (def_stmt_info->stmt);
2844 bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop); 2971 bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop);
2845 code = orig_code = gimple_assign_rhs_code (def_stmt); 2972 code = orig_code = gimple_assign_rhs_code (def_stmt);
2846 2973
2847 /* We can handle "res -= x[i]", which is non-associative by 2974 /* We can handle "res -= x[i]", which is non-associative by
2848 simply rewriting this into "res += -x[i]". Avoid changing 2975 simply rewriting this into "res += -x[i]". Avoid changing
2914 && !types_compatible_p (type, TREE_TYPE (op4)))) 3041 && !types_compatible_p (type, TREE_TYPE (op4))))
2915 { 3042 {
2916 if (dump_enabled_p ()) 3043 if (dump_enabled_p ())
2917 { 3044 {
2918 dump_printf_loc (MSG_NOTE, vect_location, 3045 dump_printf_loc (MSG_NOTE, vect_location,
2919 "reduction: multiple types: operation type: "); 3046 "reduction: multiple types: operation type: "
2920 dump_generic_expr (MSG_NOTE, TDF_SLIM, type); 3047 "%T, operands types: %T,%T",
2921 dump_printf (MSG_NOTE, ", operands types: "); 3048 type, TREE_TYPE (op1), TREE_TYPE (op2));
2922 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2923 TREE_TYPE (op1));
2924 dump_printf (MSG_NOTE, ",");
2925 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2926 TREE_TYPE (op2));
2927 if (op3) 3049 if (op3)
2928 { 3050 dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op3));
2929 dump_printf (MSG_NOTE, ",");
2930 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2931 TREE_TYPE (op3));
2932 }
2933 3051
2934 if (op4) 3052 if (op4)
2935 { 3053 dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op4));
2936 dump_printf (MSG_NOTE, ",");
2937 dump_generic_expr (MSG_NOTE, TDF_SLIM,
2938 TREE_TYPE (op4));
2939 }
2940 dump_printf (MSG_NOTE, "\n"); 3054 dump_printf (MSG_NOTE, "\n");
2941 } 3055 }
2942 3056
2943 return NULL; 3057 return NULL;
2944 } 3058 }
2945 3059
2946 /* Check that it's ok to change the order of the computation. 3060 /* Check whether it's ok to change the order of the computation.
2947 Generally, when vectorizing a reduction we change the order of the 3061 Generally, when vectorizing a reduction we change the order of the
2948 computation. This may change the behavior of the program in some 3062 computation. This may change the behavior of the program in some
2949 cases, so we need to check that this is ok. One exception is when 3063 cases, so we need to check that this is ok. One exception is when
2950 vectorizing an outer-loop: the inner-loop is executed sequentially, 3064 vectorizing an outer-loop: the inner-loop is executed sequentially,
2951 and therefore vectorizing reductions in the inner-loop during 3065 and therefore vectorizing reductions in the inner-loop during
2952 outer-loop vectorization is safe. */ 3066 outer-loop vectorization is safe. */
2953 3067 if (check_reduction
2954 if (*v_reduc_type != COND_REDUCTION 3068 && *v_reduc_type == TREE_CODE_REDUCTION
2955 && check_reduction) 3069 && needs_fold_left_reduction_p (type, code,
2956 { 3070 need_wrapping_integral_overflow))
2957 /* CHECKME: check for !flag_finite_math_only too? */ 3071 *v_reduc_type = FOLD_LEFT_REDUCTION;
2958 if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math)
2959 {
2960 /* Changing the order of operations changes the semantics. */
2961 if (dump_enabled_p ())
2962 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2963 "reduction: unsafe fp math optimization: ");
2964 return NULL;
2965 }
2966 else if (INTEGRAL_TYPE_P (type))
2967 {
2968 if (!operation_no_trapping_overflow (type, code))
2969 {
2970 /* Changing the order of operations changes the semantics. */
2971 if (dump_enabled_p ())
2972 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2973 "reduction: unsafe int math optimization"
2974 " (overflow traps): ");
2975 return NULL;
2976 }
2977 if (need_wrapping_integral_overflow
2978 && !TYPE_OVERFLOW_WRAPS (type)
2979 && operation_can_overflow (code))
2980 {
2981 /* Changing the order of operations changes the semantics. */
2982 if (dump_enabled_p ())
2983 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2984 "reduction: unsafe int math optimization"
2985 " (overflow doesn't wrap): ");
2986 return NULL;
2987 }
2988 }
2989 else if (SAT_FIXED_POINT_TYPE_P (type))
2990 {
2991 /* Changing the order of operations changes the semantics. */
2992 if (dump_enabled_p ())
2993 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
2994 "reduction: unsafe fixed-point math optimization: ");
2995 return NULL;
2996 }
2997 }
2998 3072
2999 /* Reduction is safe. We're dealing with one of the following: 3073 /* Reduction is safe. We're dealing with one of the following:
3000 1) integer arithmetic and no trapv 3074 1) integer arithmetic and no trapv
3001 2) floating point arithmetic, and special flags permit this optimization 3075 2) floating point arithmetic, and special flags permit this optimization
3002 3) nested cycle (i.e., outer loop vectorization). */ 3076 3) nested cycle (i.e., outer loop vectorization). */
3003 if (TREE_CODE (op1) == SSA_NAME) 3077 stmt_vec_info def1_info = loop_info->lookup_def (op1);
3004 def1 = SSA_NAME_DEF_STMT (op1); 3078 stmt_vec_info def2_info = loop_info->lookup_def (op2);
3005 3079 if (code != COND_EXPR && !def1_info && !def2_info)
3006 if (TREE_CODE (op2) == SSA_NAME)
3007 def2 = SSA_NAME_DEF_STMT (op2);
3008
3009 if (code != COND_EXPR
3010 && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2))))
3011 { 3080 {
3012 if (dump_enabled_p ()) 3081 if (dump_enabled_p ())
3013 report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: "); 3082 report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: ");
3014 return NULL; 3083 return NULL;
3015 } 3084 }
3016 3085
3017 /* Check that one def is the reduction def, defined by PHI, 3086 /* Check that one def is the reduction def, defined by PHI,
3018 the other def is either defined in the loop ("vect_internal_def"), 3087 the other def is either defined in the loop ("vect_internal_def"),
3019 or it's an induction (defined by a loop-header phi-node). */ 3088 or it's an induction (defined by a loop-header phi-node). */
3020 3089
3021 if (def2 && def2 == phi 3090 if (def2_info
3091 && def2_info->stmt == phi
3022 && (code == COND_EXPR 3092 && (code == COND_EXPR
3023 || !def1 || gimple_nop_p (def1) 3093 || !def1_info
3024 || !flow_bb_inside_loop_p (loop, gimple_bb (def1)) 3094 || !flow_bb_inside_loop_p (loop, gimple_bb (def1_info->stmt))
3025 || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1)) 3095 || vect_valid_reduction_input_p (def1_info)))
3026 && (is_gimple_assign (def1)
3027 || is_gimple_call (def1)
3028 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
3029 == vect_induction_def
3030 || (gimple_code (def1) == GIMPLE_PHI
3031 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1))
3032 == vect_internal_def
3033 && !is_loop_header_bb_p (gimple_bb (def1)))))))
3034 { 3096 {
3035 if (dump_enabled_p ()) 3097 if (dump_enabled_p ())
3036 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: "); 3098 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
3037 return def_stmt; 3099 return def_stmt_info;
3038 } 3100 }
3039 3101
3040 if (def1 && def1 == phi 3102 if (def1_info
3103 && def1_info->stmt == phi
3041 && (code == COND_EXPR 3104 && (code == COND_EXPR
3042 || !def2 || gimple_nop_p (def2) 3105 || !def2_info
3043 || !flow_bb_inside_loop_p (loop, gimple_bb (def2)) 3106 || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt))
3044 || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2)) 3107 || vect_valid_reduction_input_p (def2_info)))
3045 && (is_gimple_assign (def2)
3046 || is_gimple_call (def2)
3047 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
3048 == vect_induction_def
3049 || (gimple_code (def2) == GIMPLE_PHI
3050 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2))
3051 == vect_internal_def
3052 && !is_loop_header_bb_p (gimple_bb (def2)))))))
3053 { 3108 {
3054 if (! nested_in_vect_loop && orig_code != MINUS_EXPR) 3109 if (! nested_in_vect_loop && orig_code != MINUS_EXPR)
3055 { 3110 {
3056 /* Check if we can swap operands (just for simplicity - so that 3111 /* Check if we can swap operands (just for simplicity - so that
3057 the rest of the code can assume that the reduction variable 3112 the rest of the code can assume that the reduction variable
3099 { 3154 {
3100 if (dump_enabled_p ()) 3155 if (dump_enabled_p ())
3101 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: "); 3156 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: ");
3102 } 3157 }
3103 3158
3104 return def_stmt; 3159 return def_stmt_info;
3105 } 3160 }
3106 3161
3107 /* Try to find SLP reduction chain. */ 3162 /* Try to find SLP reduction chain. */
3108 if (! nested_in_vect_loop 3163 if (! nested_in_vect_loop
3109 && code != COND_EXPR 3164 && code != COND_EXPR
3112 { 3167 {
3113 if (dump_enabled_p ()) 3168 if (dump_enabled_p ())
3114 report_vect_op (MSG_NOTE, def_stmt, 3169 report_vect_op (MSG_NOTE, def_stmt,
3115 "reduction: detected reduction chain: "); 3170 "reduction: detected reduction chain: ");
3116 3171
3117 return def_stmt; 3172 return def_stmt_info;
3118 } 3173 }
3119 3174
3120 /* Dissolve the group possibly half-built by vect_is_slp_reduction. */ 3175 /* Dissolve the group possibly half-built by vect_is_slp_reduction. */
3121 gimple *first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (def_stmt)); 3176 stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (def_stmt_info);
3122 while (first) 3177 while (first)
3123 { 3178 {
3124 gimple *next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first)); 3179 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first);
3125 GROUP_FIRST_ELEMENT (vinfo_for_stmt (first)) = NULL; 3180 REDUC_GROUP_FIRST_ELEMENT (first) = NULL;
3126 GROUP_NEXT_ELEMENT (vinfo_for_stmt (first)) = NULL; 3181 REDUC_GROUP_NEXT_ELEMENT (first) = NULL;
3127 first = next; 3182 first = next;
3128 } 3183 }
3129 3184
3130 /* Look for the expression computing loop_arg from loop PHI result. */ 3185 /* Look for the expression computing loop_arg from loop PHI result. */
3131 auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; 3186 if (check_reduction_path (vect_location, loop, phi, loop_arg, code))
3132 auto_bitmap visited; 3187 return def_stmt_info;
3133 tree lookfor = PHI_RESULT (phi);
3134 ssa_op_iter curri;
3135 use_operand_p curr = op_iter_init_phiuse (&curri, as_a <gphi *>(phi),
3136 SSA_OP_USE);
3137 while (USE_FROM_PTR (curr) != loop_arg)
3138 curr = op_iter_next_use (&curri);
3139 curri.i = curri.numops;
3140 do
3141 {
3142 path.safe_push (std::make_pair (curri, curr));
3143 tree use = USE_FROM_PTR (curr);
3144 if (use == lookfor)
3145 break;
3146 gimple *def = SSA_NAME_DEF_STMT (use);
3147 if (gimple_nop_p (def)
3148 || ! flow_bb_inside_loop_p (loop, gimple_bb (def)))
3149 {
3150 pop:
3151 do
3152 {
3153 std::pair<ssa_op_iter, use_operand_p> x = path.pop ();
3154 curri = x.first;
3155 curr = x.second;
3156 do
3157 curr = op_iter_next_use (&curri);
3158 /* Skip already visited or non-SSA operands (from iterating
3159 over PHI args). */
3160 while (curr != NULL_USE_OPERAND_P
3161 && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
3162 || ! bitmap_set_bit (visited,
3163 SSA_NAME_VERSION
3164 (USE_FROM_PTR (curr)))));
3165 }
3166 while (curr == NULL_USE_OPERAND_P && ! path.is_empty ());
3167 if (curr == NULL_USE_OPERAND_P)
3168 break;
3169 }
3170 else
3171 {
3172 if (gimple_code (def) == GIMPLE_PHI)
3173 curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE);
3174 else
3175 curr = op_iter_init_use (&curri, def, SSA_OP_USE);
3176 while (curr != NULL_USE_OPERAND_P
3177 && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME
3178 || ! bitmap_set_bit (visited,
3179 SSA_NAME_VERSION
3180 (USE_FROM_PTR (curr)))))
3181 curr = op_iter_next_use (&curri);
3182 if (curr == NULL_USE_OPERAND_P)
3183 goto pop;
3184 }
3185 }
3186 while (1);
3187 if (dump_file && (dump_flags & TDF_DETAILS))
3188 {
3189 dump_printf_loc (MSG_NOTE, vect_location,
3190 "reduction path: ");
3191 unsigned i;
3192 std::pair<ssa_op_iter, use_operand_p> *x;
3193 FOR_EACH_VEC_ELT (path, i, x)
3194 {
3195 dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second));
3196 dump_printf (MSG_NOTE, " ");
3197 }
3198 dump_printf (MSG_NOTE, "\n");
3199 }
3200
3201 /* Check whether the reduction path detected is valid. */
3202 bool fail = path.length () == 0;
3203 bool neg = false;
3204 for (unsigned i = 1; i < path.length (); ++i)
3205 {
3206 gimple *use_stmt = USE_STMT (path[i].second);
3207 tree op = USE_FROM_PTR (path[i].second);
3208 if (! has_single_use (op)
3209 || ! is_gimple_assign (use_stmt))
3210 {
3211 fail = true;
3212 break;
3213 }
3214 if (gimple_assign_rhs_code (use_stmt) != code)
3215 {
3216 if (code == PLUS_EXPR
3217 && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR)
3218 {
3219 /* Track whether we negate the reduction value each iteration. */
3220 if (gimple_assign_rhs2 (use_stmt) == op)
3221 neg = ! neg;
3222 }
3223 else
3224 {
3225 fail = true;
3226 break;
3227 }
3228 }
3229 }
3230 if (! fail && ! neg)
3231 return def_stmt;
3232 3188
3233 if (dump_enabled_p ()) 3189 if (dump_enabled_p ())
3234 { 3190 {
3235 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, 3191 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt,
3236 "reduction: unknown pattern: "); 3192 "reduction: unknown pattern: ");
3241 3197
3242 /* Wrapper around vect_is_simple_reduction, which will modify code 3198 /* Wrapper around vect_is_simple_reduction, which will modify code
3243 in-place if it enables detection of more reductions. Arguments 3199 in-place if it enables detection of more reductions. Arguments
3244 as there. */ 3200 as there. */
3245 3201
3246 gimple * 3202 stmt_vec_info
3247 vect_force_simple_reduction (loop_vec_info loop_info, gimple *phi, 3203 vect_force_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info,
3248 bool *double_reduc, 3204 bool *double_reduc,
3249 bool need_wrapping_integral_overflow) 3205 bool need_wrapping_integral_overflow)
3250 { 3206 {
3251 enum vect_reduction_type v_reduc_type; 3207 enum vect_reduction_type v_reduc_type;
3252 gimple *def = vect_is_simple_reduction (loop_info, phi, double_reduc, 3208 stmt_vec_info def_info
3253 need_wrapping_integral_overflow, 3209 = vect_is_simple_reduction (loop_info, phi_info, double_reduc,
3254 &v_reduc_type); 3210 need_wrapping_integral_overflow,
3255 if (def) 3211 &v_reduc_type);
3256 { 3212 if (def_info)
3257 stmt_vec_info reduc_def_info = vinfo_for_stmt (phi); 3213 {
3258 STMT_VINFO_REDUC_TYPE (reduc_def_info) = v_reduc_type; 3214 STMT_VINFO_REDUC_TYPE (phi_info) = v_reduc_type;
3259 STMT_VINFO_REDUC_DEF (reduc_def_info) = def; 3215 STMT_VINFO_REDUC_DEF (phi_info) = def_info;
3260 reduc_def_info = vinfo_for_stmt (def); 3216 STMT_VINFO_REDUC_TYPE (def_info) = v_reduc_type;
3261 STMT_VINFO_REDUC_DEF (reduc_def_info) = phi; 3217 STMT_VINFO_REDUC_DEF (def_info) = phi_info;
3262 } 3218 }
3263 return def; 3219 return def_info;
3264 } 3220 }
3265 3221
3266 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */ 3222 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */
3267 int 3223 int
3268 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, 3224 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue,
3270 stmt_vector_for_cost *scalar_cost_vec, 3226 stmt_vector_for_cost *scalar_cost_vec,
3271 stmt_vector_for_cost *prologue_cost_vec, 3227 stmt_vector_for_cost *prologue_cost_vec,
3272 stmt_vector_for_cost *epilogue_cost_vec) 3228 stmt_vector_for_cost *epilogue_cost_vec)
3273 { 3229 {
3274 int retval = 0; 3230 int retval = 0;
3275 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 3231 int assumed_vf = vect_vf_for_cost (loop_vinfo);
3276 3232
3277 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) 3233 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
3278 { 3234 {
3279 *peel_iters_epilogue = vf/2; 3235 *peel_iters_epilogue = assumed_vf / 2;
3280 if (dump_enabled_p ()) 3236 if (dump_enabled_p ())
3281 dump_printf_loc (MSG_NOTE, vect_location, 3237 dump_printf_loc (MSG_NOTE, vect_location,
3282 "cost model: epilogue peel iters set to vf/2 " 3238 "cost model: epilogue peel iters set to vf/2 "
3283 "because loop iterations are unknown .\n"); 3239 "because loop iterations are unknown .\n");
3284 3240
3292 else 3248 else
3293 { 3249 {
3294 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo); 3250 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
3295 peel_iters_prologue = niters < peel_iters_prologue ? 3251 peel_iters_prologue = niters < peel_iters_prologue ?
3296 niters : peel_iters_prologue; 3252 niters : peel_iters_prologue;
3297 *peel_iters_epilogue = (niters - peel_iters_prologue) % vf; 3253 *peel_iters_epilogue = (niters - peel_iters_prologue) % assumed_vf;
3298 /* If we need to peel for gaps, but no peeling is required, we have to 3254 /* If we need to peel for gaps, but no peeling is required, we have to
3299 peel VF iterations. */ 3255 peel VF iterations. */
3300 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue) 3256 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue)
3301 *peel_iters_epilogue = vf; 3257 *peel_iters_epilogue = assumed_vf;
3302 } 3258 }
3303 3259
3304 stmt_info_for_cost *si; 3260 stmt_info_for_cost *si;
3305 int j; 3261 int j;
3306 if (peel_iters_prologue) 3262 if (peel_iters_prologue)
3307 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si) 3263 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
3308 { 3264 retval += record_stmt_cost (prologue_cost_vec,
3309 stmt_vec_info stmt_info 3265 si->count * peel_iters_prologue,
3310 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL; 3266 si->kind, si->stmt_info, si->misalign,
3311 retval += record_stmt_cost (prologue_cost_vec, 3267 vect_prologue);
3312 si->count * peel_iters_prologue,
3313 si->kind, stmt_info, si->misalign,
3314 vect_prologue);
3315 }
3316 if (*peel_iters_epilogue) 3268 if (*peel_iters_epilogue)
3317 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si) 3269 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si)
3318 { 3270 retval += record_stmt_cost (epilogue_cost_vec,
3319 stmt_vec_info stmt_info 3271 si->count * *peel_iters_epilogue,
3320 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL; 3272 si->kind, si->stmt_info, si->misalign,
3321 retval += record_stmt_cost (epilogue_cost_vec, 3273 vect_epilogue);
3322 si->count * *peel_iters_epilogue,
3323 si->kind, stmt_info, si->misalign,
3324 vect_epilogue);
3325 }
3326 3274
3327 return retval; 3275 return retval;
3328 } 3276 }
3329 3277
3330 /* Function vect_estimate_min_profitable_iters 3278 /* Function vect_estimate_min_profitable_iters
3354 int vec_outside_cost = 0; 3302 int vec_outside_cost = 0;
3355 unsigned vec_prologue_cost = 0; 3303 unsigned vec_prologue_cost = 0;
3356 unsigned vec_epilogue_cost = 0; 3304 unsigned vec_epilogue_cost = 0;
3357 int scalar_single_iter_cost = 0; 3305 int scalar_single_iter_cost = 0;
3358 int scalar_outside_cost = 0; 3306 int scalar_outside_cost = 0;
3359 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 3307 int assumed_vf = vect_vf_for_cost (loop_vinfo);
3360 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); 3308 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
3361 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); 3309 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3362 3310
3363 /* Cost model disabled. */ 3311 /* Cost model disabled. */
3364 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) 3312 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo)))
3391 len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length (); 3339 len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length ();
3392 if (len) 3340 if (len)
3393 /* Count LEN - 1 ANDs and LEN comparisons. */ 3341 /* Count LEN - 1 ANDs and LEN comparisons. */
3394 (void) add_stmt_cost (target_cost_data, len * 2 - 1, scalar_stmt, 3342 (void) add_stmt_cost (target_cost_data, len * 2 - 1, scalar_stmt,
3395 NULL, 0, vect_prologue); 3343 NULL, 0, vect_prologue);
3344 len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length ();
3345 if (len)
3346 {
3347 /* Count LEN - 1 ANDs and LEN comparisons. */
3348 unsigned int nstmts = len * 2 - 1;
3349 /* +1 for each bias that needs adding. */
3350 for (unsigned int i = 0; i < len; ++i)
3351 if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p)
3352 nstmts += 1;
3353 (void) add_stmt_cost (target_cost_data, nstmts, scalar_stmt,
3354 NULL, 0, vect_prologue);
3355 }
3396 dump_printf (MSG_NOTE, 3356 dump_printf (MSG_NOTE,
3397 "cost model: Adding cost of checks for loop " 3357 "cost model: Adding cost of checks for loop "
3398 "versioning aliasing.\n"); 3358 "versioning aliasing.\n");
3399 } 3359 }
3400 3360
3423 3383
3424 scalar_single_iter_cost 3384 scalar_single_iter_cost
3425 = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo); 3385 = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo);
3426 3386
3427 /* Add additional cost for the peeled instructions in prologue and epilogue 3387 /* Add additional cost for the peeled instructions in prologue and epilogue
3428 loop. 3388 loop. (For fully-masked loops there will be no peeling.)
3429 3389
3430 FORNOW: If we don't know the value of peel_iters for prologue or epilogue 3390 FORNOW: If we don't know the value of peel_iters for prologue or epilogue
3431 at compile-time - we assume it's vf/2 (the worst would be vf-1). 3391 at compile-time - we assume it's vf/2 (the worst would be vf-1).
3432 3392
3433 TODO: Build an expression that represents peel_iters for prologue and 3393 TODO: Build an expression that represents peel_iters for prologue and
3434 epilogue to be used in a run-time test. */ 3394 epilogue to be used in a run-time test. */
3435 3395
3436 if (npeel < 0) 3396 if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
3437 { 3397 {
3438 peel_iters_prologue = vf/2; 3398 peel_iters_prologue = 0;
3399 peel_iters_epilogue = 0;
3400
3401 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
3402 {
3403 /* We need to peel exactly one iteration. */
3404 peel_iters_epilogue += 1;
3405 stmt_info_for_cost *si;
3406 int j;
3407 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo),
3408 j, si)
3409 (void) add_stmt_cost (target_cost_data, si->count,
3410 si->kind, si->stmt_info, si->misalign,
3411 vect_epilogue);
3412 }
3413 }
3414 else if (npeel < 0)
3415 {
3416 peel_iters_prologue = assumed_vf / 2;
3439 dump_printf (MSG_NOTE, "cost model: " 3417 dump_printf (MSG_NOTE, "cost model: "
3440 "prologue peel iters set to vf/2.\n"); 3418 "prologue peel iters set to vf/2.\n");
3441 3419
3442 /* If peeling for alignment is unknown, loop bound of main loop becomes 3420 /* If peeling for alignment is unknown, loop bound of main loop becomes
3443 unknown. */ 3421 unknown. */
3444 peel_iters_epilogue = vf/2; 3422 peel_iters_epilogue = assumed_vf / 2;
3445 dump_printf (MSG_NOTE, "cost model: " 3423 dump_printf (MSG_NOTE, "cost model: "
3446 "epilogue peel iters set to vf/2 because " 3424 "epilogue peel iters set to vf/2 because "
3447 "peeling for alignment is unknown.\n"); 3425 "peeling for alignment is unknown.\n");
3448 3426
3449 /* If peeled iterations are unknown, count a taken branch and a not taken 3427 /* If peeled iterations are unknown, count a taken branch and a not taken
3460 NULL, 0, vect_epilogue); 3438 NULL, 0, vect_epilogue);
3461 stmt_info_for_cost *si; 3439 stmt_info_for_cost *si;
3462 int j; 3440 int j;
3463 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si) 3441 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si)
3464 { 3442 {
3465 struct _stmt_vec_info *stmt_info
3466 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3467 (void) add_stmt_cost (target_cost_data, 3443 (void) add_stmt_cost (target_cost_data,
3468 si->count * peel_iters_prologue, 3444 si->count * peel_iters_prologue,
3469 si->kind, stmt_info, si->misalign, 3445 si->kind, si->stmt_info, si->misalign,
3470 vect_prologue); 3446 vect_prologue);
3471 (void) add_stmt_cost (target_cost_data, 3447 (void) add_stmt_cost (target_cost_data,
3472 si->count * peel_iters_epilogue, 3448 si->count * peel_iters_epilogue,
3473 si->kind, stmt_info, si->misalign, 3449 si->kind, si->stmt_info, si->misalign,
3474 vect_epilogue); 3450 vect_epilogue);
3475 } 3451 }
3476 } 3452 }
3477 else 3453 else
3478 { 3454 {
3491 (loop_vinfo), 3467 (loop_vinfo),
3492 &prologue_cost_vec, 3468 &prologue_cost_vec,
3493 &epilogue_cost_vec); 3469 &epilogue_cost_vec);
3494 3470
3495 FOR_EACH_VEC_ELT (prologue_cost_vec, j, si) 3471 FOR_EACH_VEC_ELT (prologue_cost_vec, j, si)
3496 { 3472 (void) add_stmt_cost (data, si->count, si->kind, si->stmt_info,
3497 struct _stmt_vec_info *stmt_info 3473 si->misalign, vect_prologue);
3498 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3499 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
3500 si->misalign, vect_prologue);
3501 }
3502 3474
3503 FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si) 3475 FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si)
3504 { 3476 (void) add_stmt_cost (data, si->count, si->kind, si->stmt_info,
3505 struct _stmt_vec_info *stmt_info 3477 si->misalign, vect_epilogue);
3506 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL;
3507 (void) add_stmt_cost (data, si->count, si->kind, stmt_info,
3508 si->misalign, vect_epilogue);
3509 }
3510 3478
3511 prologue_cost_vec.release (); 3479 prologue_cost_vec.release ();
3512 epilogue_cost_vec.release (); 3480 epilogue_cost_vec.release ();
3513 } 3481 }
3514 3482
3618 SIC = scalar iteration cost, VIC = vector iteration cost, 3586 SIC = scalar iteration cost, VIC = vector iteration cost,
3619 VOC = vector outside cost, VF = vectorization factor, 3587 VOC = vector outside cost, VF = vectorization factor,
3620 PL_ITERS = prologue iterations, EP_ITERS = epilogue iterations 3588 PL_ITERS = prologue iterations, EP_ITERS = epilogue iterations
3621 SOC = scalar outside cost for run time cost model check. */ 3589 SOC = scalar outside cost for run time cost model check. */
3622 3590
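To make the arithmetic below concrete, here is a purely illustrative calculation with made-up costs (SIC = 4, VIC = 10, VOC = 20, SOC = 0, VF = 4) and peel_iters_prologue = peel_iters_epilogue = 0; the numbers are assumptions, not taken from any target:

  int SIC = 4, VIC = 10, VOC = 20, SOC = 0, VF = 4;     /* assumed costs        */
  int mpi = ((VOC - SOC) * VF) / (SIC * VF - VIC);      /* 80 / 6 = 13          */
  if (SIC * VF * mpi <= VIC * mpi + (VOC - SOC) * VF)   /* 208 <= 210: not yet  */
    mpi++;                                              /* profitable, so 14    */

At 14 iterations the scaled scalar cost 4*4*14 = 224 finally exceeds the scaled vector cost 10*14 + 80 = 220, which is exactly what the code below computes when no peeling is needed.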
3623 if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost) 3591 if ((scalar_single_iter_cost * assumed_vf) > (int) vec_inside_cost)
3624 { 3592 {
3625 if (vec_outside_cost <= 0) 3593 min_profitable_iters = ((vec_outside_cost - scalar_outside_cost)
3594 * assumed_vf
3595 - vec_inside_cost * peel_iters_prologue
3596 - vec_inside_cost * peel_iters_epilogue);
3597 if (min_profitable_iters <= 0)
3626 min_profitable_iters = 0; 3598 min_profitable_iters = 0;
3627 else 3599 else
3628 { 3600 {
3629 min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf 3601 min_profitable_iters /= ((scalar_single_iter_cost * assumed_vf)
3630 - vec_inside_cost * peel_iters_prologue 3602 - vec_inside_cost);
3631 - vec_inside_cost * peel_iters_epilogue) 3603
3632 / ((scalar_single_iter_cost * vf) 3604 if ((scalar_single_iter_cost * assumed_vf * min_profitable_iters)
3633 - vec_inside_cost); 3605 <= (((int) vec_inside_cost * min_profitable_iters)
3634 3606 + (((int) vec_outside_cost - scalar_outside_cost)
3635 if ((scalar_single_iter_cost * vf * min_profitable_iters) 3607 * assumed_vf)))
3636 <= (((int) vec_inside_cost * min_profitable_iters) 3608 min_profitable_iters++;
3637 + (((int) vec_outside_cost - scalar_outside_cost) * vf))) 3609 }
3638 min_profitable_iters++;
3639 }
3640 } 3610 }
3641 /* vector version will never be profitable. */ 3611 /* vector version will never be profitable. */
3642 else 3612 else
3643 { 3613 {
3644 if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize) 3614 if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize)
3645 warning_at (vect_location, OPT_Wopenmp_simd, "vectorization " 3615 warning_at (vect_location.get_location_t (), OPT_Wopenmp_simd,
3646 "did not happen for a simd loop"); 3616 "vectorization did not happen for a simd loop");
3647 3617
3648 if (dump_enabled_p ()) 3618 if (dump_enabled_p ())
3649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 3619 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
3650 "cost model: the vector iteration cost = %d " 3620 "cost model: the vector iteration cost = %d "
3651 "divided by the scalar iteration cost = %d " 3621 "divided by the scalar iteration cost = %d "
3652 "is greater or equal to the vectorization factor = %d" 3622 "is greater or equal to the vectorization factor = %d"
3653 ".\n", 3623 ".\n",
3654 vec_inside_cost, scalar_single_iter_cost, vf); 3624 vec_inside_cost, scalar_single_iter_cost, assumed_vf);
3655 *ret_min_profitable_niters = -1; 3625 *ret_min_profitable_niters = -1;
3656 *ret_min_profitable_estimate = -1; 3626 *ret_min_profitable_estimate = -1;
3657 return; 3627 return;
3658 } 3628 }
3659 3629
3660 dump_printf (MSG_NOTE, 3630 dump_printf (MSG_NOTE,
3661 " Calculated minimum iters for profitability: %d\n", 3631 " Calculated minimum iters for profitability: %d\n",
3662 min_profitable_iters); 3632 min_profitable_iters);
3663 3633
3664 /* We want the vectorized loop to execute at least once. */ 3634 if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
3665 if (min_profitable_iters < (vf + peel_iters_prologue)) 3635 && min_profitable_iters < (assumed_vf + peel_iters_prologue))
3666 min_profitable_iters = vf + peel_iters_prologue; 3636 /* We want the vectorized loop to execute at least once. */
3637 min_profitable_iters = assumed_vf + peel_iters_prologue;
3667 3638
3668 if (dump_enabled_p ()) 3639 if (dump_enabled_p ())
3669 dump_printf_loc (MSG_NOTE, vect_location, 3640 dump_printf_loc (MSG_NOTE, vect_location,
3670 " Runtime profitability threshold = %d\n", 3641 " Runtime profitability threshold = %d\n",
3671 min_profitable_iters); 3642 min_profitable_iters);
3681 3652
3682 if (vec_outside_cost <= 0) 3653 if (vec_outside_cost <= 0)
3683 min_profitable_estimate = 0; 3654 min_profitable_estimate = 0;
3684 else 3655 else
3685 { 3656 {
3686 min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf 3657 min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost)
3658 * assumed_vf
3687 - vec_inside_cost * peel_iters_prologue 3659 - vec_inside_cost * peel_iters_prologue
3688 - vec_inside_cost * peel_iters_epilogue) 3660 - vec_inside_cost * peel_iters_epilogue)
3689 / ((scalar_single_iter_cost * vf) 3661 / ((scalar_single_iter_cost * assumed_vf)
3690 - vec_inside_cost); 3662 - vec_inside_cost);
3691 } 3663 }
3692 min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters); 3664 min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters);
3693 if (dump_enabled_p ()) 3665 if (dump_enabled_p ())
3694 dump_printf_loc (MSG_NOTE, vect_location, 3666 dump_printf_loc (MSG_NOTE, vect_location,
3700 3672
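As a reading aid only (not part of the patch): the runtime profitability threshold computed in the hunk above can be reproduced with the stand-alone sketch below. The helper name and all costs are made-up examples; the real pass obtains SIC/VIC/VOC/SOC from the target cost hooks, and it also skips the final clamp for fully-masked loops, which this sketch ignores.

#include <stdio.h>

/* Mirror of the threshold arithmetic shown above; illustration only.  */
static int
min_profitable_iters_sketch (int scalar_single_iter_cost,
                             int vec_inside_cost, int vec_outside_cost,
                             int scalar_outside_cost, int assumed_vf,
                             int peel_iters_prologue, int peel_iters_epilogue)
{
  /* Vectorization can only pay off if VF scalar iterations cost more
     than one vector iteration.  */
  if (scalar_single_iter_cost * assumed_vf <= vec_inside_cost)
    return -1;

  int iters = ((vec_outside_cost - scalar_outside_cost) * assumed_vf
               - vec_inside_cost * peel_iters_prologue
               - vec_inside_cost * peel_iters_epilogue);
  if (iters <= 0)
    iters = 0;
  else
    {
      iters /= (scalar_single_iter_cost * assumed_vf) - vec_inside_cost;
      /* The truncated quotient may still favour the scalar loop;
         round up by one if so.  */
      if (scalar_single_iter_cost * assumed_vf * iters
          <= vec_inside_cost * iters
             + (vec_outside_cost - scalar_outside_cost) * assumed_vf)
        iters++;
    }

  /* The vectorized loop should execute at least once.  */
  if (iters < assumed_vf + peel_iters_prologue)
    iters = assumed_vf + peel_iters_prologue;
  return iters;
}

int
main (void)
{
  /* Example: SIC=4, VIC=6, VOC=20, SOC=0, VF=4, one prologue and one
     epilogue peel iteration => threshold of 7 iterations.  */
  printf ("%d\n", min_profitable_iters_sketch (4, 6, 20, 0, 4, 1, 1));
  return 0;
}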
3701 /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET 3673 /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET
3702 vector elements (not bits) for a vector with NELT elements. */ 3674 vector elements (not bits) for a vector with NELT elements. */
3703 static void 3675 static void
3704 calc_vec_perm_mask_for_shift (unsigned int offset, unsigned int nelt, 3676 calc_vec_perm_mask_for_shift (unsigned int offset, unsigned int nelt,
3705 vec_perm_indices *sel) 3677 vec_perm_builder *sel)
3706 { 3678 {
3707 unsigned int i; 3679 /* The encoding is a single stepped pattern. Any wrap-around is handled
3708 3680 by vec_perm_indices. */
3709 for (i = 0; i < nelt; i++) 3681 sel->new_vector (nelt, 1, 3);
3710 sel->quick_push ((i + offset) & (2 * nelt - 1)); 3682 for (unsigned int i = 0; i < 3; i++)
3683 sel->quick_push (i + offset);
3711 } 3684 }
3712 3685
3713 /* Checks whether the target supports whole-vector shifts for vectors of mode 3686 /* Checks whether the target supports whole-vector shifts for vectors of mode
3714 MODE. This is the case if _either_ the platform handles vec_shr_optab, _or_ 3687 MODE. This is the case if _either_ the platform handles vec_shr_optab, _or_
3715 it supports vec_perm_const with masks for all necessary shift amounts. */ 3688 it supports vec_perm_const with masks for all necessary shift amounts. */
3717 have_whole_vector_shift (machine_mode mode) 3690 have_whole_vector_shift (machine_mode mode)
3718 { 3691 {
3719 if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing) 3692 if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing)
3720 return true; 3693 return true;
3721 3694
3722 if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing) 3695 /* Variable-length vectors should be handled via the optab. */
3696 unsigned int nelt;
3697 if (!GET_MODE_NUNITS (mode).is_constant (&nelt))
3723 return false; 3698 return false;
3724 3699
3725 unsigned int i, nelt = GET_MODE_NUNITS (mode); 3700 vec_perm_builder sel;
3726 auto_vec_perm_indices sel (nelt); 3701 vec_perm_indices indices;
3727 3702 for (unsigned int i = nelt / 2; i >= 1; i /= 2)
3728 for (i = nelt/2; i >= 1; i/=2) 3703 {
3729 {
3730 sel.truncate (0);
3731 calc_vec_perm_mask_for_shift (i, nelt, &sel); 3704 calc_vec_perm_mask_for_shift (i, nelt, &sel);
3732 if (!can_vec_perm_p (mode, false, &sel)) 3705 indices.new_vector (sel, 2, nelt);
3706 if (!can_vec_perm_const_p (mode, indices, false))
3733 return false; 3707 return false;
3734 } 3708 }
3735 return true; 3709 return true;
3736 } 3710 }
3737 3711
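Purely as an illustration (not from the patch): the loop above only queries whether each power-of-two shift amount can be done as a constant permutation; the sketch below prints the stepped selectors it would ask about for a hypothetical 8-element vector. In a two-input permutation, indices at or beyond NELT refer to the second operand, which supplies the shifted-in elements.

#include <stdio.h>

int
main (void)
{
  unsigned int nelt = 8;	/* example vector length */
  for (unsigned int offset = nelt / 2; offset >= 1; offset /= 2)
    {
      printf ("shift by %u:", offset);
      /* The stepped encoding {offset, offset + 1, offset + 2, ...}
         expanded to all NELT positions.  */
      for (unsigned int i = 0; i < nelt; i++)
        printf (" %u", i + offset);
      printf ("\n");
    }
  return 0;
}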
3743 Models cost for a reduction operation, including the vector ops 3717 Models cost for a reduction operation, including the vector ops
3744 generated within the strip-mine loop, the initial definition before 3718 generated within the strip-mine loop, the initial definition before
3745 the loop, and the epilogue code that must be generated. */ 3719 the loop, and the epilogue code that must be generated. */
3746 3720
3747 static void 3721 static void
3748 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code, 3722 vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn,
3749 int ncopies) 3723 int ncopies, stmt_vector_for_cost *cost_vec)
3750 { 3724 {
3751 int prologue_cost = 0, epilogue_cost = 0; 3725 int prologue_cost = 0, epilogue_cost = 0, inside_cost;
3752 enum tree_code code; 3726 enum tree_code code;
3753 optab optab; 3727 optab optab;
3754 tree vectype; 3728 tree vectype;
3755 gimple *orig_stmt;
3756 machine_mode mode; 3729 machine_mode mode;
3757 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 3730 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3758 struct loop *loop = NULL; 3731 struct loop *loop = NULL;
3759 void *target_cost_data;
3760 3732
3761 if (loop_vinfo) 3733 if (loop_vinfo)
3762 { 3734 loop = LOOP_VINFO_LOOP (loop_vinfo);
3763 loop = LOOP_VINFO_LOOP (loop_vinfo);
3764 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3765 }
3766 else
3767 target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info));
3768 3735
3769 /* Condition reductions generate two reductions in the loop. */ 3736 /* Condition reductions generate two reductions in the loop. */
3770 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION) 3737 vect_reduction_type reduction_type
3738 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
3739 if (reduction_type == COND_REDUCTION)
3771 ncopies *= 2; 3740 ncopies *= 2;
3772
3773 /* Cost of reduction op inside loop. */
3774 unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt,
3775 stmt_info, 0, vect_body);
3776 3741
3777 vectype = STMT_VINFO_VECTYPE (stmt_info); 3742 vectype = STMT_VINFO_VECTYPE (stmt_info);
3778 mode = TYPE_MODE (vectype); 3743 mode = TYPE_MODE (vectype);
3779 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); 3744 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
3780 3745
3781 if (!orig_stmt) 3746 code = gimple_assign_rhs_code (orig_stmt_info->stmt);
3782 orig_stmt = STMT_VINFO_STMT (stmt_info); 3747
3783 3748 if (reduction_type == EXTRACT_LAST_REDUCTION
3784 code = gimple_assign_rhs_code (orig_stmt); 3749 || reduction_type == FOLD_LEFT_REDUCTION)
3785 3750 {
3786 /* Add in cost for initial definition. 3751 /* No extra instructions needed in the prologue. */
3787 For cond reduction we have four vectors: initial index, step, initial 3752 prologue_cost = 0;
3788 result of the data reduction, initial value of the index reduction. */ 3753
3789 int prologue_stmts = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) 3754 if (reduction_type == EXTRACT_LAST_REDUCTION || reduc_fn != IFN_LAST)
3790 == COND_REDUCTION ? 4 : 1; 3755 /* Count one reduction-like operation per vector. */
3791 prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts, 3756 inside_cost = record_stmt_cost (cost_vec, ncopies, vec_to_scalar,
3792 scalar_to_vec, stmt_info, 0, 3757 stmt_info, 0, vect_body);
3793 vect_prologue); 3758 else
3759 {
3760 /* Use NELEMENTS extracts and NELEMENTS scalar ops. */
3761 unsigned int nelements = ncopies * vect_nunits_for_cost (vectype);
3762 inside_cost = record_stmt_cost (cost_vec, nelements,
3763 vec_to_scalar, stmt_info, 0,
3764 vect_body);
3765 inside_cost += record_stmt_cost (cost_vec, nelements,
3766 scalar_stmt, stmt_info, 0,
3767 vect_body);
3768 }
3769 }
3770 else
3771 {
3772 /* Add in cost for initial definition.
3773 For cond reduction we have four vectors: initial index, step,
3774 initial result of the data reduction, initial value of the index
3775 reduction. */
3776 int prologue_stmts = reduction_type == COND_REDUCTION ? 4 : 1;
3777 prologue_cost += record_stmt_cost (cost_vec, prologue_stmts,
3778 scalar_to_vec, stmt_info, 0,
3779 vect_prologue);
3780
3781 /* Cost of reduction op inside loop. */
3782 inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
3783 stmt_info, 0, vect_body);
3784 }
3794 3785
3795 /* Determine cost of epilogue code. 3786 /* Determine cost of epilogue code.
3796 3787
3797 We have a reduction operator that will reduce the vector in one statement. 3788 We have a reduction operator that will reduce the vector in one statement.
3798 Also requires scalar extract. */ 3789 Also requires scalar extract. */
3799 3790
3800 if (!loop || !nested_in_vect_loop_p (loop, orig_stmt)) 3791 if (!loop || !nested_in_vect_loop_p (loop, orig_stmt_info))
3801 { 3792 {
3802 if (reduc_code != ERROR_MARK) 3793 if (reduc_fn != IFN_LAST)
3803 { 3794 {
3804 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION) 3795 if (reduction_type == COND_REDUCTION)
3805 { 3796 {
3806 /* An EQ stmt and a COND_EXPR stmt. */ 3797 /* An EQ stmt and a COND_EXPR stmt. */
3807 epilogue_cost += add_stmt_cost (target_cost_data, 2, 3798 epilogue_cost += record_stmt_cost (cost_vec, 2,
3808 vector_stmt, stmt_info, 0, 3799 vector_stmt, stmt_info, 0,
3809 vect_epilogue); 3800 vect_epilogue);
3810 /* Reduction of the max index and a reduction of the found 3801 /* Reduction of the max index and a reduction of the found
3811 values. */ 3802 values. */
3812 epilogue_cost += add_stmt_cost (target_cost_data, 2, 3803 epilogue_cost += record_stmt_cost (cost_vec, 2,
3813 vec_to_scalar, stmt_info, 0, 3804 vec_to_scalar, stmt_info, 0,
3814 vect_epilogue); 3805 vect_epilogue);
3815 /* A broadcast of the max value. */ 3806 /* A broadcast of the max value. */
3816 epilogue_cost += add_stmt_cost (target_cost_data, 1, 3807 epilogue_cost += record_stmt_cost (cost_vec, 1,
3817 scalar_to_vec, stmt_info, 0, 3808 scalar_to_vec, stmt_info, 0,
3818 vect_epilogue); 3809 vect_epilogue);
3819 } 3810 }
3820 else 3811 else
3821 { 3812 {
3822 epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt, 3813 epilogue_cost += record_stmt_cost (cost_vec, 1, vector_stmt,
3823 stmt_info, 0, vect_epilogue); 3814 stmt_info, 0, vect_epilogue);
3824 epilogue_cost += add_stmt_cost (target_cost_data, 1, 3815 epilogue_cost += record_stmt_cost (cost_vec, 1,
3825 vec_to_scalar, stmt_info, 0, 3816 vec_to_scalar, stmt_info, 0,
3826 vect_epilogue); 3817 vect_epilogue);
3827 } 3818 }
3828 } 3819 }
3829 else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION) 3820 else if (reduction_type == COND_REDUCTION)
3830 { 3821 {
3831 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype); 3822 unsigned estimated_nunits = vect_nunits_for_cost (vectype);
3832 /* Extraction of scalar elements. */ 3823 /* Extraction of scalar elements. */
3833 epilogue_cost += add_stmt_cost (target_cost_data, 2 * nunits, 3824 epilogue_cost += record_stmt_cost (cost_vec,
3834 vec_to_scalar, stmt_info, 0, 3825 2 * estimated_nunits,
3835 vect_epilogue); 3826 vec_to_scalar, stmt_info, 0,
3827 vect_epilogue);
3836 /* Scalar max reductions via COND_EXPR / MAX_EXPR. */ 3828 /* Scalar max reductions via COND_EXPR / MAX_EXPR. */
3837 epilogue_cost += add_stmt_cost (target_cost_data, 2 * nunits - 3, 3829 epilogue_cost += record_stmt_cost (cost_vec,
3838 scalar_stmt, stmt_info, 0, 3830 2 * estimated_nunits - 3,
3839 vect_epilogue); 3831 scalar_stmt, stmt_info, 0,
3840 } 3832 vect_epilogue);
3833 }
3834 else if (reduction_type == EXTRACT_LAST_REDUCTION
3835 || reduction_type == FOLD_LEFT_REDUCTION)
3836 /* No extra instructions needed in the epilogue. */
3837 ;
3841 else 3838 else
3842 { 3839 {
3843 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)); 3840 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
3844 tree bitsize = 3841 tree bitsize =
3845 TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt))); 3842 TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt_info->stmt)));
3846 int element_bitsize = tree_to_uhwi (bitsize); 3843 int element_bitsize = tree_to_uhwi (bitsize);
3847 int nelements = vec_size_in_bits / element_bitsize; 3844 int nelements = vec_size_in_bits / element_bitsize;
3848 3845
3849 if (code == COND_EXPR) 3846 if (code == COND_EXPR)
3850 code = MAX_EXPR; 3847 code = MAX_EXPR;
3857 && optab_handler (optab, mode) != CODE_FOR_nothing 3854 && optab_handler (optab, mode) != CODE_FOR_nothing
3858 && have_whole_vector_shift (mode)) 3855 && have_whole_vector_shift (mode))
3859 { 3856 {
3860 /* Final reduction via vector shifts and the reduction operator. 3857 /* Final reduction via vector shifts and the reduction operator.
3861 Also requires scalar extract. */ 3858 Also requires scalar extract. */
3862 epilogue_cost += add_stmt_cost (target_cost_data, 3859 epilogue_cost += record_stmt_cost (cost_vec,
3863 exact_log2 (nelements) * 2, 3860 exact_log2 (nelements) * 2,
3864 vector_stmt, stmt_info, 0, 3861 vector_stmt, stmt_info, 0,
3865 vect_epilogue); 3862 vect_epilogue);
3866 epilogue_cost += add_stmt_cost (target_cost_data, 1, 3863 epilogue_cost += record_stmt_cost (cost_vec, 1,
3867 vec_to_scalar, stmt_info, 0, 3864 vec_to_scalar, stmt_info, 0,
3868 vect_epilogue); 3865 vect_epilogue);
3869 } 3866 }
3870 else 3867 else
3871 /* Use extracts and reduction op for final reduction. For N 3868 /* Use extracts and reduction op for final reduction. For N
3872 elements, we have N extracts and N-1 reduction ops. */ 3869 elements, we have N extracts and N-1 reduction ops. */
3873 epilogue_cost += add_stmt_cost (target_cost_data, 3870 epilogue_cost += record_stmt_cost (cost_vec,
3874 nelements + nelements - 1, 3871 nelements + nelements - 1,
3875 vector_stmt, stmt_info, 0, 3872 vector_stmt, stmt_info, 0,
3876 vect_epilogue); 3873 vect_epilogue);
3877 } 3874 }
3878 } 3875 }
3879 3876
3880 if (dump_enabled_p ()) 3877 if (dump_enabled_p ())
3881 dump_printf (MSG_NOTE, 3878 dump_printf (MSG_NOTE,
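For readers unfamiliar with the two epilogue strategies being costed above (exact_log2 (nelements) * 2 vector statements plus one extract for the shift-based scheme, versus NELEMENTS extracts and NELEMENTS - 1 scalar operations otherwise), here is a stand-alone sketch of both on a made-up 8-element integer vector; it is an illustration only and uses no GCC internals.

#include <stdio.h>

#define NELT 8

int
main (void)
{
  int v[NELT] = { 1, 2, 3, 4, 5, 6, 7, 8 };

  /* Scheme A: log2 (NELT) whole-vector shifts, each followed by a
     vector add, then a single extract of element 0.  Reading
     a[i + shift] before it is updated in the current pass models
     adding the shifted copy of the vector.  */
  int a[NELT];
  for (int i = 0; i < NELT; i++)
    a[i] = v[i];
  for (int shift = NELT / 2; shift >= 1; shift /= 2)
    for (int i = 0; i < NELT; i++)
      a[i] += (i + shift < NELT) ? a[i + shift] : 0;
  int sum_shift = a[0];

  /* Scheme B: NELT extracts and NELT - 1 scalar adds.  */
  int sum_extract = v[0];
  for (int i = 1; i < NELT; i++)
    sum_extract += v[i];

  printf ("%d %d\n", sum_shift, sum_extract);	/* both print 36 */
  return 0;
}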
3888 /* Function vect_model_induction_cost. 3885 /* Function vect_model_induction_cost.
3889 3886
3890 Models cost for induction operations. */ 3887 Models cost for induction operations. */
3891 3888
3892 static void 3889 static void
3893 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies) 3890 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies,
3891 stmt_vector_for_cost *cost_vec)
3894 { 3892 {
3895 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
3896 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo);
3897 unsigned inside_cost, prologue_cost; 3893 unsigned inside_cost, prologue_cost;
3898 3894
3899 if (PURE_SLP_STMT (stmt_info)) 3895 if (PURE_SLP_STMT (stmt_info))
3900 return; 3896 return;
3901 3897
3902 /* loop cost for vec_loop. */ 3898 /* loop cost for vec_loop. */
3903 inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt, 3899 inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt,
3904 stmt_info, 0, vect_body); 3900 stmt_info, 0, vect_body);
3905 3901
3906 /* prologue cost for vec_init and vec_step. */ 3902 /* prologue cost for vec_init and vec_step. */
3907 prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec, 3903 prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec,
3908 stmt_info, 0, vect_prologue); 3904 stmt_info, 0, vect_prologue);
3909 3905
3910 if (dump_enabled_p ()) 3906 if (dump_enabled_p ())
3911 dump_printf_loc (MSG_NOTE, vect_location, 3907 dump_printf_loc (MSG_NOTE, vect_location,
3912 "vect_model_induction_cost: inside_cost = %d, " 3908 "vect_model_induction_cost: inside_cost = %d, "
3913 "prologue_cost = %d .\n", inside_cost, prologue_cost); 3909 "prologue_cost = %d .\n", inside_cost, prologue_cost);
3916 3912
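As an aside (not from the patch): the two scalar_to_vec prologue statements and the ncopies vector statements costed in vect_model_induction_cost correspond to building a vector of initial IV values and a vector of steps, then performing one vector add per copy inside the loop. A minimal scalar model with made-up values:

#include <stdio.h>

#define VF 4

int
main (void)
{
  /* Prologue: the vectorized initial values and the step vector
     (the two scalar_to_vec statements).  */
  int vec_iv[VF] = { 0, 1, 2, 3 };
  int vec_step[VF] = { VF, VF, VF, VF };

  /* Body: one vector add per vector iteration.  */
  for (int iter = 0; iter < 3; iter++)
    {
      printf ("iter %d: {%d, %d, %d, %d}\n", iter,
              vec_iv[0], vec_iv[1], vec_iv[2], vec_iv[3]);
      for (int lane = 0; lane < VF; lane++)
        vec_iv[lane] += vec_step[lane];
    }
  return 0;
}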
3917 3913
3918 /* Function get_initial_def_for_reduction 3914 /* Function get_initial_def_for_reduction
3919 3915
3920 Input: 3916 Input:
3921 STMT - a stmt that performs a reduction operation in the loop. 3917 STMT_VINFO - a stmt that performs a reduction operation in the loop.
3922 INIT_VAL - the initial value of the reduction variable 3918 INIT_VAL - the initial value of the reduction variable
3923 3919
3924 Output: 3920 Output:
3925 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result 3921 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result
3926 of the reduction (used for adjusting the epilog - see below). 3922 of the reduction (used for adjusting the epilog - see below).
3927 Return a vector variable, initialized according to the operation that STMT 3923 Return a vector variable, initialized according to the operation that
3928 performs. This vector will be used as the initial value of the 3924 STMT_VINFO performs. This vector will be used as the initial value
3929 vector of partial results. 3925 of the vector of partial results.
3930 3926
3931 Option1 (adjust in epilog): Initialize the vector as follows: 3927 Option1 (adjust in epilog): Initialize the vector as follows:
3932 add/bit or/xor: [0,0,...,0,0] 3928 add/bit or/xor: [0,0,...,0,0]
3933 mult/bit and: [1,1,...,1,1] 3929 mult/bit and: [1,1,...,1,1]
3934 min/max/cond_expr: [init_val,init_val,..,init_val,init_val] 3930 min/max/cond_expr: [init_val,init_val,..,init_val,init_val]
3945 3941
3946 s = init_val; 3942 s = init_val;
3947 for (i=0;i<n;i++) 3943 for (i=0;i<n;i++)
3948 s = s + a[i]; 3944 s = s + a[i];
3949 3945
3950 STMT is 's = s + a[i]', and the reduction variable is 's'. 3946 STMT_VINFO is 's = s + a[i]', and the reduction variable is 's'.
3951 For a vector of 4 units, we want to return either [0,0,0,init_val], 3947 For a vector of 4 units, we want to return either [0,0,0,init_val],
3952 or [0,0,0,0] and let the caller know that it needs to adjust 3948 or [0,0,0,0] and let the caller know that it needs to adjust
3953 the result at the end by 'init_val'. 3949 the result at the end by 'init_val'.
3954 3950
3955 FORNOW, we are using the 'adjust in epilog' scheme, because this way the 3951 FORNOW, we are using the 'adjust in epilog' scheme, because this way the
3957 ADJUSTMENT_DEF is not NULL, and Option2 otherwise. 3953 ADJUSTMENT_DEF is not NULL, and Option2 otherwise.
3958 3954
3959 A cost model should help decide between these two schemes. */ 3955 A cost model should help decide between these two schemes. */
3960 3956
3961 tree 3957 tree
3962 get_initial_def_for_reduction (gimple *stmt, tree init_val, 3958 get_initial_def_for_reduction (stmt_vec_info stmt_vinfo, tree init_val,
3963 tree *adjustment_def) 3959 tree *adjustment_def)
3964 { 3960 {
3965 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt);
3966 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); 3961 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo);
3967 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 3962 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
3968 tree scalar_type = TREE_TYPE (init_val); 3963 tree scalar_type = TREE_TYPE (init_val);
3969 tree vectype = get_vectype_for_scalar_type (scalar_type); 3964 tree vectype = get_vectype_for_scalar_type (scalar_type);
3970 int nunits; 3965 enum tree_code code = gimple_assign_rhs_code (stmt_vinfo->stmt);
3971 enum tree_code code = gimple_assign_rhs_code (stmt);
3972 tree def_for_init; 3966 tree def_for_init;
3973 tree init_def; 3967 tree init_def;
3974 int i;
3975 bool nested_in_vect_loop = false;
3976 REAL_VALUE_TYPE real_init_val = dconst0; 3968 REAL_VALUE_TYPE real_init_val = dconst0;
3977 int int_init_val = 0; 3969 int int_init_val = 0;
3978 gimple *def_stmt = NULL;
3979 gimple_seq stmts = NULL; 3970 gimple_seq stmts = NULL;
3980 3971
3981 gcc_assert (vectype); 3972 gcc_assert (vectype);
3982 nunits = TYPE_VECTOR_SUBPARTS (vectype);
3983 3973
3984 gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type) 3974 gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type)
3985 || SCALAR_FLOAT_TYPE_P (scalar_type)); 3975 || SCALAR_FLOAT_TYPE_P (scalar_type));
3986 3976
3987 if (nested_in_vect_loop_p (loop, stmt)) 3977 gcc_assert (nested_in_vect_loop_p (loop, stmt_vinfo)
3988 nested_in_vect_loop = true; 3978 || loop == (gimple_bb (stmt_vinfo->stmt))->loop_father);
3989 else 3979
3990 gcc_assert (loop == (gimple_bb (stmt))->loop_father); 3980 vect_reduction_type reduction_type
3991 3981 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo);
3992 /* In case of double reduction we only create a vector variable to be put
3993 in the reduction phi node. The actual statement creation is done in
3994 vect_create_epilog_for_reduction. */
3995 if (adjustment_def && nested_in_vect_loop
3996 && TREE_CODE (init_val) == SSA_NAME
3997 && (def_stmt = SSA_NAME_DEF_STMT (init_val))
3998 && gimple_code (def_stmt) == GIMPLE_PHI
3999 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))
4000 && vinfo_for_stmt (def_stmt)
4001 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt))
4002 == vect_double_reduction_def)
4003 {
4004 *adjustment_def = NULL;
4005 return vect_create_destination_var (init_val, vectype);
4006 }
4007
4008 /* In case of a nested reduction do not use an adjustment def as
4009 that case is not supported by the epilogue generation correctly
4010 if ncopies is not one. */
4011 if (adjustment_def && nested_in_vect_loop)
4012 {
4013 *adjustment_def = NULL;
4014 return vect_get_vec_def_for_operand (init_val, stmt);
4015 }
4016 3982
4017 switch (code) 3983 switch (code)
4018 { 3984 {
4019 case WIDEN_SUM_EXPR: 3985 case WIDEN_SUM_EXPR:
4020 case DOT_PROD_EXPR: 3986 case DOT_PROD_EXPR:
4024 case BIT_IOR_EXPR: 3990 case BIT_IOR_EXPR:
4025 case BIT_XOR_EXPR: 3991 case BIT_XOR_EXPR:
4026 case MULT_EXPR: 3992 case MULT_EXPR:
4027 case BIT_AND_EXPR: 3993 case BIT_AND_EXPR:
4028 { 3994 {
4029 /* ADJUSMENT_DEF is NULL when called from 3995 /* ADJUSTMENT_DEF is NULL when called from
4030 vect_create_epilog_for_reduction to vectorize double reduction. */ 3996 vect_create_epilog_for_reduction to vectorize double reduction. */
4031 if (adjustment_def) 3997 if (adjustment_def)
4032 *adjustment_def = init_val; 3998 *adjustment_def = init_val;
4033 3999
4034 if (code == MULT_EXPR) 4000 if (code == MULT_EXPR)
4047 4013
4048 if (adjustment_def) 4014 if (adjustment_def)
4049 /* Option1: the first element is '0' or '1' as well. */ 4015 /* Option1: the first element is '0' or '1' as well. */
4050 init_def = gimple_build_vector_from_val (&stmts, vectype, 4016 init_def = gimple_build_vector_from_val (&stmts, vectype,
4051 def_for_init); 4017 def_for_init);
4018 else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ())
4019 {
4020 /* Option2 (variable length): the first element is INIT_VAL. */
4021 init_def = gimple_build_vector_from_val (&stmts, vectype,
4022 def_for_init);
4023 init_def = gimple_build (&stmts, CFN_VEC_SHL_INSERT,
4024 vectype, init_def, init_val);
4025 }
4052 else 4026 else
4053 { 4027 {
4054 /* Option2: the first element is INIT_VAL. */ 4028 /* Option2: the first element is INIT_VAL. */
4055 auto_vec<tree, 32> elts (nunits); 4029 tree_vector_builder elts (vectype, 1, 2);
4056 elts.quick_push (init_val); 4030 elts.quick_push (init_val);
4057 for (i = 1; i < nunits; ++i) 4031 elts.quick_push (def_for_init);
4058 elts.quick_push (def_for_init); 4032 init_def = gimple_build_vector (&stmts, &elts);
4059 init_def = gimple_build_vector (&stmts, vectype, elts);
4060 } 4033 }
4061 } 4034 }
4062 break; 4035 break;
4063 4036
4064 case MIN_EXPR: 4037 case MIN_EXPR:
4066 case COND_EXPR: 4039 case COND_EXPR:
4067 { 4040 {
4068 if (adjustment_def) 4041 if (adjustment_def)
4069 { 4042 {
4070 *adjustment_def = NULL_TREE; 4043 *adjustment_def = NULL_TREE;
4071 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo) != COND_REDUCTION) 4044 if (reduction_type != COND_REDUCTION
4045 && reduction_type != EXTRACT_LAST_REDUCTION)
4072 { 4046 {
4073 init_def = vect_get_vec_def_for_operand (init_val, stmt); 4047 init_def = vect_get_vec_def_for_operand (init_val, stmt_vinfo);
4074 break; 4048 break;
4075 } 4049 }
4076 } 4050 }
4077 init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val); 4051 init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val);
4078 init_def = gimple_build_vector_from_val (&stmts, vectype, init_val); 4052 init_def = gimple_build_vector_from_val (&stmts, vectype, init_val);
4087 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); 4061 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
4088 return init_def; 4062 return init_def;
4089 } 4063 }
4090 4064
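To make the Option1/Option2 choice described before get_initial_def_for_reduction concrete (illustration only, not part of the patch): for a sum reduction both schemes produce the same value. Option1 starts every lane at the neutral element and adds INIT_VAL back via ADJUSTMENT_DEF after the epilog reduction; Option2 folds INIT_VAL into lane 0 of the initial vector.

#include <stdio.h>

#define VF 4

int
main (void)
{
  int a[] = { 3, 1, 4, 1, 5, 9, 2, 6 };
  int n = 8, init_val = 10;

  int opt1[VF] = { 0, 0, 0, 0 };		/* Option1: all-neutral */
  int opt2[VF] = { init_val, 0, 0, 0 };		/* Option2: lane 0 = INIT_VAL */

  /* Strip-mined loop: each lane accumulates its own partial sum.  */
  for (int i = 0; i < n; i += VF)
    for (int lane = 0; lane < VF; lane++)
      {
        opt1[lane] += a[i + lane];
        opt2[lane] += a[i + lane];
      }

  /* Epilog: reduce the partial sums to a scalar.  */
  int sum1 = 0, sum2 = 0;
  for (int lane = 0; lane < VF; lane++)
    {
      sum1 += opt1[lane];
      sum2 += opt2[lane];
    }
  sum1 += init_val;	/* Option1's "adjust in epilog" step */

  printf ("%d %d\n", sum1, sum2);	/* both print 41 */
  return 0;
}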
4091 /* Get at the initial defs for the reduction PHIs in SLP_NODE. 4065 /* Get at the initial defs for the reduction PHIs in SLP_NODE.
4092 NUMBER_OF_VECTORS is the number of vector defs to create. */ 4066 NUMBER_OF_VECTORS is the number of vector defs to create.
4067 If NEUTRAL_OP is nonnull, introducing extra elements of that
4068 value will not change the result. */
4093 4069
4094 static void 4070 static void
4095 get_initial_defs_for_reduction (slp_tree slp_node, 4071 get_initial_defs_for_reduction (slp_tree slp_node,
4096 vec<tree> *vec_oprnds, 4072 vec<tree> *vec_oprnds,
4097 unsigned int number_of_vectors, 4073 unsigned int number_of_vectors,
4098 enum tree_code code, bool reduc_chain) 4074 bool reduc_chain, tree neutral_op)
4099 { 4075 {
4100 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node); 4076 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node);
4101 gimple *stmt = stmts[0]; 4077 stmt_vec_info stmt_vinfo = stmts[0];
4102 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); 4078 unsigned HOST_WIDE_INT nunits;
4103 unsigned nunits;
4104 unsigned j, number_of_places_left_in_vector; 4079 unsigned j, number_of_places_left_in_vector;
4105 tree vector_type, scalar_type; 4080 tree vector_type;
4106 tree vop; 4081 tree vop;
4107 int group_size = stmts.length (); 4082 int group_size = stmts.length ();
4108 unsigned int vec_num, i; 4083 unsigned int vec_num, i;
4109 unsigned number_of_copies = 1; 4084 unsigned number_of_copies = 1;
4110 vec<tree> voprnds; 4085 vec<tree> voprnds;
4111 voprnds.create (number_of_vectors); 4086 voprnds.create (number_of_vectors);
4112 tree neutral_op = NULL;
4113 struct loop *loop; 4087 struct loop *loop;
4088 auto_vec<tree, 16> permute_results;
4114 4089
4115 vector_type = STMT_VINFO_VECTYPE (stmt_vinfo); 4090 vector_type = STMT_VINFO_VECTYPE (stmt_vinfo);
4116 scalar_type = TREE_TYPE (vector_type);
4117 nunits = TYPE_VECTOR_SUBPARTS (vector_type);
4118 4091
4119 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def); 4092 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def);
4120 4093
4121 loop = (gimple_bb (stmt))->loop_father; 4094 loop = (gimple_bb (stmt_vinfo->stmt))->loop_father;
4122 gcc_assert (loop); 4095 gcc_assert (loop);
4123 edge pe = loop_preheader_edge (loop); 4096 edge pe = loop_preheader_edge (loop);
4124 4097
4125 /* op is the reduction operand of the first stmt already. */ 4098 gcc_assert (!reduc_chain || neutral_op);
4126 /* For additional copies (see the explanation of NUMBER_OF_COPIES below)
4127 we need either neutral operands or the original operands. See
4128 get_initial_def_for_reduction() for details. */
4129 switch (code)
4130 {
4131 case WIDEN_SUM_EXPR:
4132 case DOT_PROD_EXPR:
4133 case SAD_EXPR:
4134 case PLUS_EXPR:
4135 case MINUS_EXPR:
4136 case BIT_IOR_EXPR:
4137 case BIT_XOR_EXPR:
4138 neutral_op = build_zero_cst (scalar_type);
4139 break;
4140
4141 case MULT_EXPR:
4142 neutral_op = build_one_cst (scalar_type);
4143 break;
4144
4145 case BIT_AND_EXPR:
4146 neutral_op = build_all_ones_cst (scalar_type);
4147 break;
4148
4149 /* For MIN/MAX we don't have an easy neutral operand but
4150 the initial values can be used fine here. Only for
4151 a reduction chain we have to force a neutral element. */
4152 case MAX_EXPR:
4153 case MIN_EXPR:
4154 if (! reduc_chain)
4155 neutral_op = NULL;
4156 else
4157 neutral_op = PHI_ARG_DEF_FROM_EDGE (stmt, pe);
4158 break;
4159
4160 default:
4161 gcc_assert (! reduc_chain);
4162 neutral_op = NULL;
4163 }
4164 4099
4165 /* NUMBER_OF_COPIES is the number of times we need to use the same values in 4100 /* NUMBER_OF_COPIES is the number of times we need to use the same values in
4166 created vectors. It is greater than 1 if unrolling is performed. 4101 created vectors. It is greater than 1 if unrolling is performed.
4167 4102
4168 For example, we have two scalar operands, s1 and s2 (e.g., group of 4103 For example, we have two scalar operands, s1 and s2 (e.g., group of
4169 strided accesses of size two), while NUNITS is four (i.e., four scalars 4104 strided accesses of size two), while NUNITS is four (i.e., four scalars
4170 of this type can be packed in a vector). The output vector will contain 4105 of this type can be packed in a vector). The output vector will contain
4171 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES 4106 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES
4172 will be 2). 4107 will be 2).
4173 4108
4174 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors 4109 If REDUC_GROUP_SIZE > NUNITS, the scalars will be split into several
4175 containing the operands. 4110 vectors containing the operands.
4176 4111
4177 For example, NUNITS is four as before, and the group size is 8 4112 For example, NUNITS is four as before, and the group size is 8
4178 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and 4113 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and
4179 {s5, s6, s7, s8}. */ 4114 {s5, s6, s7, s8}. */
4180 4115
4116 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits))
4117 nunits = group_size;
4118
4181 number_of_copies = nunits * number_of_vectors / group_size; 4119 number_of_copies = nunits * number_of_vectors / group_size;
4182 4120
4183 number_of_places_left_in_vector = nunits; 4121 number_of_places_left_in_vector = nunits;
4184 auto_vec<tree, 32> elts (nunits); 4122 bool constant_p = true;
4123 tree_vector_builder elts (vector_type, nunits, 1);
4185 elts.quick_grow (nunits); 4124 elts.quick_grow (nunits);
4186 for (j = 0; j < number_of_copies; j++) 4125 for (j = 0; j < number_of_copies; j++)
4187 { 4126 {
4188 for (i = group_size - 1; stmts.iterate (i, &stmt); i--) 4127 for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--)
4189 { 4128 {
4190 tree op; 4129 tree op;
4191 /* Get the def before the loop. In reduction chain we have only 4130 /* Get the def before the loop. In reduction chain we have only
4192 one initial value. */ 4131 one initial value. */
4193 if ((j != (number_of_copies - 1) 4132 if ((j != (number_of_copies - 1)
4194 || (reduc_chain && i != 0)) 4133 || (reduc_chain && i != 0))
4195 && neutral_op) 4134 && neutral_op)
4196 op = neutral_op; 4135 op = neutral_op;
4197 else 4136 else
4198 op = PHI_ARG_DEF_FROM_EDGE (stmt, pe); 4137 op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe);
4199 4138
4200 /* Create 'vect_ = {op0,op1,...,opn}'. */ 4139 /* Create 'vect_ = {op0,op1,...,opn}'. */
4201 number_of_places_left_in_vector--; 4140 number_of_places_left_in_vector--;
4202 elts[number_of_places_left_in_vector] = op; 4141 elts[number_of_places_left_in_vector] = op;
4142 if (!CONSTANT_CLASS_P (op))
4143 constant_p = false;
4203 4144
4204 if (number_of_places_left_in_vector == 0) 4145 if (number_of_places_left_in_vector == 0)
4205 { 4146 {
4206 gimple_seq ctor_seq = NULL; 4147 gimple_seq ctor_seq = NULL;
4207 tree init = gimple_build_vector (&ctor_seq, vector_type, elts); 4148 tree init;
4149 if (constant_p && !neutral_op
4150 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits)
4151 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits))
4152 /* Build the vector directly from ELTS. */
4153 init = gimple_build_vector (&ctor_seq, &elts);
4154 else if (neutral_op)
4155 {
4156 /* Build a vector of the neutral value and shift the
4157 other elements into place. */
4158 init = gimple_build_vector_from_val (&ctor_seq, vector_type,
4159 neutral_op);
4160 int k = nunits;
4161 while (k > 0 && elts[k - 1] == neutral_op)
4162 k -= 1;
4163 while (k > 0)
4164 {
4165 k -= 1;
4166 init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT,
4167 vector_type, init, elts[k]);
4168 }
4169 }
4170 else
4171 {
4172 /* First time round, duplicate ELTS to fill the
4173 required number of vectors, then cherry pick the
4174 appropriate result for each iteration. */
4175 if (vec_oprnds->is_empty ())
4176 duplicate_and_interleave (&ctor_seq, vector_type, elts,
4177 number_of_vectors,
4178 permute_results);
4179 init = permute_results[number_of_vectors - j - 1];
4180 }
4208 if (ctor_seq != NULL) 4181 if (ctor_seq != NULL)
4209 gsi_insert_seq_on_edge_immediate (pe, ctor_seq); 4182 gsi_insert_seq_on_edge_immediate (pe, ctor_seq);
4210 voprnds.quick_push (init); 4183 voprnds.quick_push (init);
4211 4184
4212 number_of_places_left_in_vector = nunits; 4185 number_of_places_left_in_vector = nunits;
4186 elts.new_vector (vector_type, nunits, 1);
4187 elts.quick_grow (nunits);
4188 constant_p = true;
4213 } 4189 }
4214 } 4190 }
4215 } 4191 }
4216 4192
4217 /* Since the vectors are created in the reverse order, we should invert 4193 /* Since the vectors are created in the reverse order, we should invert
4258 Create code at the loop-epilog to finalize the result of a reduction 4234 Create code at the loop-epilog to finalize the result of a reduction
4259 computation. 4235 computation.
4260 4236
4261 VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector 4237 VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector
4262 reduction statements. 4238 reduction statements.
4263 STMT is the scalar reduction stmt that is being vectorized. 4239 STMT_INFO is the scalar reduction stmt that is being vectorized.
4264 NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the 4240 NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the
4265 number of elements that we can fit in a vectype (nunits). In this case 4241 number of elements that we can fit in a vectype (nunits). In this case
4266 we have to generate more than one vector stmt - i.e. - we need to "unroll" 4242 we have to generate more than one vector stmt - i.e. - we need to "unroll"
4267 the vector stmt by a factor VF/nunits. For more details see documentation 4243 the vector stmt by a factor VF/nunits. For more details see documentation
4268 in vectorizable_operation. 4244 in vectorizable_operation.
4269 REDUC_CODE is the tree-code for the epilog reduction. 4245 REDUC_FN is the internal function for the epilog reduction.
4270 REDUCTION_PHIS is a list of the phi-nodes that carry the reduction 4246 REDUCTION_PHIS is a list of the phi-nodes that carry the reduction
4271 computation. 4247 computation.
4272 REDUC_INDEX is the index of the operand in the right hand side of the 4248 REDUC_INDEX is the index of the operand in the right hand side of the
4273 statement that is defined by REDUCTION_PHI. 4249 statement that is defined by REDUCTION_PHI.
4274 DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled. 4250 DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled.
4275 SLP_NODE is an SLP node containing a group of reduction statements. The 4251 SLP_NODE is an SLP node containing a group of reduction statements. The
4276 first one in this group is STMT. 4252 first one in this group is STMT_INFO.
4253 INDUC_VAL is for INTEGER_INDUC_COND_REDUCTION the value to use for the case
4254 when the COND_EXPR is never true in the loop. For MAX_EXPR, it needs to
4255 be smaller than any value of the IV in the loop, for MIN_EXPR larger than
4256 any value of the IV in the loop.
4257 INDUC_CODE is the code for epilog reduction if INTEGER_INDUC_COND_REDUCTION.
4258 NEUTRAL_OP is the value given by neutral_op_for_slp_reduction; it is
4259 null if this is not an SLP reduction.
4277 4260
4278 This function: 4261 This function:
4279 1. Creates the reduction def-use cycles: sets the arguments for 4262 1. Creates the reduction def-use cycles: sets the arguments for
4280 REDUCTION_PHIS: 4263 REDUCTION_PHIS:
4281 The loop-entry argument is the vectorized initial-value of the reduction. 4264 The loop-entry argument is the vectorized initial-value of the reduction.
4282 The loop-latch argument is taken from VECT_DEFS - the vector of partial 4265 The loop-latch argument is taken from VECT_DEFS - the vector of partial
4283 sums. 4266 sums.
4284 2. "Reduces" each vector of partial results VECT_DEFS into a single result, 4267 2. "Reduces" each vector of partial results VECT_DEFS into a single result,
4285 by applying the operation specified by REDUC_CODE if available, or by 4268 by calling the function specified by REDUC_FN if available, or by
4286 other means (whole-vector shifts or a scalar loop). 4269 other means (whole-vector shifts or a scalar loop).
4287 The function also creates a new phi node at the loop exit to preserve 4270 The function also creates a new phi node at the loop exit to preserve
4288 loop-closed form, as illustrated below. 4271 loop-closed form, as illustrated below.
4289 4272
4290 The flow at the entry to this function: 4273 The flow at the entry to this function:
4291 4274
4292 loop: 4275 loop:
4293 vec_def = phi <null, null> # REDUCTION_PHI 4276 vec_def = phi <null, null> # REDUCTION_PHI
4294 VECT_DEF = vector_stmt # vectorized form of STMT 4277 VECT_DEF = vector_stmt # vectorized form of STMT_INFO
4295 s_loop = scalar_stmt # (scalar) STMT 4278 s_loop = scalar_stmt # (scalar) STMT_INFO
4296 loop_exit: 4279 loop_exit:
4297 s_out0 = phi <s_loop> # (scalar) EXIT_PHI 4280 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4298 use <s_out0> 4281 use <s_out0>
4299 use <s_out0> 4282 use <s_out0>
4300 4283
4301 The above is transformed by this function into: 4284 The above is transformed by this function into:
4302 4285
4303 loop: 4286 loop:
4304 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI 4287 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI
4305 VECT_DEF = vector_stmt # vectorized form of STMT 4288 VECT_DEF = vector_stmt # vectorized form of STMT_INFO
4306 s_loop = scalar_stmt # (scalar) STMT 4289 s_loop = scalar_stmt # (scalar) STMT_INFO
4307 loop_exit: 4290 loop_exit:
4308 s_out0 = phi <s_loop> # (scalar) EXIT_PHI 4291 s_out0 = phi <s_loop> # (scalar) EXIT_PHI
4309 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI 4292 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI
4310 v_out2 = reduce <v_out1> 4293 v_out2 = reduce <v_out1>
4311 s_out3 = extract_field <v_out2, 0> 4294 s_out3 = extract_field <v_out2, 0>
4313 use <s_out4> 4296 use <s_out4>
4314 use <s_out4> 4297 use <s_out4>
4315 */ 4298 */
4316 4299
4317 static void 4300 static void
4318 vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt, 4301 vect_create_epilog_for_reduction (vec<tree> vect_defs,
4302 stmt_vec_info stmt_info,
4319 gimple *reduc_def_stmt, 4303 gimple *reduc_def_stmt,
4320 int ncopies, enum tree_code reduc_code, 4304 int ncopies, internal_fn reduc_fn,
4321 vec<gimple *> reduction_phis, 4305 vec<stmt_vec_info> reduction_phis,
4322 bool double_reduc, 4306 bool double_reduc,
4323 slp_tree slp_node, 4307 slp_tree slp_node,
4324 slp_instance slp_node_instance) 4308 slp_instance slp_node_instance,
4309 tree induc_val, enum tree_code induc_code,
4310 tree neutral_op)
4325 { 4311 {
4326 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4327 stmt_vec_info prev_phi_info; 4312 stmt_vec_info prev_phi_info;
4328 tree vectype; 4313 tree vectype;
4329 machine_mode mode; 4314 machine_mode mode;
4330 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 4315 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
4331 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL; 4316 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL;
4332 basic_block exit_bb; 4317 basic_block exit_bb;
4333 tree scalar_dest; 4318 tree scalar_dest;
4334 tree scalar_type; 4319 tree scalar_type;
4335 gimple *new_phi = NULL, *phi; 4320 gimple *new_phi = NULL, *phi;
4321 stmt_vec_info phi_info;
4336 gimple_stmt_iterator exit_gsi; 4322 gimple_stmt_iterator exit_gsi;
4337 tree vec_dest; 4323 tree vec_dest;
4338 tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest; 4324 tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest;
4339 gimple *epilog_stmt = NULL; 4325 gimple *epilog_stmt = NULL;
4340 enum tree_code code = gimple_assign_rhs_code (stmt); 4326 enum tree_code code = gimple_assign_rhs_code (stmt_info->stmt);
4341 gimple *exit_phi; 4327 gimple *exit_phi;
4342 tree bitsize; 4328 tree bitsize;
4343 tree adjustment_def = NULL; 4329 tree adjustment_def = NULL;
4344 tree vec_initial_def = NULL; 4330 tree vec_initial_def = NULL;
4345 tree expr, def, initial_def = NULL; 4331 tree expr, def, initial_def = NULL;
4346 tree orig_name, scalar_result; 4332 tree orig_name, scalar_result;
4347 imm_use_iterator imm_iter, phi_imm_iter; 4333 imm_use_iterator imm_iter, phi_imm_iter;
4348 use_operand_p use_p, phi_use_p; 4334 use_operand_p use_p, phi_use_p;
4349 gimple *use_stmt, *orig_stmt, *reduction_phi = NULL; 4335 gimple *use_stmt;
4336 stmt_vec_info reduction_phi_info = NULL;
4350 bool nested_in_vect_loop = false; 4337 bool nested_in_vect_loop = false;
4351 auto_vec<gimple *> new_phis; 4338 auto_vec<gimple *> new_phis;
4352 auto_vec<gimple *> inner_phis; 4339 auto_vec<stmt_vec_info> inner_phis;
4353 enum vect_def_type dt = vect_unknown_def_type;
4354 int j, i; 4340 int j, i;
4355 auto_vec<tree> scalar_results; 4341 auto_vec<tree> scalar_results;
4356 unsigned int group_size = 1, k, ratio; 4342 unsigned int group_size = 1, k, ratio;
4357 auto_vec<tree> vec_initial_defs; 4343 auto_vec<tree> vec_initial_defs;
4358 auto_vec<gimple *> phis; 4344 auto_vec<gimple *> phis;
4359 bool slp_reduc = false; 4345 bool slp_reduc = false;
4346 bool direct_slp_reduc;
4360 tree new_phi_result; 4347 tree new_phi_result;
4361 gimple *inner_phi = NULL; 4348 stmt_vec_info inner_phi = NULL;
4362 tree induction_index = NULL_TREE; 4349 tree induction_index = NULL_TREE;
4363 4350
4364 if (slp_node) 4351 if (slp_node)
4365 group_size = SLP_TREE_SCALAR_STMTS (slp_node).length (); 4352 group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
4366 4353
4367 if (nested_in_vect_loop_p (loop, stmt)) 4354 if (nested_in_vect_loop_p (loop, stmt_info))
4368 { 4355 {
4369 outer_loop = loop; 4356 outer_loop = loop;
4370 loop = loop->inner; 4357 loop = loop->inner;
4371 nested_in_vect_loop = true; 4358 nested_in_vect_loop = true;
4372 gcc_assert (!slp_node); 4359 gcc_assert (!slp_node);
4398 if (slp_node) 4385 if (slp_node)
4399 { 4386 {
4400 unsigned vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 4387 unsigned vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
4401 vec_initial_defs.reserve (vec_num); 4388 vec_initial_defs.reserve (vec_num);
4402 get_initial_defs_for_reduction (slp_node_instance->reduc_phis, 4389 get_initial_defs_for_reduction (slp_node_instance->reduc_phis,
4403 &vec_initial_defs, vec_num, code, 4390 &vec_initial_defs, vec_num,
4404 GROUP_FIRST_ELEMENT (stmt_info)); 4391 REDUC_GROUP_FIRST_ELEMENT (stmt_info),
4392 neutral_op);
4405 } 4393 }
4406 else 4394 else
4407 { 4395 {
4408 /* Get at the scalar def before the loop, that defines the initial value 4396 /* Get at the scalar def before the loop, that defines the initial value
4409 of the reduction variable. */ 4397 of the reduction variable. */
4410 gimple *def_stmt;
4411 initial_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt, 4398 initial_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt,
4412 loop_preheader_edge (loop)); 4399 loop_preheader_edge (loop));
4413 vect_is_simple_use (initial_def, loop_vinfo, &def_stmt, &initial_def_dt); 4400 /* Optimize: if initial_def is for REDUC_MAX smaller than the base
4414 vec_initial_def = get_initial_def_for_reduction (stmt, initial_def, 4401 and we can't use zero for induc_val, use initial_def. Similarly
4415 &adjustment_def); 4402 for REDUC_MIN and initial_def larger than the base. */
4403 if (TREE_CODE (initial_def) == INTEGER_CST
4404 && (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
4405 == INTEGER_INDUC_COND_REDUCTION)
4406 && !integer_zerop (induc_val)
4407 && ((induc_code == MAX_EXPR
4408 && tree_int_cst_lt (initial_def, induc_val))
4409 || (induc_code == MIN_EXPR
4410 && tree_int_cst_lt (induc_val, initial_def))))
4411 induc_val = initial_def;
4412
4413 if (double_reduc)
4414 /* In case of double reduction we only create a vector variable
4415 to be put in the reduction phi node. The actual statement
4416 creation is done later in this function. */
4417 vec_initial_def = vect_create_destination_var (initial_def, vectype);
4418 else if (nested_in_vect_loop)
4419 {
4420 /* Do not use an adjustment def as that case is not supported
4421 correctly if ncopies is not one. */
4422 vect_is_simple_use (initial_def, loop_vinfo, &initial_def_dt);
4423 vec_initial_def = vect_get_vec_def_for_operand (initial_def,
4424 stmt_info);
4425 }
4426 else
4427 vec_initial_def
4428 = get_initial_def_for_reduction (stmt_info, initial_def,
4429 &adjustment_def);
4416 vec_initial_defs.create (1); 4430 vec_initial_defs.create (1);
4417 vec_initial_defs.quick_push (vec_initial_def); 4431 vec_initial_defs.quick_push (vec_initial_def);
4418 } 4432 }
4419 4433
4420 /* Set phi nodes arguments. */ 4434 /* Set phi nodes arguments. */
4421 FOR_EACH_VEC_ELT (reduction_phis, i, phi) 4435 FOR_EACH_VEC_ELT (reduction_phis, i, phi_info)
4422 { 4436 {
4423 tree vec_init_def = vec_initial_defs[i]; 4437 tree vec_init_def = vec_initial_defs[i];
4424 tree def = vect_defs[i]; 4438 tree def = vect_defs[i];
4425 for (j = 0; j < ncopies; j++) 4439 for (j = 0; j < ncopies; j++)
4426 { 4440 {
4427 if (j != 0) 4441 if (j != 0)
4428 { 4442 {
4429 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)); 4443 phi_info = STMT_VINFO_RELATED_STMT (phi_info);
4430 if (nested_in_vect_loop) 4444 if (nested_in_vect_loop)
4431 vec_init_def 4445 vec_init_def
4432 = vect_get_vec_def_for_stmt_copy (initial_def_dt, 4446 = vect_get_vec_def_for_stmt_copy (loop_vinfo, vec_init_def);
4433 vec_init_def);
4434 } 4447 }
4435 4448
4436 /* Set the loop-entry arg of the reduction-phi. */ 4449 /* Set the loop-entry arg of the reduction-phi. */
4437 4450
4451 gphi *phi = as_a <gphi *> (phi_info->stmt);
4438 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) 4452 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
4439 == INTEGER_INDUC_COND_REDUCTION) 4453 == INTEGER_INDUC_COND_REDUCTION)
4440 { 4454 {
4441 /* Initialise the reduction phi to zero. This prevents initial 4455 /* Initialise the reduction phi to zero. This prevents initial
4442 values of non-zero interfering with the reduction op. */ 4456 values of non-zero interfering with the reduction op. */
4443 gcc_assert (ncopies == 1); 4457 gcc_assert (ncopies == 1);
4444 gcc_assert (i == 0); 4458 gcc_assert (i == 0);
4445 4459
4446 tree vec_init_def_type = TREE_TYPE (vec_init_def); 4460 tree vec_init_def_type = TREE_TYPE (vec_init_def);
4447 tree zero_vec = build_zero_cst (vec_init_def_type); 4461 tree induc_val_vec
4448 4462 = build_vector_from_val (vec_init_def_type, induc_val);
4449 add_phi_arg (as_a <gphi *> (phi), zero_vec, 4463
4450 loop_preheader_edge (loop), UNKNOWN_LOCATION); 4464 add_phi_arg (phi, induc_val_vec, loop_preheader_edge (loop),
4465 UNKNOWN_LOCATION);
4451 } 4466 }
4452 else 4467 else
4453 add_phi_arg (as_a <gphi *> (phi), vec_init_def, 4468 add_phi_arg (phi, vec_init_def, loop_preheader_edge (loop),
4454 loop_preheader_edge (loop), UNKNOWN_LOCATION); 4469 UNKNOWN_LOCATION);
4455 4470
4456 /* Set the loop-latch arg for the reduction-phi. */ 4471 /* Set the loop-latch arg for the reduction-phi. */
4457 if (j > 0) 4472 if (j > 0)
4458 def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def); 4473 def = vect_get_vec_def_for_stmt_copy (loop_vinfo, def);
4459 4474
4460 add_phi_arg (as_a <gphi *> (phi), def, loop_latch_edge (loop), 4475 add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION);
4461 UNKNOWN_LOCATION);
4462 4476
4463 if (dump_enabled_p ()) 4477 if (dump_enabled_p ())
4464 { 4478 dump_printf_loc (MSG_NOTE, vect_location,
4465 dump_printf_loc (MSG_NOTE, vect_location, 4479 "transform reduction: created def-use cycle: %G%G",
4466 "transform reduction: created def-use cycle: "); 4480 phi, SSA_NAME_DEF_STMT (def));
4467 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
4468 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0);
4469 }
4470 } 4481 }
4471 } 4482 }
4472 4483
4473 /* For cond reductions we want to create a new vector (INDEX_COND_EXPR) 4484 /* For cond reductions we want to create a new vector (INDEX_COND_EXPR)
4474 which is updated with the current index of the loop for every match of 4485 which is updated with the current index of the loop for every match of
4478 indexes. If there are no matches at all then the vector will be all 4489 indexes. If there are no matches at all then the vector will be all
4479 zeroes. */ 4490 zeroes. */
4480 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION) 4491 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION)
4481 { 4492 {
4482 tree indx_before_incr, indx_after_incr; 4493 tree indx_before_incr, indx_after_incr;
4483 int nunits_out = TYPE_VECTOR_SUBPARTS (vectype); 4494 poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype);
4484 int k; 4495
4485 4496 gimple *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info)->stmt;
4486 gimple *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info);
4487 gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR); 4497 gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR);
4488 4498
4489 int scalar_precision 4499 int scalar_precision
4490 = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (vectype))); 4500 = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (vectype)));
4491 tree cr_index_scalar_type = make_unsigned_type (scalar_precision); 4501 tree cr_index_scalar_type = make_unsigned_type (scalar_precision);
4495 /* First we create a simple vector induction variable which starts 4505 /* First we create a simple vector induction variable which starts
4496 with the values {1,2,3,...} (SERIES_VECT) and increments by the 4506 with the values {1,2,3,...} (SERIES_VECT) and increments by the
4497 vector size (STEP). */ 4507 vector size (STEP). */
4498 4508
4499 /* Create a {1,2,3,...} vector. */ 4509 /* Create a {1,2,3,...} vector. */
4500 auto_vec<tree, 32> vtemp (nunits_out); 4510 tree series_vect = build_index_vector (cr_index_vector_type, 1, 1);
4501 for (k = 0; k < nunits_out; ++k)
4502 vtemp.quick_push (build_int_cst (cr_index_scalar_type, k + 1));
4503 tree series_vect = build_vector (cr_index_vector_type, vtemp);
4504 4511
4505 /* Create a vector of the step value. */ 4512 /* Create a vector of the step value. */
4506 tree step = build_int_cst (cr_index_scalar_type, nunits_out); 4513 tree step = build_int_cst (cr_index_scalar_type, nunits_out);
4507 tree vec_step = build_vector_from_val (cr_index_vector_type, step); 4514 tree vec_step = build_vector_from_val (cr_index_vector_type, step);
4508 4515
4521 tree vec_zero = build_vector_from_val (cr_index_vector_type, zero); 4528 tree vec_zero = build_vector_from_val (cr_index_vector_type, zero);
4522 4529
4523 /* Create a vector phi node. */ 4530 /* Create a vector phi node. */
4524 tree new_phi_tree = make_ssa_name (cr_index_vector_type); 4531 tree new_phi_tree = make_ssa_name (cr_index_vector_type);
4525 new_phi = create_phi_node (new_phi_tree, loop->header); 4532 new_phi = create_phi_node (new_phi_tree, loop->header);
4526 set_vinfo_for_stmt (new_phi, 4533 loop_vinfo->add_stmt (new_phi);
4527 new_stmt_vec_info (new_phi, loop_vinfo));
4528 add_phi_arg (as_a <gphi *> (new_phi), vec_zero, 4534 add_phi_arg (as_a <gphi *> (new_phi), vec_zero,
4529 loop_preheader_edge (loop), UNKNOWN_LOCATION); 4535 loop_preheader_edge (loop), UNKNOWN_LOCATION);
4530 4536
4531 /* Now take the condition from the loop's original cond_expr 4537 /* Now take the condition from the loop's original cond_expr
4532 (VEC_STMT) and produce a new cond_expr (INDEX_COND_EXPR) which for 4538 (VEC_STMT) and produce a new cond_expr (INDEX_COND_EXPR) which for
4547 new_phi_tree); 4553 new_phi_tree);
4548 induction_index = make_ssa_name (cr_index_vector_type); 4554 induction_index = make_ssa_name (cr_index_vector_type);
4549 gimple *index_condition = gimple_build_assign (induction_index, 4555 gimple *index_condition = gimple_build_assign (induction_index,
4550 index_cond_expr); 4556 index_cond_expr);
4551 gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT); 4557 gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT);
4552 stmt_vec_info index_vec_info = new_stmt_vec_info (index_condition, 4558 stmt_vec_info index_vec_info = loop_vinfo->add_stmt (index_condition);
4553 loop_vinfo);
4554 STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type; 4559 STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type;
4555 set_vinfo_for_stmt (index_condition, index_vec_info);
4556 4560
4557 /* Update the phi with the vec cond. */ 4561 /* Update the phi with the vec cond. */
4558 add_phi_arg (as_a <gphi *> (new_phi), induction_index, 4562 add_phi_arg (as_a <gphi *> (new_phi), induction_index,
4559 loop_latch_edge (loop), UNKNOWN_LOCATION); 4563 loop_latch_edge (loop), UNKNOWN_LOCATION);
4560 } 4564 }
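A scalar model of the index-tracking scheme set up above (illustration only, not from the patch): each lane records the 1-based iteration index of its last match, the index vector is bumped by the step every vector iteration, and the epilog later picks the lane with the maximum index together with its data value. Lanes that never match stay zero. All values below are made-up examples.

#include <stdio.h>

#define VF 4

int
main (void)
{
  int a[] = { 7, 2, 9, 4, 5, 8, 1, 6 };
  int n = 8, threshold = 6;	/* condition: a[i] < threshold */

  unsigned int series[VF] = { 1, 2, 3, 4 };	/* the {1,2,3,...} vector */
  unsigned int last_idx[VF] = { 0, 0, 0, 0 };
  int data[VF] = { 0, 0, 0, 0 };

  for (int i = 0; i < n; i += VF)
    {
      for (int lane = 0; lane < VF; lane++)
        if (a[i + lane] < threshold)
          {
            last_idx[lane] = series[lane];	/* INDEX_COND_EXPR */
            data[lane] = a[i + lane];		/* data cond reduction */
          }
      for (int lane = 0; lane < VF; lane++)
        series[lane] += VF;			/* add the step vector */
    }

  /* Epilog: reduce to the lane holding the maximum index.  */
  unsigned int best = 0;
  int result = 0;
  for (int lane = 0; lane < VF; lane++)
    if (last_idx[lane] > best)
      {
        best = last_idx[lane];
        result = data[lane];
      }
  printf ("last match at iteration %u, value %d\n", best, result);
  return 0;
}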
4567 step 1: compute the scalar result in a vector (v_out2) 4571 step 1: compute the scalar result in a vector (v_out2)
4568 step 2: extract the scalar result (s_out3) from the vector (v_out2) 4572 step 2: extract the scalar result (s_out3) from the vector (v_out2)
4569 step 3: adjust the scalar result (s_out3) if needed. 4573 step 3: adjust the scalar result (s_out3) if needed.
4570 4574
4571 Step 1 can be accomplished using one of the following three schemes: 4575 Step 1 can be accomplished using one of the following three schemes:
4572 (scheme 1) using reduc_code, if available. 4576 (scheme 1) using reduc_fn, if available.
4573 (scheme 2) using whole-vector shifts, if available. 4577 (scheme 2) using whole-vector shifts, if available.
4574 (scheme 3) using a scalar loop. In this case steps 1+2 above are 4578 (scheme 3) using a scalar loop. In this case steps 1+2 above are
4575 combined. 4579 combined.
4576 4580
4577 The overall epilog code looks like this: 4581 The overall epilog code looks like this:
4597 { 4601 {
4598 for (j = 0; j < ncopies; j++) 4602 for (j = 0; j < ncopies; j++)
4599 { 4603 {
4600 tree new_def = copy_ssa_name (def); 4604 tree new_def = copy_ssa_name (def);
4601 phi = create_phi_node (new_def, exit_bb); 4605 phi = create_phi_node (new_def, exit_bb);
4602 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo)); 4606 stmt_vec_info phi_info = loop_vinfo->add_stmt (phi);
4603 if (j == 0) 4607 if (j == 0)
4604 new_phis.quick_push (phi); 4608 new_phis.quick_push (phi);
4605 else 4609 else
4606 { 4610 {
4607 def = vect_get_vec_def_for_stmt_copy (dt, def); 4611 def = vect_get_vec_def_for_stmt_copy (loop_vinfo, def);
4608 STMT_VINFO_RELATED_STMT (prev_phi_info) = phi; 4612 STMT_VINFO_RELATED_STMT (prev_phi_info) = phi_info;
4609 } 4613 }
4610 4614
4611 SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def); 4615 SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def);
4612 prev_phi_info = vinfo_for_stmt (phi); 4616 prev_phi_info = phi_info;
4613 } 4617 }
4614 } 4618 }
4615 4619
4616 /* The epilogue is created for the outer-loop, i.e., for the loop being 4620 /* The epilogue is created for the outer-loop, i.e., for the loop being
4617 vectorized. Create exit phis for the outer loop. */ 4621 vectorized. Create exit phis for the outer loop. */
4620 loop = outer_loop; 4624 loop = outer_loop;
4621 exit_bb = single_exit (loop)->dest; 4625 exit_bb = single_exit (loop)->dest;
4622 inner_phis.create (vect_defs.length ()); 4626 inner_phis.create (vect_defs.length ());
4623 FOR_EACH_VEC_ELT (new_phis, i, phi) 4627 FOR_EACH_VEC_ELT (new_phis, i, phi)
4624 { 4628 {
4629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
4625 tree new_result = copy_ssa_name (PHI_RESULT (phi)); 4630 tree new_result = copy_ssa_name (PHI_RESULT (phi));
4626 gphi *outer_phi = create_phi_node (new_result, exit_bb); 4631 gphi *outer_phi = create_phi_node (new_result, exit_bb);
4627 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx, 4632 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4628 PHI_RESULT (phi)); 4633 PHI_RESULT (phi));
4629 set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi, 4634 prev_phi_info = loop_vinfo->add_stmt (outer_phi);
4630 loop_vinfo)); 4635 inner_phis.quick_push (phi_info);
4631 inner_phis.quick_push (phi);
4632 new_phis[i] = outer_phi; 4636 new_phis[i] = outer_phi;
4633 prev_phi_info = vinfo_for_stmt (outer_phi); 4637 while (STMT_VINFO_RELATED_STMT (phi_info))
4634 while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)))
4635 { 4638 {
4636 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)); 4639 phi_info = STMT_VINFO_RELATED_STMT (phi_info);
4637 new_result = copy_ssa_name (PHI_RESULT (phi)); 4640 new_result = copy_ssa_name (PHI_RESULT (phi_info->stmt));
4638 outer_phi = create_phi_node (new_result, exit_bb); 4641 outer_phi = create_phi_node (new_result, exit_bb);
4639 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx, 4642 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx,
4640 PHI_RESULT (phi)); 4643 PHI_RESULT (phi_info->stmt));
4641 set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi, 4644 stmt_vec_info outer_phi_info = loop_vinfo->add_stmt (outer_phi);
4642 loop_vinfo)); 4645 STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi_info;
4643 STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi; 4646 prev_phi_info = outer_phi_info;
4644 prev_phi_info = vinfo_for_stmt (outer_phi);
4645 } 4647 }
4646 } 4648 }
4647 } 4649 }
4648 4650
4649 exit_gsi = gsi_after_labels (exit_bb); 4651 exit_gsi = gsi_after_labels (exit_bb);
4650 4652
4651 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3 4653 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3
4652 (i.e. when reduc_code is not available) and in the final adjustment 4654 (i.e. when reduc_fn is not available) and in the final adjustment
4653 code (if needed). Also get the original scalar reduction variable as 4655 code (if needed). Also get the original scalar reduction variable as
4654 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it 4656 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it
4655 represents a reduction pattern), the tree-code and scalar-def are 4657 represents a reduction pattern), the tree-code and scalar-def are
4656 taken from the original stmt that the pattern-stmt (STMT) replaces. 4658 taken from the original stmt that the pattern-stmt (STMT) replaces.
4657 Otherwise (it is a regular reduction) - the tree-code and scalar-def 4659 Otherwise (it is a regular reduction) - the tree-code and scalar-def
4658 are taken from STMT. */ 4660 are taken from STMT. */
4659 4661
4660 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); 4662 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info);
4661 if (!orig_stmt) 4663 if (orig_stmt_info != stmt_info)
4662 {
4663 /* Regular reduction */
4664 orig_stmt = stmt;
4665 }
4666 else
4667 { 4664 {
4668 /* Reduction pattern */ 4665 /* Reduction pattern */
4669 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt); 4666 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
4670 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)); 4667 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt_info);
4671 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt); 4668 }
4672 } 4669
4673 4670 code = gimple_assign_rhs_code (orig_stmt_info->stmt);
4674 code = gimple_assign_rhs_code (orig_stmt);
4675 /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore, 4671 /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore,
4676 partial results are added and not subtracted. */ 4672 partial results are added and not subtracted. */
4677 if (code == MINUS_EXPR) 4673 if (code == MINUS_EXPR)
4678 code = PLUS_EXPR; 4674 code = PLUS_EXPR;
4679 4675
4680 scalar_dest = gimple_assign_lhs (orig_stmt); 4676 scalar_dest = gimple_assign_lhs (orig_stmt_info->stmt);
4681 scalar_type = TREE_TYPE (scalar_dest); 4677 scalar_type = TREE_TYPE (scalar_dest);
4682 scalar_results.create (group_size); 4678 scalar_results.create (group_size);
4683 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL); 4679 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL);
4684 bitsize = TYPE_SIZE (scalar_type); 4680 bitsize = TYPE_SIZE (scalar_type);
4685 4681
4695 /* SLP reduction without reduction chain, e.g., 4691 /* SLP reduction without reduction chain, e.g.,
4696 # a1 = phi <a2, a0> 4692 # a1 = phi <a2, a0>
4697 # b1 = phi <b2, b0> 4693 # b1 = phi <b2, b0>
4698 a2 = operation (a1) 4694 a2 = operation (a1)
4699 b2 = operation (b1) */ 4695 b2 = operation (b1) */
4700 slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))); 4696 slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info));
4697
4698 /* True if we should implement SLP_REDUC using native reduction operations
4699 instead of scalar operations. */
4700 direct_slp_reduc = (reduc_fn != IFN_LAST
4701 && slp_reduc
4702 && !TYPE_VECTOR_SUBPARTS (vectype).is_constant ());
4701 4703
4702 /* In case of reduction chain, e.g., 4704 /* In case of reduction chain, e.g.,
4703 # a1 = phi <a3, a0> 4705 # a1 = phi <a3, a0>
4704 a2 = operation (a1) 4706 a2 = operation (a1)
4705 a3 = operation (a2), 4707 a3 = operation (a2),
4706 4708
4707 we may end up with more than one vector result. Here we reduce them to 4709 we may end up with more than one vector result. Here we reduce them to
4708 one vector. */ 4710 one vector. */
4709 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) 4711 if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc)
4710 { 4712 {
4711 tree first_vect = PHI_RESULT (new_phis[0]); 4713 tree first_vect = PHI_RESULT (new_phis[0]);
4712 gassign *new_vec_stmt = NULL; 4714 gassign *new_vec_stmt = NULL;
4713 vec_dest = vect_create_destination_var (scalar_dest, vectype); 4715 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4714 for (k = 1; k < new_phis.length (); k++) 4716 for (k = 1; k < new_phis.length (); k++)
4734 { 4736 {
4735 gcc_assert (new_phis.length () == 1); 4737 gcc_assert (new_phis.length () == 1);
4736 tree first_vect = PHI_RESULT (new_phis[0]); 4738 tree first_vect = PHI_RESULT (new_phis[0]);
4737 gassign *new_vec_stmt = NULL; 4739 gassign *new_vec_stmt = NULL;
4738 vec_dest = vect_create_destination_var (scalar_dest, vectype); 4740 vec_dest = vect_create_destination_var (scalar_dest, vectype);
4739 gimple *next_phi = new_phis[0]; 4741 stmt_vec_info next_phi_info = loop_vinfo->lookup_stmt (new_phis[0]);
4740 for (int k = 1; k < ncopies; ++k) 4742 for (int k = 1; k < ncopies; ++k)
4741 { 4743 {
4742 next_phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next_phi)); 4744 next_phi_info = STMT_VINFO_RELATED_STMT (next_phi_info);
4743 tree second_vect = PHI_RESULT (next_phi); 4745 tree second_vect = PHI_RESULT (next_phi_info->stmt);
4744 tree tem = make_ssa_name (vec_dest, new_vec_stmt); 4746 tree tem = make_ssa_name (vec_dest, new_vec_stmt);
4745 new_vec_stmt = gimple_build_assign (tem, code, 4747 new_vec_stmt = gimple_build_assign (tem, code,
4746 first_vect, second_vect); 4748 first_vect, second_vect);
4747 gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT); 4749 gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT);
4748 first_vect = tem; 4750 first_vect = tem;
4753 } 4755 }
4754 else 4756 else
4755 new_phi_result = PHI_RESULT (new_phis[0]); 4757 new_phi_result = PHI_RESULT (new_phis[0]);
4756 4758
4757 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION 4759 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
4758 && reduc_code != ERROR_MARK) 4760 && reduc_fn != IFN_LAST)
4759 { 4761 {
4760 /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing 4762 /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing
4761 various data values where the condition matched and another vector 4763 various data values where the condition matched and another vector
4762 (INDUCTION_INDEX) containing all the indexes of those matches. We 4764 (INDUCTION_INDEX) containing all the indexes of those matches. We
4763 need to extract the last matching index (which will be the index with 4765 need to extract the last matching index (which will be the index with
4791 gimple *zero_vec_stmt = gimple_build_assign (zero_vec, zero_vec_rhs); 4793 gimple *zero_vec_stmt = gimple_build_assign (zero_vec, zero_vec_rhs);
4792 gsi_insert_before (&exit_gsi, zero_vec_stmt, GSI_SAME_STMT); 4794 gsi_insert_before (&exit_gsi, zero_vec_stmt, GSI_SAME_STMT);
4793 4795
4794 /* Find maximum value from the vector of found indexes. */ 4796 /* Find maximum value from the vector of found indexes. */
4795 tree max_index = make_ssa_name (index_scalar_type); 4797 tree max_index = make_ssa_name (index_scalar_type);
4796 gimple *max_index_stmt = gimple_build_assign (max_index, REDUC_MAX_EXPR, 4798 gcall *max_index_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
4797 induction_index); 4799 1, induction_index);
4800 gimple_call_set_lhs (max_index_stmt, max_index);
4798 gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT); 4801 gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT);
4799 4802
4800 /* Vector of {max_index, max_index, max_index,...}. */ 4803 /* Vector of {max_index, max_index, max_index,...}. */
4801 tree max_index_vec = make_ssa_name (index_vec_type); 4804 tree max_index_vec = make_ssa_name (index_vec_type);
4802 tree max_index_vec_rhs = build_vector_from_val (index_vec_type, 4805 tree max_index_vec_rhs = build_vector_from_val (index_vec_type,
4847 vec_cond_cast_rhs); 4850 vec_cond_cast_rhs);
4848 gsi_insert_before (&exit_gsi, vec_cond_cast_stmt, GSI_SAME_STMT); 4851 gsi_insert_before (&exit_gsi, vec_cond_cast_stmt, GSI_SAME_STMT);
4849 4852
4850 /* Reduce down to a scalar value. */ 4853 /* Reduce down to a scalar value. */
4851 tree data_reduc = make_ssa_name (scalar_type_unsigned); 4854 tree data_reduc = make_ssa_name (scalar_type_unsigned);
4852 optab ot = optab_for_tree_code (REDUC_MAX_EXPR, vectype_unsigned, 4855 gcall *data_reduc_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
4853 optab_default); 4856 1, vec_cond_cast);
4854 gcc_assert (optab_handler (ot, TYPE_MODE (vectype_unsigned)) 4857 gimple_call_set_lhs (data_reduc_stmt, data_reduc);
4855 != CODE_FOR_nothing);
4856 gimple *data_reduc_stmt = gimple_build_assign (data_reduc,
4857 REDUC_MAX_EXPR,
4858 vec_cond_cast);
4859 gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT); 4858 gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT);
4860 4859
4861 /* Convert the reduced value back to the result type and set as the 4860 /* Convert the reduced value back to the result type and set as the
4862 result. */ 4861 result. */
4863 gimple_seq stmts = NULL; 4862 gimple_seq stmts = NULL;
4865 data_reduc); 4864 data_reduc);
4866 gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); 4865 gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT);
4867 scalar_results.safe_push (new_temp); 4866 scalar_results.safe_push (new_temp);
4868 } 4867 }
4869 else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION 4868 else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION
4870 && reduc_code == ERROR_MARK) 4869 && reduc_fn == IFN_LAST)
4871 { 4870 {
4872 /* Condition redution without supported REDUC_MAX_EXPR. Generate 4871 /* Condition reduction without supported IFN_REDUC_MAX. Generate
4873 idx = 0; 4872 idx = 0;
4874 idx_val = induction_index[0]; 4873 idx_val = induction_index[0];
4875 val = data_reduc[0]; 4874 val = data_reduc[0];
4876 for (idx = 0, val = init, i = 0; i < nelts; ++i) 4875 for (idx = 0, val = init, i = 0; i < nelts; ++i)
4877 if (induction_index[i] > idx_val) 4876 if (induction_index[i] > idx_val)
4879 return val; */ 4878 return val; */
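 /* Editor's illustration (not part of the source): because the number of
 elements is forced to be constant here, the expansion below is fully
 unrolled at compile time. For a 4-element vector it behaves like the
 plain C sketch below (array names are hypothetical):

 idx_val = induction_index[0]; val = data[0];
 if (induction_index[1] > idx_val) { idx_val = induction_index[1]; val = data[1]; }
 if (induction_index[2] > idx_val) { idx_val = induction_index[2]; val = data[2]; }
 if (induction_index[3] > idx_val) { idx_val = induction_index[3]; val = data[3]; }
 return val; */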
4880 4879
4881 tree data_eltype = TREE_TYPE (TREE_TYPE (new_phi_result)); 4880 tree data_eltype = TREE_TYPE (TREE_TYPE (new_phi_result));
4882 tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index)); 4881 tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index));
4883 unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype)); 4882 unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype));
4884 unsigned HOST_WIDE_INT v_size 4883 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index));
4885 = el_size * TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index)); 4884 /* Enforced by vectorizable_reduction, which ensures we have target
4885 support before allowing a conditional reduction on variable-length
4886 vectors. */
4887 unsigned HOST_WIDE_INT v_size = el_size * nunits.to_constant ();
4886 tree idx_val = NULL_TREE, val = NULL_TREE; 4888 tree idx_val = NULL_TREE, val = NULL_TREE;
4887 for (unsigned HOST_WIDE_INT off = 0; off < v_size; off += el_size) 4889 for (unsigned HOST_WIDE_INT off = 0; off < v_size; off += el_size)
4888 { 4890 {
4889 tree old_idx_val = idx_val; 4891 tree old_idx_val = idx_val;
4890 tree old_val = val; 4892 tree old_val = val;
4937 } 4939 }
4938 4940
4939 /* 2.3 Create the reduction code, using one of the three schemes described 4941 /* 2.3 Create the reduction code, using one of the three schemes described
4940 above. In SLP we simply need to extract all the elements from the 4942 above. In SLP we simply need to extract all the elements from the
4941 vector (without reducing them), so we use scalar shifts. */ 4943 vector (without reducing them), so we use scalar shifts. */
4942 else if (reduc_code != ERROR_MARK && !slp_reduc) 4944 else if (reduc_fn != IFN_LAST && !slp_reduc)
4943 { 4945 {
4944 tree tmp; 4946 tree tmp;
4945 tree vec_elem_type; 4947 tree vec_elem_type;
4946 4948
4947 /* Case 1: Create: 4949 /* Case 1: Create:
4952 "Reduce using direct vector reduction.\n"); 4954 "Reduce using direct vector reduction.\n");
4953 4955
4954 vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result)); 4956 vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result));
4955 if (!useless_type_conversion_p (scalar_type, vec_elem_type)) 4957 if (!useless_type_conversion_p (scalar_type, vec_elem_type))
4956 { 4958 {
4957 tree tmp_dest = 4959 tree tmp_dest
4958 vect_create_destination_var (scalar_dest, vec_elem_type); 4960 = vect_create_destination_var (scalar_dest, vec_elem_type);
4959 tmp = build1 (reduc_code, vec_elem_type, new_phi_result); 4961 epilog_stmt = gimple_build_call_internal (reduc_fn, 1,
4960 epilog_stmt = gimple_build_assign (tmp_dest, tmp); 4962 new_phi_result);
4963 gimple_set_lhs (epilog_stmt, tmp_dest);
4961 new_temp = make_ssa_name (tmp_dest, epilog_stmt); 4964 new_temp = make_ssa_name (tmp_dest, epilog_stmt);
4962 gimple_assign_set_lhs (epilog_stmt, new_temp); 4965 gimple_set_lhs (epilog_stmt, new_temp);
4963 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); 4966 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4964 4967
4965 tmp = build1 (NOP_EXPR, scalar_type, new_temp); 4968 epilog_stmt = gimple_build_assign (new_scalar_dest, NOP_EXPR,
4969 new_temp);
4966 } 4970 }
4967 else 4971 else
4968 tmp = build1 (reduc_code, scalar_type, new_phi_result); 4972 {
4969 4973 epilog_stmt = gimple_build_call_internal (reduc_fn, 1,
4970 epilog_stmt = gimple_build_assign (new_scalar_dest, tmp); 4974 new_phi_result);
4975 gimple_set_lhs (epilog_stmt, new_scalar_dest);
4976 }
4977
4971 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); 4978 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
4972 gimple_assign_set_lhs (epilog_stmt, new_temp); 4979 gimple_set_lhs (epilog_stmt, new_temp);
4973 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); 4980 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4974 4981
4975 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) 4982 if ((STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
4976 == INTEGER_INDUC_COND_REDUCTION) 4983 == INTEGER_INDUC_COND_REDUCTION)
4977 { 4984 && !operand_equal_p (initial_def, induc_val, 0))
4978 /* Earlier we set the initial value to be zero. Check the result 4985 {
4979 and if it is zero then replace with the original initial 4986 /* Earlier we set the initial value to be a vector of induc_val
4980 value. */ 4987 values. Check the result and if it is induc_val then replace
4981 tree zero = build_zero_cst (scalar_type); 4988 with the original initial value, unless induc_val is
4982 tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero); 4989 the same as initial_def already. */
4990 tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp,
4991 induc_val);
4983 4992
4984 tmp = make_ssa_name (new_scalar_dest); 4993 tmp = make_ssa_name (new_scalar_dest);
4985 epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare, 4994 epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
4986 initial_def, new_temp); 4995 initial_def, new_temp);
4987 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); 4996 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
4988 new_temp = tmp; 4997 new_temp = tmp;
4989 } 4998 }
4990 4999
4991 scalar_results.safe_push (new_temp); 5000 scalar_results.safe_push (new_temp);
4992 } 5001 }
5002 else if (direct_slp_reduc)
5003 {
5004 /* Here we create one vector for each of the REDUC_GROUP_SIZE results,
5005 with the elements for other SLP statements replaced with the
5006 neutral value. We can then do a normal reduction on each vector. */
5007
5008 /* Enforced by vectorizable_reduction. */
5009 gcc_assert (new_phis.length () == 1);
5010 gcc_assert (pow2p_hwi (group_size));
5011
5012 slp_tree orig_phis_slp_node = slp_node_instance->reduc_phis;
5013 vec<stmt_vec_info> orig_phis
5014 = SLP_TREE_SCALAR_STMTS (orig_phis_slp_node);
5015 gimple_seq seq = NULL;
5016
5017 /* Build a vector {0, 1, 2, ...}, with the same number of elements
5018 and the same element size as VECTYPE. */
5019 tree index = build_index_vector (vectype, 0, 1);
5020 tree index_type = TREE_TYPE (index);
5021 tree index_elt_type = TREE_TYPE (index_type);
5022 tree mask_type = build_same_sized_truth_vector_type (index_type);
5023
5024 /* Create a vector that, for each element, identifies which of
5025 the REDUC_GROUP_SIZE results should use it. */
5026 tree index_mask = build_int_cst (index_elt_type, group_size - 1);
5027 index = gimple_build (&seq, BIT_AND_EXPR, index_type, index,
5028 build_vector_from_val (index_type, index_mask));
5029
5030 /* Get a neutral vector value. This is simply a splat of the neutral
5031 scalar value if we have one, otherwise the initial scalar value
5032 is itself a neutral value. */
5033 tree vector_identity = NULL_TREE;
5034 if (neutral_op)
5035 vector_identity = gimple_build_vector_from_val (&seq, vectype,
5036 neutral_op);
5037 for (unsigned int i = 0; i < group_size; ++i)
5038 {
5039 /* If there's no universal neutral value, we can use the
5040 initial scalar value from the original PHI. This is used
5041 for MIN and MAX reduction, for example. */
5042 if (!neutral_op)
5043 {
5044 tree scalar_value
5045 = PHI_ARG_DEF_FROM_EDGE (orig_phis[i]->stmt,
5046 loop_preheader_edge (loop));
5047 vector_identity = gimple_build_vector_from_val (&seq, vectype,
5048 scalar_value);
5049 }
5050
5051 /* Calculate the equivalent of:
5052
5053 sel[j] = (index[j] == i);
5054
5055 which selects the elements of NEW_PHI_RESULT that should
5056 be included in the result. */
5057 tree compare_val = build_int_cst (index_elt_type, i);
5058 compare_val = build_vector_from_val (index_type, compare_val);
5059 tree sel = gimple_build (&seq, EQ_EXPR, mask_type,
5060 index, compare_val);
5061
5062 /* Calculate the equivalent of:
5063
5064 vec = seq ? new_phi_result : vector_identity;
5065
5066 VEC is now suitable for a full vector reduction. */
5067 tree vec = gimple_build (&seq, VEC_COND_EXPR, vectype,
5068 sel, new_phi_result, vector_identity);
5069
5070 /* Do the reduction and convert it to the appropriate type. */
5071 tree scalar = gimple_build (&seq, as_combined_fn (reduc_fn),
5072 TREE_TYPE (vectype), vec);
5073 scalar = gimple_convert (&seq, scalar_type, scalar);
5074 scalar_results.safe_push (scalar);
5075 }
5076 gsi_insert_seq_before (&exit_gsi, seq, GSI_SAME_STMT);
5077 }
4993 else 5078 else
4994 { 5079 {
4995 bool reduce_with_shift = have_whole_vector_shift (mode); 5080 bool reduce_with_shift;
4996 int element_bitsize = tree_to_uhwi (bitsize);
4997 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
4998 tree vec_temp; 5081 tree vec_temp;
4999 5082
5000 /* COND reductions all do the final reduction with MAX_EXPR. */ 5083 /* COND reductions all do the final reduction with MAX_EXPR
5084 or MIN_EXPR. */
5001 if (code == COND_EXPR) 5085 if (code == COND_EXPR)
5002 code = MAX_EXPR; 5086 {
5003 5087 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5004 /* Regardless of whether we have a whole vector shift, if we're 5088 == INTEGER_INDUC_COND_REDUCTION)
5005 emulating the operation via tree-vect-generic, we don't want 5089 code = induc_code;
5006 to use it. Only the first round of the reduction is likely 5090 else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5007 to still be profitable via emulation. */ 5091 == CONST_COND_REDUCTION)
5008 /* ??? It might be better to emit a reduction tree code here, so that 5092 code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info);
5009 tree-vect-generic can expand the first round via bit tricks. */ 5093 else
5010 if (!VECTOR_MODE_P (mode)) 5094 code = MAX_EXPR;
5011 reduce_with_shift = false; 5095 }
5096
5097 /* See if the target wants to do the final (shift) reduction
5098 in a vector mode of smaller size and first reduce upper/lower
5099 halves against each other. */
5100 enum machine_mode mode1 = mode;
5101 tree vectype1 = vectype;
5102 unsigned sz = tree_to_uhwi (TYPE_SIZE_UNIT (vectype));
5103 unsigned sz1 = sz;
5104 if (!slp_reduc
5105 && (mode1 = targetm.vectorize.split_reduction (mode)) != mode)
5106 sz1 = GET_MODE_SIZE (mode1).to_constant ();
5107
5108 vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz1);
5109 reduce_with_shift = have_whole_vector_shift (mode1);
5110 if (!VECTOR_MODE_P (mode1))
5111 reduce_with_shift = false;
5012 else 5112 else
5013 { 5113 {
5014 optab optab = optab_for_tree_code (code, vectype, optab_default); 5114 optab optab = optab_for_tree_code (code, vectype1, optab_default);
5015 if (optab_handler (optab, mode) == CODE_FOR_nothing) 5115 if (optab_handler (optab, mode1) == CODE_FOR_nothing)
5016 reduce_with_shift = false; 5116 reduce_with_shift = false;
5017 } 5117 }
5118
5119 /* First reduce the vector to the desired vector size we should
5120 do shift reduction on by combining upper and lower halves. */
5121 new_temp = new_phi_result;
5122 while (sz > sz1)
5123 {
5124 gcc_assert (!slp_reduc);
5125 sz /= 2;
5126 vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz);
5127
5128 /* The target has to make sure we support lowpart/highpart
5129 extraction, either via direct vector extract or through
5130 integer mode punning. */
5131 tree dst1, dst2;
5132 if (convert_optab_handler (vec_extract_optab,
5133 TYPE_MODE (TREE_TYPE (new_temp)),
5134 TYPE_MODE (vectype1))
5135 != CODE_FOR_nothing)
5136 {
5137 /* Extract sub-vectors directly once vec_extract becomes
5138 a conversion optab. */
5139 dst1 = make_ssa_name (vectype1);
5140 epilog_stmt
5141 = gimple_build_assign (dst1, BIT_FIELD_REF,
5142 build3 (BIT_FIELD_REF, vectype1,
5143 new_temp, TYPE_SIZE (vectype1),
5144 bitsize_int (0)));
5145 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5146 dst2 = make_ssa_name (vectype1);
5147 epilog_stmt
5148 = gimple_build_assign (dst2, BIT_FIELD_REF,
5149 build3 (BIT_FIELD_REF, vectype1,
5150 new_temp, TYPE_SIZE (vectype1),
5151 bitsize_int (sz * BITS_PER_UNIT)));
5152 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5153 }
5154 else
5155 {
5156 /* Extract via punning to appropriately sized integer mode
5157 vector. */
5158 tree eltype = build_nonstandard_integer_type (sz * BITS_PER_UNIT,
5159 1);
5160 tree etype = build_vector_type (eltype, 2);
5161 gcc_assert (convert_optab_handler (vec_extract_optab,
5162 TYPE_MODE (etype),
5163 TYPE_MODE (eltype))
5164 != CODE_FOR_nothing);
5165 tree tem = make_ssa_name (etype);
5166 epilog_stmt = gimple_build_assign (tem, VIEW_CONVERT_EXPR,
5167 build1 (VIEW_CONVERT_EXPR,
5168 etype, new_temp));
5169 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5170 new_temp = tem;
5171 tem = make_ssa_name (eltype);
5172 epilog_stmt
5173 = gimple_build_assign (tem, BIT_FIELD_REF,
5174 build3 (BIT_FIELD_REF, eltype,
5175 new_temp, TYPE_SIZE (eltype),
5176 bitsize_int (0)));
5177 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5178 dst1 = make_ssa_name (vectype1);
5179 epilog_stmt = gimple_build_assign (dst1, VIEW_CONVERT_EXPR,
5180 build1 (VIEW_CONVERT_EXPR,
5181 vectype1, tem));
5182 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5183 tem = make_ssa_name (eltype);
5184 epilog_stmt
5185 = gimple_build_assign (tem, BIT_FIELD_REF,
5186 build3 (BIT_FIELD_REF, eltype,
5187 new_temp, TYPE_SIZE (eltype),
5188 bitsize_int (sz * BITS_PER_UNIT)));
5189 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5190 dst2 = make_ssa_name (vectype1);
5191 epilog_stmt = gimple_build_assign (dst2, VIEW_CONVERT_EXPR,
5192 build1 (VIEW_CONVERT_EXPR,
5193 vectype1, tem));
5194 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5195 }
5196
5197 new_temp = make_ssa_name (vectype1);
5198 epilog_stmt = gimple_build_assign (new_temp, code, dst1, dst2);
5199 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5200 }
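 /* Editor's sketch (not part of the source): each iteration of the loop
 above halves the live vector by combining its two halves, e.g. going
 from 256 to 128 bits with a PLUS reduction (names hypothetical):

 v128 lo  = lowpart  (v256);   // BIT_FIELD_REF at bit offset 0
 v128 hi  = highpart (v256);   // BIT_FIELD_REF at bit offset 128
 v128 acc = lo + hi;           // element-wise CODE

 ACC then feeds either another halving step or the shift/scalar
 reduction that follows. */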
5018 5201
5019 if (reduce_with_shift && !slp_reduc) 5202 if (reduce_with_shift && !slp_reduc)
5020 { 5203 {
5021 int nelements = vec_size_in_bits / element_bitsize; 5204 int element_bitsize = tree_to_uhwi (bitsize);
5022 auto_vec_perm_indices sel (nelements); 5205 /* Enforced by vectorizable_reduction, which disallows SLP reductions
5206 for variable-length vectors and also requires direct target support
5207 for loop reductions. */
5208 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
5209 int nelements = vec_size_in_bits / element_bitsize;
5210 vec_perm_builder sel;
5211 vec_perm_indices indices;
5023 5212
5024 int elt_offset; 5213 int elt_offset;
5025 5214
5026 tree zero_vec = build_zero_cst (vectype); 5215 tree zero_vec = build_zero_cst (vectype1);
5027 /* Case 2: Create: 5216 /* Case 2: Create:
5028 for (offset = nelements/2; offset >= 1; offset/=2) 5217 for (offset = nelements/2; offset >= 1; offset/=2)
5029 { 5218 {
5030 Create: va' = vec_shift <va, offset> 5219 Create: va' = vec_shift <va, offset>
5031 Create: va = vop <va, va'> 5220 Create: va = vop <va, va'>
5035 5224
5036 if (dump_enabled_p ()) 5225 if (dump_enabled_p ())
5037 dump_printf_loc (MSG_NOTE, vect_location, 5226 dump_printf_loc (MSG_NOTE, vect_location,
5038 "Reduce using vector shifts\n"); 5227 "Reduce using vector shifts\n");
5039 5228
5040 vec_dest = vect_create_destination_var (scalar_dest, vectype); 5229 mode1 = TYPE_MODE (vectype1);
5041 new_temp = new_phi_result; 5230 vec_dest = vect_create_destination_var (scalar_dest, vectype1);
5042 for (elt_offset = nelements / 2; 5231 for (elt_offset = nelements / 2;
5043 elt_offset >= 1; 5232 elt_offset >= 1;
5044 elt_offset /= 2) 5233 elt_offset /= 2)
5045 { 5234 {
5046 sel.truncate (0);
5047 calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel); 5235 calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel);
5048 tree mask = vect_gen_perm_mask_any (vectype, sel); 5236 indices.new_vector (sel, 2, nelements);
5237 tree mask = vect_gen_perm_mask_any (vectype1, indices);
5049 epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR, 5238 epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR,
5050 new_temp, zero_vec, mask); 5239 new_temp, zero_vec, mask);
5051 new_name = make_ssa_name (vec_dest, epilog_stmt); 5240 new_name = make_ssa_name (vec_dest, epilog_stmt);
5052 gimple_assign_set_lhs (epilog_stmt, new_name); 5241 gimple_assign_set_lhs (epilog_stmt, new_name);
5053 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); 5242 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5088 5277
5089 if (dump_enabled_p ()) 5278 if (dump_enabled_p ())
5090 dump_printf_loc (MSG_NOTE, vect_location, 5279 dump_printf_loc (MSG_NOTE, vect_location,
5091 "Reduce using scalar code.\n"); 5280 "Reduce using scalar code.\n");
5092 5281
5093 vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)); 5282 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1));
5283 int element_bitsize = tree_to_uhwi (bitsize);
5094 FOR_EACH_VEC_ELT (new_phis, i, new_phi) 5284 FOR_EACH_VEC_ELT (new_phis, i, new_phi)
5095 { 5285 {
5096 int bit_offset; 5286 int bit_offset;
5097 if (gimple_code (new_phi) == GIMPLE_PHI) 5287 if (gimple_code (new_phi) == GIMPLE_PHI)
5098 vec_temp = PHI_RESULT (new_phi); 5288 vec_temp = PHI_RESULT (new_phi);
5099 else 5289 else
5100 vec_temp = gimple_assign_lhs (new_phi); 5290 vec_temp = gimple_assign_lhs (new_phi);
5101 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize, 5291 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize,
5102 bitsize_zero_node); 5292 bitsize_zero_node);
5103 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs); 5293 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs);
5104 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); 5294 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt);
5105 gimple_assign_set_lhs (epilog_stmt, new_temp); 5295 gimple_assign_set_lhs (epilog_stmt, new_temp);
5106 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); 5296 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5107 5297
5141 } 5331 }
5142 } 5332 }
5143 5333
5144 /* The only case where we need to reduce scalar results in SLP, is 5334 /* The only case where we need to reduce scalar results in SLP, is
5145 unrolling. If the size of SCALAR_RESULTS is greater than 5335 unrolling. If the size of SCALAR_RESULTS is greater than
5146 GROUP_SIZE, we reduce them combining elements modulo 5336 REDUC_GROUP_SIZE, we reduce them combining elements modulo
5147 GROUP_SIZE. */ 5337 REDUC_GROUP_SIZE. */
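 /* Editor's example (not part of the source): with REDUC_GROUP_SIZE == 2
 and four scalar results r0..r3 coming from an unrolled SLP group, the
 combining below yields

 scalar_results[0] = r0 CODE r2;
 scalar_results[1] = r1 CODE r3;

 i.e. results are combined modulo the group size. */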
5148 if (slp_reduc) 5338 if (slp_reduc)
5149 { 5339 {
5150 tree res, first_res, new_res; 5340 tree res, first_res, new_res;
5151 gimple *new_stmt; 5341 gimple *new_stmt;
5152 5342
5166 else 5356 else
5167 /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */ 5357 /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */
5168 scalar_results.safe_push (new_temp); 5358 scalar_results.safe_push (new_temp);
5169 } 5359 }
5170 5360
5171 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) 5361 if ((STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5172 == INTEGER_INDUC_COND_REDUCTION) 5362 == INTEGER_INDUC_COND_REDUCTION)
5173 { 5363 && !operand_equal_p (initial_def, induc_val, 0))
5174 /* Earlier we set the initial value to be zero. Check the result 5364 {
5175 and if it is zero then replace with the original initial 5365 /* Earlier we set the initial value to be a vector of induc_val
5176 value. */ 5366 values. Check the result and if it is induc_val then replace
5177 tree zero = build_zero_cst (scalar_type); 5367 with the original initial value, unless induc_val is
5178 tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero); 5368 the same as initial_def already. */
5369 tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp,
5370 induc_val);
5179 5371
5180 tree tmp = make_ssa_name (new_scalar_dest); 5372 tree tmp = make_ssa_name (new_scalar_dest);
5181 epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare, 5373 epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare,
5182 initial_def, new_temp); 5374 initial_def, new_temp);
5183 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); 5375 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5217 new_temp = make_ssa_name (new_dest, epilog_stmt); 5409 new_temp = make_ssa_name (new_dest, epilog_stmt);
5218 gimple_assign_set_lhs (epilog_stmt, new_temp); 5410 gimple_assign_set_lhs (epilog_stmt, new_temp);
5219 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); 5411 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT);
5220 if (nested_in_vect_loop) 5412 if (nested_in_vect_loop)
5221 { 5413 {
5222 set_vinfo_for_stmt (epilog_stmt, 5414 stmt_vec_info epilog_stmt_info = loop_vinfo->add_stmt (epilog_stmt);
5223 new_stmt_vec_info (epilog_stmt, loop_vinfo)); 5415 STMT_VINFO_RELATED_STMT (epilog_stmt_info)
5224 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) = 5416 = STMT_VINFO_RELATED_STMT (loop_vinfo->lookup_stmt (new_phi));
5225 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi));
5226 5417
5227 if (!double_reduc) 5418 if (!double_reduc)
5228 scalar_results.quick_push (new_temp); 5419 scalar_results.quick_push (new_temp);
5229 else 5420 else
5230 scalar_results[0] = new_temp; 5421 scalar_results[0] = new_temp;
5260 use <s_out4> 5451 use <s_out4>
5261 use <s_out4> */ 5452 use <s_out4> */
5262 5453
5263 5454
5264 /* In SLP reduction chain we reduce vector results into one vector if 5455 /* In SLP reduction chain we reduce vector results into one vector if
5265 necessary, hence we set here GROUP_SIZE to 1. SCALAR_DEST is the LHS of 5456 necessary, hence we set here REDUC_GROUP_SIZE to 1. SCALAR_DEST is the
5266 the last stmt in the reduction chain, since we are looking for the loop 5457 LHS of the last stmt in the reduction chain, since we are looking for
5267 exit phi node. */ 5458 the loop exit phi node. */
5268 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) 5459 if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
5269 { 5460 {
5270 gimple *dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]; 5461 stmt_vec_info dest_stmt_info
5271 /* Handle reduction patterns. */ 5462 = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]);
5272 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt))) 5463 scalar_dest = gimple_assign_lhs (dest_stmt_info->stmt);
5273 dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt));
5274
5275 scalar_dest = gimple_assign_lhs (dest_stmt);
5276 group_size = 1; 5464 group_size = 1;
5277 } 5465 }
5278 5466
5279 /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in 5467 /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in
5280 case that GROUP_SIZE is greater than vectorization factor). Therefore, we 5468 case that REDUC_GROUP_SIZE is greater than vectorization factor).
5281 need to match SCALAR_RESULTS with corresponding statements. The first 5469 Therefore, we need to match SCALAR_RESULTS with corresponding statements.
5282 (GROUP_SIZE / number of new vector stmts) scalar results correspond to 5470 The first (REDUC_GROUP_SIZE / number of new vector stmts) scalar results
5283 the first vector stmt, etc. 5471 correspond to the first vector stmt, etc.
5284 (RATIO is equal to (GROUP_SIZE / number of new vector stmts)). */ 5472 (RATIO is equal to (REDUC_GROUP_SIZE / number of new vector stmts)). */
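 /* Editor's example (not part of the source): with REDUC_GROUP_SIZE == 4
 and two new vector stmts, RATIO is 2, so scalar_results[0..1] are
 matched with the first vector stmt and scalar_results[2..3] with the
 second. */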
5285 if (group_size > new_phis.length ()) 5473 if (group_size > new_phis.length ())
5286 { 5474 {
5287 ratio = group_size / new_phis.length (); 5475 ratio = group_size / new_phis.length ();
5288 gcc_assert (!(group_size % new_phis.length ())); 5476 gcc_assert (!(group_size % new_phis.length ()));
5289 } 5477 }
5290 else 5478 else
5291 ratio = 1; 5479 ratio = 1;
5292 5480
5481 stmt_vec_info epilog_stmt_info = NULL;
5293 for (k = 0; k < group_size; k++) 5482 for (k = 0; k < group_size; k++)
5294 { 5483 {
5295 if (k % ratio == 0) 5484 if (k % ratio == 0)
5296 { 5485 {
5297 epilog_stmt = new_phis[k / ratio]; 5486 epilog_stmt_info = loop_vinfo->lookup_stmt (new_phis[k / ratio]);
5298 reduction_phi = reduction_phis[k / ratio]; 5487 reduction_phi_info = reduction_phis[k / ratio];
5299 if (double_reduc) 5488 if (double_reduc)
5300 inner_phi = inner_phis[k / ratio]; 5489 inner_phi = inner_phis[k / ratio];
5301 } 5490 }
5302 5491
5303 if (slp_reduc) 5492 if (slp_reduc)
5304 { 5493 {
5305 gimple *current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k]; 5494 stmt_vec_info scalar_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[k];
5306 5495
5307 orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt)); 5496 orig_stmt_info = STMT_VINFO_RELATED_STMT (scalar_stmt_info);
5308 /* SLP statements can't participate in patterns. */ 5497 /* SLP statements can't participate in patterns. */
5309 gcc_assert (!orig_stmt); 5498 gcc_assert (!orig_stmt_info);
5310 scalar_dest = gimple_assign_lhs (current_stmt); 5499 scalar_dest = gimple_assign_lhs (scalar_stmt_info->stmt);
5311 } 5500 }
5312 5501
5313 phis.create (3); 5502 phis.create (3);
5314 /* Find the loop-closed-use at the loop exit of the original scalar 5503 /* Find the loop-closed-use at the loop exit of the original scalar
5315 result. (The reduction result is expected to have two immediate uses - 5504 result. (The reduction result is expected to have two immediate uses -
5324 5513
5325 FOR_EACH_VEC_ELT (phis, i, exit_phi) 5514 FOR_EACH_VEC_ELT (phis, i, exit_phi)
5326 { 5515 {
5327 if (outer_loop) 5516 if (outer_loop)
5328 { 5517 {
5329 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi); 5518 stmt_vec_info exit_phi_vinfo
5519 = loop_vinfo->lookup_stmt (exit_phi);
5330 gphi *vect_phi; 5520 gphi *vect_phi;
5331 5521
5332 /* FORNOW. Currently not supporting the case that an inner-loop 5522 /* FORNOW. Currently not supporting the case that an inner-loop
5333 reduction is not used in the outer-loop (but only outside the 5523 reduction is not used in the outer-loop (but only outside the
5334 outer-loop), unless it is double reduction. */ 5524 outer-loop), unless it is double reduction. */
5337 || double_reduc); 5527 || double_reduc);
5338 5528
5339 if (double_reduc) 5529 if (double_reduc)
5340 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi; 5530 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi;
5341 else 5531 else
5342 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt; 5532 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt_info;
5343 if (!double_reduc 5533 if (!double_reduc
5344 || STMT_VINFO_DEF_TYPE (exit_phi_vinfo) 5534 || STMT_VINFO_DEF_TYPE (exit_phi_vinfo)
5345 != vect_double_reduction_def) 5535 != vect_double_reduction_def)
5346 continue; 5536 continue;
5347 5537
5361 node, i.e., stmt1 above. */ 5551 node, i.e., stmt1 above. */
5362 orig_name = PHI_RESULT (exit_phi); 5552 orig_name = PHI_RESULT (exit_phi);
5363 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name) 5553 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name)
5364 { 5554 {
5365 stmt_vec_info use_stmt_vinfo; 5555 stmt_vec_info use_stmt_vinfo;
5366 stmt_vec_info new_phi_vinfo;
5367 tree vect_phi_init, preheader_arg, vect_phi_res; 5556 tree vect_phi_init, preheader_arg, vect_phi_res;
5368 basic_block bb = gimple_bb (use_stmt); 5557 basic_block bb = gimple_bb (use_stmt);
5369 gimple *use;
5370 5558
5371 /* Check that USE_STMT is really a double reduction phi 5559 /* Check that USE_STMT is really a double reduction phi
5372 node. */ 5560 node. */
5373 if (gimple_code (use_stmt) != GIMPLE_PHI 5561 if (gimple_code (use_stmt) != GIMPLE_PHI
5374 || gimple_phi_num_args (use_stmt) != 2 5562 || gimple_phi_num_args (use_stmt) != 2
5375 || bb->loop_father != outer_loop) 5563 || bb->loop_father != outer_loop)
5376 continue; 5564 continue;
5377 use_stmt_vinfo = vinfo_for_stmt (use_stmt); 5565 use_stmt_vinfo = loop_vinfo->lookup_stmt (use_stmt);
5378 if (!use_stmt_vinfo 5566 if (!use_stmt_vinfo
5379 || STMT_VINFO_DEF_TYPE (use_stmt_vinfo) 5567 || STMT_VINFO_DEF_TYPE (use_stmt_vinfo)
5380 != vect_double_reduction_def) 5568 != vect_double_reduction_def)
5381 continue; 5569 continue;
5382 5570
5388 vs2 is defined by INNER_PHI, the vectorized EXIT_PHI; 5576 vs2 is defined by INNER_PHI, the vectorized EXIT_PHI;
5389 vs0 is created here. */ 5577 vs0 is created here. */
5390 5578
5391 /* Create vector phi node. */ 5579 /* Create vector phi node. */
5392 vect_phi = create_phi_node (vec_initial_def, bb); 5580 vect_phi = create_phi_node (vec_initial_def, bb);
5393 new_phi_vinfo = new_stmt_vec_info (vect_phi, 5581 loop_vec_info_for_loop (outer_loop)->add_stmt (vect_phi);
5394 loop_vec_info_for_loop (outer_loop));
5395 set_vinfo_for_stmt (vect_phi, new_phi_vinfo);
5396 5582
5397 /* Create vs0 - initial def of the double reduction phi. */ 5583 /* Create vs0 - initial def of the double reduction phi. */
5398 preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt, 5584 preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt,
5399 loop_preheader_edge (outer_loop)); 5585 loop_preheader_edge (outer_loop));
5400 vect_phi_init = get_initial_def_for_reduction 5586 vect_phi_init = get_initial_def_for_reduction
5401 (stmt, preheader_arg, NULL); 5587 (stmt_info, preheader_arg, NULL);
5402 5588
5403 /* Update phi node arguments with vs0 and vs2. */ 5589 /* Update phi node arguments with vs0 and vs2. */
5404 add_phi_arg (vect_phi, vect_phi_init, 5590 add_phi_arg (vect_phi, vect_phi_init,
5405 loop_preheader_edge (outer_loop), 5591 loop_preheader_edge (outer_loop),
5406 UNKNOWN_LOCATION); 5592 UNKNOWN_LOCATION);
5407 add_phi_arg (vect_phi, PHI_RESULT (inner_phi), 5593 add_phi_arg (vect_phi, PHI_RESULT (inner_phi->stmt),
5408 loop_latch_edge (outer_loop), UNKNOWN_LOCATION); 5594 loop_latch_edge (outer_loop), UNKNOWN_LOCATION);
5409 if (dump_enabled_p ()) 5595 if (dump_enabled_p ())
5410 { 5596 dump_printf_loc (MSG_NOTE, vect_location,
5411 dump_printf_loc (MSG_NOTE, vect_location, 5597 "created double reduction phi node: %G",
5412 "created double reduction phi node: "); 5598 vect_phi);
5413 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0);
5414 }
5415 5599
5416 vect_phi_res = PHI_RESULT (vect_phi); 5600 vect_phi_res = PHI_RESULT (vect_phi);
5417 5601
5418 /* Replace the use, i.e., set the correct vs1 in the regular 5602 /* Replace the use, i.e., set the correct vs1 in the regular
5419 reduction phi node. FORNOW, NCOPIES is always 1, so the 5603 reduction phi node. FORNOW, NCOPIES is always 1, so the
5420 loop is redundant. */ 5604 loop is redundant. */
5421 use = reduction_phi; 5605 stmt_vec_info use_info = reduction_phi_info;
5422 for (j = 0; j < ncopies; j++) 5606 for (j = 0; j < ncopies; j++)
5423 { 5607 {
5424 edge pr_edge = loop_preheader_edge (loop); 5608 edge pr_edge = loop_preheader_edge (loop);
5425 SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res); 5609 SET_PHI_ARG_DEF (as_a <gphi *> (use_info->stmt),
5426 use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use)); 5610 pr_edge->dest_idx, vect_phi_res);
5427 } 5611 use_info = STMT_VINFO_RELATED_STMT (use_info);
5612 }
5428 } 5613 }
5429 } 5614 }
5430 } 5615 }
5431 5616
5432 phis.release (); 5617 phis.release ();
5479 5664
5480 phis.release (); 5665 phis.release ();
5481 } 5666 }
5482 } 5667 }
5483 5668
5669 /* Return a vector of type VECTYPE that is equal to the vector select
5670 operation "MASK ? VEC : IDENTITY". Insert the select statements
5671 before GSI. */
5672
5673 static tree
5674 merge_with_identity (gimple_stmt_iterator *gsi, tree mask, tree vectype,
5675 tree vec, tree identity)
5676 {
5677 tree cond = make_temp_ssa_name (vectype, NULL, "cond");
5678 gimple *new_stmt = gimple_build_assign (cond, VEC_COND_EXPR,
5679 mask, vec, identity);
5680 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
5681 return cond;
5682 }
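
 /* Editor's sketch (illustrative only, not part of the vectorizer): the
 VEC_COND_EXPR built by merge_with_identity is a per-lane select.  In
 plain C terms, with a hypothetical lane count and element type: */

 static void
 merge_with_identity_sketch (double *cond, const unsigned char *mask,
 const double *vec, const double *identity,
 unsigned nlanes)
 {
 /* Lane-wise MASK ? VEC : IDENTITY, used below so that inactive lanes
 of a fully-masked loop contribute the identity value.  */
 for (unsigned i = 0; i < nlanes; ++i)
 cond[i] = mask[i] ? vec[i] : identity[i];
 }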
5683
5684 /* Successively apply CODE to each element of VECTOR_RHS, in left-to-right
5685 order, starting with LHS. Insert the extraction statements before GSI and
5686 associate the new scalar SSA names with variable SCALAR_DEST.
5687 Return the SSA name for the result. */
5688
5689 static tree
5690 vect_expand_fold_left (gimple_stmt_iterator *gsi, tree scalar_dest,
5691 tree_code code, tree lhs, tree vector_rhs)
5692 {
5693 tree vectype = TREE_TYPE (vector_rhs);
5694 tree scalar_type = TREE_TYPE (vectype);
5695 tree bitsize = TYPE_SIZE (scalar_type);
5696 unsigned HOST_WIDE_INT vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype));
5697 unsigned HOST_WIDE_INT element_bitsize = tree_to_uhwi (bitsize);
5698
5699 for (unsigned HOST_WIDE_INT bit_offset = 0;
5700 bit_offset < vec_size_in_bits;
5701 bit_offset += element_bitsize)
5702 {
5703 tree bitpos = bitsize_int (bit_offset);
5704 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vector_rhs,
5705 bitsize, bitpos);
5706
5707 gassign *stmt = gimple_build_assign (scalar_dest, rhs);
5708 rhs = make_ssa_name (scalar_dest, stmt);
5709 gimple_assign_set_lhs (stmt, rhs);
5710 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
5711
5712 stmt = gimple_build_assign (scalar_dest, code, lhs, rhs);
5713 tree new_name = make_ssa_name (scalar_dest, stmt);
5714 gimple_assign_set_lhs (stmt, new_name);
5715 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
5716 lhs = new_name;
5717 }
5718 return lhs;
5719 }
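
 /* Editor's sketch (illustrative only, not part of the vectorizer): the
 statements emitted by vect_expand_fold_left amount to a strictly
 left-to-right fold, one scalar operation per vector element.  A plain C
 equivalent for a PLUS reduction (names hypothetical): */

 static float
 fold_left_plus_sketch (float lhs, const float *vec, unsigned nelts)
 {
 /* Preserving this evaluation order is what makes the open-coded
 fold-left path safe for strict (non-reassociating) FP reductions.  */
 for (unsigned i = 0; i < nelts; ++i)
 lhs = lhs + vec[i];
 return lhs;
 }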
5720
5721 /* Perform an in-order reduction (FOLD_LEFT_REDUCTION). STMT_INFO is the
5722 statement that sets the live-out value. REDUC_DEF_STMT is the phi
5723 statement. CODE is the operation performed by STMT_INFO and OPS are
5724 its scalar operands. REDUC_INDEX is the index of the operand in
5725 OPS that is set by REDUC_DEF_STMT. REDUC_FN is the function that
5726 implements in-order reduction, or IFN_LAST if we should open-code it.
5727 VECTYPE_IN is the type of the vector input. MASKS specifies the masks
5728 that should be used to control the operation in a fully-masked loop. */
5729
5730 static bool
5731 vectorize_fold_left_reduction (stmt_vec_info stmt_info,
5732 gimple_stmt_iterator *gsi,
5733 stmt_vec_info *vec_stmt, slp_tree slp_node,
5734 gimple *reduc_def_stmt,
5735 tree_code code, internal_fn reduc_fn,
5736 tree ops[3], tree vectype_in,
5737 int reduc_index, vec_loop_masks *masks)
5738 {
5739 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5740 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5741 tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5742 stmt_vec_info new_stmt_info = NULL;
5743
5744 int ncopies;
5745 if (slp_node)
5746 ncopies = 1;
5747 else
5748 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5749
5750 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info));
5751 gcc_assert (ncopies == 1);
5752 gcc_assert (TREE_CODE_LENGTH (code) == binary_op);
5753 gcc_assert (reduc_index == (code == MINUS_EXPR ? 0 : 1));
5754 gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
5755 == FOLD_LEFT_REDUCTION);
5756
5757 if (slp_node)
5758 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out),
5759 TYPE_VECTOR_SUBPARTS (vectype_in)));
5760
5761 tree op0 = ops[1 - reduc_index];
5762
5763 int group_size = 1;
5764 stmt_vec_info scalar_dest_def_info;
5765 auto_vec<tree> vec_oprnds0;
5766 if (slp_node)
5767 {
5768 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL,
5769 slp_node);
5770 group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
5771 scalar_dest_def_info = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1];
5772 }
5773 else
5774 {
5775 tree loop_vec_def0 = vect_get_vec_def_for_operand (op0, stmt_info);
5776 vec_oprnds0.create (1);
5777 vec_oprnds0.quick_push (loop_vec_def0);
5778 scalar_dest_def_info = stmt_info;
5779 }
5780
5781 tree scalar_dest = gimple_assign_lhs (scalar_dest_def_info->stmt);
5782 tree scalar_type = TREE_TYPE (scalar_dest);
5783 tree reduc_var = gimple_phi_result (reduc_def_stmt);
5784
5785 int vec_num = vec_oprnds0.length ();
5786 gcc_assert (vec_num == 1 || slp_node);
5787 tree vec_elem_type = TREE_TYPE (vectype_out);
5788 gcc_checking_assert (useless_type_conversion_p (scalar_type, vec_elem_type));
5789
5790 tree vector_identity = NULL_TREE;
5791 if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
5792 vector_identity = build_zero_cst (vectype_out);
5793
5794 tree scalar_dest_var = vect_create_destination_var (scalar_dest, NULL);
5795 int i;
5796 tree def0;
5797 FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
5798 {
5799 gimple *new_stmt;
5800 tree mask = NULL_TREE;
5801 if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
5802 mask = vect_get_loop_mask (gsi, masks, vec_num, vectype_in, i);
5803
5804 /* Handle MINUS by adding the negative. */
5805 if (reduc_fn != IFN_LAST && code == MINUS_EXPR)
5806 {
5807 tree negated = make_ssa_name (vectype_out);
5808 new_stmt = gimple_build_assign (negated, NEGATE_EXPR, def0);
5809 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT);
5810 def0 = negated;
5811 }
5812
5813 if (mask)
5814 def0 = merge_with_identity (gsi, mask, vectype_out, def0,
5815 vector_identity);
5816
5817 /* On the first iteration the input is simply the scalar phi
5818 result, and for subsequent iterations it is the output of
5819 the preceding operation. */
5820 if (reduc_fn != IFN_LAST)
5821 {
5822 new_stmt = gimple_build_call_internal (reduc_fn, 2, reduc_var, def0);
5823 /* For chained SLP reductions the output of the previous reduction
5824 operation serves as the input of the next. For the final statement
5825 the output cannot be a temporary - we reuse the original
5826 scalar destination of the last statement. */
5827 if (i != vec_num - 1)
5828 {
5829 gimple_set_lhs (new_stmt, scalar_dest_var);
5830 reduc_var = make_ssa_name (scalar_dest_var, new_stmt);
5831 gimple_set_lhs (new_stmt, reduc_var);
5832 }
5833 }
5834 else
5835 {
5836 reduc_var = vect_expand_fold_left (gsi, scalar_dest_var, code,
5837 reduc_var, def0);
5838 new_stmt = SSA_NAME_DEF_STMT (reduc_var);
5839 /* Remove the statement, so that we can use the same code paths
5840 as for statements that we've just created. */
5841 gimple_stmt_iterator tmp_gsi = gsi_for_stmt (new_stmt);
5842 gsi_remove (&tmp_gsi, false);
5843 }
5844
5845 if (i == vec_num - 1)
5846 {
5847 gimple_set_lhs (new_stmt, scalar_dest);
5848 new_stmt_info = vect_finish_replace_stmt (scalar_dest_def_info,
5849 new_stmt);
5850 }
5851 else
5852 new_stmt_info = vect_finish_stmt_generation (scalar_dest_def_info,
5853 new_stmt, gsi);
5854
5855 if (slp_node)
5856 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
5857 }
5858
5859 if (!slp_node)
5860 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
5861
5862 return true;
5863 }
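
 /* Editor's sketch (illustrative only, not part of the vectorizer): in a
 fully-masked loop the per-copy update generated above behaves like the
 plain C below, where inactive lanes are replaced by the identity
 (0 for PLUS) before being folded in order into the accumulator.
 Names and the element type are hypothetical.  */

 static double
 masked_fold_left_step_sketch (double reduc_var, const double *def0,
 const unsigned char *mask, unsigned nlanes)
 {
 for (unsigned i = 0; i < nlanes; ++i)
 {
 /* merge_with_identity: MASK ? DEF0 : 0.  */
 double lane = mask[i] ? def0[i] : 0.0;
 /* In-order accumulation, matching the reduc_fn call or the
 open-coded expansion.  */
 reduc_var = reduc_var + lane;
 }
 return reduc_var;
 }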
5484 5864
5485 /* Function is_nonwrapping_integer_induction. 5865 /* Function is_nonwrapping_integer_induction.
5486 5866
5487 Check if STMT (which is part of loop LOOP) both increments and 5867 Check if STMT_VINFO (which is part of loop LOOP) both increments and
5488 does not cause overflow. */ 5868 does not cause overflow. */
5489 5869
5490 static bool 5870 static bool
5491 is_nonwrapping_integer_induction (gimple *stmt, struct loop *loop) 5871 is_nonwrapping_integer_induction (stmt_vec_info stmt_vinfo, struct loop *loop)
5492 { 5872 {
5493 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); 5873 gphi *phi = as_a <gphi *> (stmt_vinfo->stmt);
5494 tree base = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo); 5874 tree base = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo);
5495 tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo); 5875 tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo);
5496 tree lhs_type = TREE_TYPE (gimple_phi_result (stmt)); 5876 tree lhs_type = TREE_TYPE (gimple_phi_result (phi));
5497 widest_int ni, max_loop_value, lhs_max; 5877 widest_int ni, max_loop_value, lhs_max;
5498 bool overflow = false; 5878 wi::overflow_type overflow = wi::OVF_NONE;
5499 5879
5500 /* Make sure the loop is integer based. */ 5880 /* Make sure the loop is integer based. */
5501 if (TREE_CODE (base) != INTEGER_CST 5881 if (TREE_CODE (base) != INTEGER_CST
5502 || TREE_CODE (step) != INTEGER_CST) 5882 || TREE_CODE (step) != INTEGER_CST)
5503 return false; 5883 return false;
5504 5884
5505 /* Check that the induction increments. */
5506 if (tree_int_cst_sgn (step) == -1)
5507 return false;
5508
5509 /* Check that the max size of the loop will not wrap. */ 5885 /* Check that the max size of the loop will not wrap. */
5510 5886
5511 if (TYPE_OVERFLOW_UNDEFINED (lhs_type)) 5887 if (TYPE_OVERFLOW_UNDEFINED (lhs_type))
5512 return true; 5888 return true;
5513 5889
5528 <= TYPE_PRECISION (lhs_type)); 5904 <= TYPE_PRECISION (lhs_type));
5529 } 5905 }
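
 /* Editor's example (illustrative only): the check above essentially
 verifies that

 base + <max number of loop iterations> * step

 still fits in the precision of the induction result type.  For an
 unsigned char IV with base 0 and step 4, 100 iterations reach 400,
 which does not fit in 8 bits, so the induction wraps and the function
 returns false; with 60 iterations the value stays at most 240 < 256
 and the check succeeds.  */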
5530 5906
5531 /* Function vectorizable_reduction. 5907 /* Function vectorizable_reduction.
5532 5908
5533 Check if STMT performs a reduction operation that can be vectorized. 5909 Check if STMT_INFO performs a reduction operation that can be vectorized.
5534 If VEC_STMT is also passed, vectorize the STMT: create a vectorized 5910 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized
5535 stmt to replace it, put it in VEC_STMT, and insert it at GSI. 5911 stmt to replace it, put it in VEC_STMT, and insert it at GSI.
5536 Return FALSE if not a vectorizable STMT, TRUE otherwise. 5912 Return true if STMT_INFO is vectorizable in this way.
5537 5913
5538 This function also handles reduction idioms (patterns) that have been 5914 This function also handles reduction idioms (patterns) that have been
5539 recognized in advance during vect_pattern_recog. In this case, STMT may be 5915 recognized in advance during vect_pattern_recog. In this case, STMT_INFO
5540 of this form: 5916 may be of this form:
5541 X = pattern_expr (arg0, arg1, ..., X) 5917 X = pattern_expr (arg0, arg1, ..., X)
5542 and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original 5918 and its STMT_VINFO_RELATED_STMT points to the last stmt in the original
5543 sequence that had been detected and replaced by the pattern-stmt (STMT). 5919 sequence that had been detected and replaced by the pattern-stmt
5920 (STMT_INFO).
5544 5921
5545 This function also handles reduction of condition expressions, for example: 5922 This function also handles reduction of condition expressions, for example:
5546 for (int i = 0; i < N; i++) 5923 for (int i = 0; i < N; i++)
5547 if (a[i] < value) 5924 if (a[i] < value)
5548 last = a[i]; 5925 last = a[i];
5550 containing the loop indexes for which "a[i] < value" was true. In the 5927 containing the loop indexes for which "a[i] < value" was true. In the
5551 function epilogue this is reduced to a single max value and then used to 5928 function epilogue this is reduced to a single max value and then used to
5552 index into the vector of results. 5929 index into the vector of results.
5553 5930
5554 In some cases of reduction patterns, the type of the reduction variable X is 5931 In some cases of reduction patterns, the type of the reduction variable X is
5555 different than the type of the other arguments of STMT. 5932 different than the type of the other arguments of STMT_INFO.
5556 In such cases, the vectype that is used when transforming STMT into a vector 5933 In such cases, the vectype that is used when transforming STMT_INFO into
5557 stmt is different than the vectype that is used to determine the 5934 a vector stmt is different than the vectype that is used to determine the
5558 vectorization factor, because it consists of a different number of elements 5935 vectorization factor, because it consists of a different number of elements
5559 than the actual number of elements that are being operated upon in parallel. 5936 than the actual number of elements that are being operated upon in parallel.
5560 5937
5561 For example, consider an accumulation of shorts into an int accumulator. 5938 For example, consider an accumulation of shorts into an int accumulator.
5562 On some targets it's possible to vectorize this pattern operating on 8 5939 On some targets it's possible to vectorize this pattern operating on 8
5576 general), the following equation: 5953 general), the following equation:
5577 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X)) 5954 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X))
5578 does *NOT* necessarily hold for reduction patterns. */ 5955 does *NOT* necessarily hold for reduction patterns. */
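As an editorial aside (not part of this change), the widening accumulation the comment above describes corresponds to a loop like the following minimal C sketch; the concrete 8-short/4-int split it mentions is only one possible target layout:

/* Sketch only: accumulate shorts into an int.  The inputs are shorts
   (vectype_in) while the accumulator and result are ints (vectype_out),
   so the two vector types legitimately differ.  */
int
sum_shorts (const short *a, int n)
{
  int sum = 0;
  for (int i = 0; i < n; i++)
    sum += a[i];
  return sum;
}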
5579 5956
5580 bool 5957 bool
5581 vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, 5958 vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi,
5582 gimple **vec_stmt, slp_tree slp_node, 5959 stmt_vec_info *vec_stmt, slp_tree slp_node,
5583 slp_instance slp_node_instance) 5960 slp_instance slp_node_instance,
5961 stmt_vector_for_cost *cost_vec)
5584 { 5962 {
5585 tree vec_dest; 5963 tree vec_dest;
5586 tree scalar_dest; 5964 tree scalar_dest;
5587 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
5588 tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); 5965 tree vectype_out = STMT_VINFO_VECTYPE (stmt_info);
5589 tree vectype_in = NULL_TREE; 5966 tree vectype_in = NULL_TREE;
5590 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 5967 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
5591 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 5968 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
5592 enum tree_code code, orig_code, epilog_reduc_code; 5969 enum tree_code code, orig_code;
5970 internal_fn reduc_fn;
5593 machine_mode vec_mode; 5971 machine_mode vec_mode;
5594 int op_type; 5972 int op_type;
5595 optab optab, reduc_optab; 5973 optab optab;
5596 tree new_temp = NULL_TREE; 5974 tree new_temp = NULL_TREE;
5597 gimple *def_stmt;
5598 enum vect_def_type dt, cond_reduc_dt = vect_unknown_def_type; 5975 enum vect_def_type dt, cond_reduc_dt = vect_unknown_def_type;
5976 stmt_vec_info cond_stmt_vinfo = NULL;
5977 enum tree_code cond_reduc_op_code = ERROR_MARK;
5599 tree scalar_type; 5978 tree scalar_type;
5600 bool is_simple_use; 5979 bool is_simple_use;
5601 gimple *orig_stmt;
5602 stmt_vec_info orig_stmt_info = NULL;
5603 int i; 5980 int i;
5604 int ncopies; 5981 int ncopies;
5605 int epilog_copies; 5982 int epilog_copies;
5606 stmt_vec_info prev_stmt_info, prev_phi_info; 5983 stmt_vec_info prev_stmt_info, prev_phi_info;
5607 bool single_defuse_cycle = false; 5984 bool single_defuse_cycle = false;
5608 gimple *new_stmt = NULL; 5985 stmt_vec_info new_stmt_info = NULL;
5609 int j; 5986 int j;
5610 tree ops[3]; 5987 tree ops[3];
5611 enum vect_def_type dts[3]; 5988 enum vect_def_type dts[3];
5612 bool nested_cycle = false, found_nested_cycle_def = false; 5989 bool nested_cycle = false, found_nested_cycle_def = false;
5613 bool double_reduc = false; 5990 bool double_reduc = false;
5614 basic_block def_bb; 5991 basic_block def_bb;
5615 struct loop * def_stmt_loop, *outer_loop = NULL; 5992 struct loop * def_stmt_loop;
5616 tree def_arg; 5993 tree def_arg;
5617 gimple *def_arg_stmt;
5618 auto_vec<tree> vec_oprnds0; 5994 auto_vec<tree> vec_oprnds0;
5619 auto_vec<tree> vec_oprnds1; 5995 auto_vec<tree> vec_oprnds1;
5620 auto_vec<tree> vec_oprnds2; 5996 auto_vec<tree> vec_oprnds2;
5621 auto_vec<tree> vect_defs; 5997 auto_vec<tree> vect_defs;
5622 auto_vec<gimple *> phis; 5998 auto_vec<stmt_vec_info> phis;
5623 int vec_num; 5999 int vec_num;
5624 tree def0, tem; 6000 tree def0, tem;
5625 bool first_p = true;
5626 tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE; 6001 tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE;
5627 tree cond_reduc_val = NULL_TREE; 6002 tree cond_reduc_val = NULL_TREE;
5628 6003
5629 /* Make sure it was already recognized as a reduction computation. */ 6004 /* Make sure it was already recognized as a reduction computation. */
5630 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def 6005 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def
5631 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle) 6006 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle)
5632 return false; 6007 return false;
5633 6008
5634 if (nested_in_vect_loop_p (loop, stmt)) 6009 if (nested_in_vect_loop_p (loop, stmt_info))
5635 { 6010 {
5636 outer_loop = loop;
5637 loop = loop->inner; 6011 loop = loop->inner;
5638 nested_cycle = true; 6012 nested_cycle = true;
5639 } 6013 }
5640 6014
5641 /* In case of reduction chain we switch to the first stmt in the chain, but 6015 if (REDUC_GROUP_FIRST_ELEMENT (stmt_info))
5642 we don't update STMT_INFO, since only the last stmt is marked as reduction 6016 gcc_assert (slp_node
5643 and has reduction properties. */ 6017 && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info);
5644 if (GROUP_FIRST_ELEMENT (stmt_info) 6018
5645 && GROUP_FIRST_ELEMENT (stmt_info) != stmt) 6019 if (gphi *phi = dyn_cast <gphi *> (stmt_info->stmt))
5646 { 6020 {
5647 stmt = GROUP_FIRST_ELEMENT (stmt_info); 6021 tree phi_result = gimple_phi_result (phi);
5648 first_p = false;
5649 }
5650
5651 if (gimple_code (stmt) == GIMPLE_PHI)
5652 {
5653 /* Analysis is fully done on the reduction stmt invocation. */ 6022 /* Analysis is fully done on the reduction stmt invocation. */
5654 if (! vec_stmt) 6023 if (! vec_stmt)
5655 { 6024 {
5656 if (slp_node) 6025 if (slp_node)
5657 slp_node_instance->reduc_phis = slp_node; 6026 slp_node_instance->reduc_phis = slp_node;
5658 6027
5659 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; 6028 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
5660 return true; 6029 return true;
5661 } 6030 }
5662 6031
5663 gimple *reduc_stmt = STMT_VINFO_REDUC_DEF (stmt_info); 6032 if (STMT_VINFO_REDUC_TYPE (stmt_info) == FOLD_LEFT_REDUCTION)
5664 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (reduc_stmt))) 6033 /* Leave the scalar phi in place. Note that checking
5665 reduc_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (reduc_stmt)); 6034 STMT_VINFO_VEC_REDUCTION_TYPE (as below) only works
5666 6035 for reductions involving a single statement. */
5667 gcc_assert (is_gimple_assign (reduc_stmt)); 6036 return true;
6037
6038 stmt_vec_info reduc_stmt_info = STMT_VINFO_REDUC_DEF (stmt_info);
6039 reduc_stmt_info = vect_stmt_to_vectorize (reduc_stmt_info);
6040
6041 if (STMT_VINFO_VEC_REDUCTION_TYPE (reduc_stmt_info)
6042 == EXTRACT_LAST_REDUCTION)
6043 /* Leave the scalar phi in place. */
6044 return true;
6045
6046 gassign *reduc_stmt = as_a <gassign *> (reduc_stmt_info->stmt);
5668 for (unsigned k = 1; k < gimple_num_ops (reduc_stmt); ++k) 6047 for (unsigned k = 1; k < gimple_num_ops (reduc_stmt); ++k)
5669 { 6048 {
5670 tree op = gimple_op (reduc_stmt, k); 6049 tree op = gimple_op (reduc_stmt, k);
5671 if (op == gimple_phi_result (stmt)) 6050 if (op == phi_result)
5672 continue; 6051 continue;
5673 if (k == 1 6052 if (k == 1
5674 && gimple_assign_rhs_code (reduc_stmt) == COND_EXPR) 6053 && gimple_assign_rhs_code (reduc_stmt) == COND_EXPR)
5675 continue; 6054 continue;
5676 tem = get_vectype_for_scalar_type (TREE_TYPE (op)); 6055 if (!vectype_in
5677 if (! vectype_in 6056 || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in)))
5678 || TYPE_VECTOR_SUBPARTS (tem) < TYPE_VECTOR_SUBPARTS (vectype_in)) 6057 < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (op)))))
5679 vectype_in = tem; 6058 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op));
5680 break; 6059 break;
5681 } 6060 }
5682 gcc_assert (vectype_in); 6061 gcc_assert (vectype_in);
5683 6062
5684 if (slp_node) 6063 if (slp_node)
5685 ncopies = 1; 6064 ncopies = 1;
5686 else 6065 else
5687 ncopies = vect_get_num_copies (loop_vinfo, vectype_in); 6066 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5688 6067
5689 use_operand_p use_p; 6068 stmt_vec_info use_stmt_info;
5690 gimple *use_stmt;
5691 if (ncopies > 1 6069 if (ncopies > 1
5692 && (STMT_VINFO_RELEVANT (vinfo_for_stmt (reduc_stmt)) 6070 && STMT_VINFO_RELEVANT (reduc_stmt_info) <= vect_used_only_live
5693 <= vect_used_only_live) 6071 && (use_stmt_info = loop_vinfo->lookup_single_use (phi_result))
5694 && single_imm_use (gimple_phi_result (stmt), &use_p, &use_stmt) 6072 && vect_stmt_to_vectorize (use_stmt_info) == reduc_stmt_info)
5695 && (use_stmt == reduc_stmt
5696 || (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt))
5697 == reduc_stmt)))
5698 single_defuse_cycle = true; 6073 single_defuse_cycle = true;
5699 6074
5700 /* Create the destination vector */ 6075 /* Create the destination vector */
5701 scalar_dest = gimple_assign_lhs (reduc_stmt); 6076 scalar_dest = gimple_assign_lhs (reduc_stmt);
5702 vec_dest = vect_create_destination_var (scalar_dest, vectype_out); 6077 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
5703 6078
5704 if (slp_node) 6079 if (slp_node)
5705 /* The size vect_schedule_slp_instance computes is off for us. */ 6080 /* The size vect_schedule_slp_instance computes is off for us. */
5706 vec_num = ((LOOP_VINFO_VECT_FACTOR (loop_vinfo) 6081 vec_num = vect_get_num_vectors
5707 * SLP_TREE_SCALAR_STMTS (slp_node).length ()) 6082 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
5708 / TYPE_VECTOR_SUBPARTS (vectype_in)); 6083 * SLP_TREE_SCALAR_STMTS (slp_node).length (),
6084 vectype_in);
5709 else 6085 else
5710 vec_num = 1; 6086 vec_num = 1;
5711 6087
5712 /* Generate the reduction PHIs upfront. */ 6088 /* Generate the reduction PHIs upfront. */
5713 prev_phi_info = NULL; 6089 prev_phi_info = NULL;
5718 for (i = 0; i < vec_num; i++) 6094 for (i = 0; i < vec_num; i++)
5719 { 6095 {
5720 /* Create the reduction-phi that defines the reduction 6096 /* Create the reduction-phi that defines the reduction
5721 operand. */ 6097 operand. */
5722 gimple *new_phi = create_phi_node (vec_dest, loop->header); 6098 gimple *new_phi = create_phi_node (vec_dest, loop->header);
5723 set_vinfo_for_stmt (new_phi, 6099 stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi);
5724 new_stmt_vec_info (new_phi, loop_vinfo));
5725 6100
5726 if (slp_node) 6101 if (slp_node)
5727 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi); 6102 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi_info);
5728 else 6103 else
5729 { 6104 {
5730 if (j == 0) 6105 if (j == 0)
5731 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_phi; 6106 STMT_VINFO_VEC_STMT (stmt_info)
6107 = *vec_stmt = new_phi_info;
5732 else 6108 else
5733 STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi; 6109 STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi_info;
5734 prev_phi_info = vinfo_for_stmt (new_phi); 6110 prev_phi_info = new_phi_info;
5735 } 6111 }
5736 } 6112 }
5737 } 6113 }
5738 } 6114 }
5739 6115
5742 6118
5743 /* 1. Is vectorizable reduction? */ 6119 /* 1. Is vectorizable reduction? */
5744 /* Not supportable if the reduction variable is used in the loop, unless 6120 /* Not supportable if the reduction variable is used in the loop, unless
5745 it's a reduction chain. */ 6121 it's a reduction chain. */
5746 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer 6122 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer
5747 && !GROUP_FIRST_ELEMENT (stmt_info)) 6123 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
5748 return false; 6124 return false;
5749 6125
5750 /* Reductions that are not used even in an enclosing outer-loop, 6126 /* Reductions that are not used even in an enclosing outer-loop,
5751 are expected to be "live" (used out of the loop). */ 6127 are expected to be "live" (used out of the loop). */
5752 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope 6128 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope
5758 Check if STMT represents a pattern that has been recognized 6134 Check if STMT represents a pattern that has been recognized
5759 in earlier analysis stages. For stmts that represent a pattern, 6135 in earlier analysis stages. For stmts that represent a pattern,
5760 the STMT_VINFO_RELATED_STMT field records the last stmt in 6136 the STMT_VINFO_RELATED_STMT field records the last stmt in
5761 the original sequence that constitutes the pattern. */ 6137 the original sequence that constitutes the pattern. */
5762 6138
5763 orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)); 6139 stmt_vec_info orig_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info);
5764 if (orig_stmt) 6140 if (orig_stmt_info)
5765 { 6141 {
5766 orig_stmt_info = vinfo_for_stmt (orig_stmt);
5767 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info)); 6142 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info));
5768 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info)); 6143 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info));
5769 } 6144 }
5770 6145
5771 /* 3. Check the operands of the operation. The first operands are defined 6146 /* 3. Check the operands of the operation. The first operands are defined
5772 inside the loop body. The last operand is the reduction variable, 6147 inside the loop body. The last operand is the reduction variable,
5773 which is defined by the loop-header-phi. */ 6148 which is defined by the loop-header-phi. */
5774 6149
5775 gcc_assert (is_gimple_assign (stmt)); 6150 gassign *stmt = as_a <gassign *> (stmt_info->stmt);
5776 6151
5777 /* Flatten RHS. */ 6152 /* Flatten RHS. */
5778 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))) 6153 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt)))
5779 { 6154 {
5780 case GIMPLE_BINARY_RHS: 6155 case GIMPLE_BINARY_RHS:
5816 6191
5817 /* All uses but the last are expected to be defined in the loop. 6192 /* All uses but the last are expected to be defined in the loop.
5818 The last use is the reduction variable. In case of nested cycle this 6193 The last use is the reduction variable. In case of nested cycle this
5819 assumption is not true: we use reduc_index to record the index of the 6194 assumption is not true: we use reduc_index to record the index of the
5820 reduction variable. */ 6195 reduction variable. */
5821 gimple *reduc_def_stmt = NULL; 6196 stmt_vec_info reduc_def_info = NULL;
5822 int reduc_index = -1; 6197 int reduc_index = -1;
5823 for (i = 0; i < op_type; i++) 6198 for (i = 0; i < op_type; i++)
5824 { 6199 {
5825 /* The condition of COND_EXPR is checked in vectorizable_condition(). */ 6200 /* The condition of COND_EXPR is checked in vectorizable_condition(). */
5826 if (i == 0 && code == COND_EXPR) 6201 if (i == 0 && code == COND_EXPR)
5827 continue; 6202 continue;
5828 6203
5829 is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, 6204 stmt_vec_info def_stmt_info;
5830 &def_stmt, &dts[i], &tem); 6205 is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &dts[i], &tem,
6206 &def_stmt_info);
5831 dt = dts[i]; 6207 dt = dts[i];
5832 gcc_assert (is_simple_use); 6208 gcc_assert (is_simple_use);
5833 if (dt == vect_reduction_def) 6209 if (dt == vect_reduction_def)
5834 { 6210 {
5835 reduc_def_stmt = def_stmt; 6211 reduc_def_info = def_stmt_info;
5836 reduc_index = i; 6212 reduc_index = i;
5837 continue; 6213 continue;
5838 } 6214 }
5839 else if (tem) 6215 else if (tem)
5840 { 6216 {
5841 /* To properly compute ncopies we are interested in the widest 6217 /* To properly compute ncopies we are interested in the widest
5842 input type in case we're looking at a widening accumulation. */ 6218 input type in case we're looking at a widening accumulation. */
5843 if (!vectype_in 6219 if (!vectype_in
5844 || TYPE_VECTOR_SUBPARTS (vectype_in) > TYPE_VECTOR_SUBPARTS (tem)) 6220 || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in)))
6221 < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (tem)))))
5845 vectype_in = tem; 6222 vectype_in = tem;
5846 } 6223 }
5847 6224
5848 if (dt != vect_internal_def 6225 if (dt != vect_internal_def
5849 && dt != vect_external_def 6226 && dt != vect_external_def
5851 && dt != vect_induction_def 6228 && dt != vect_induction_def
5852 && !(dt == vect_nested_cycle && nested_cycle)) 6229 && !(dt == vect_nested_cycle && nested_cycle))
5853 return false; 6230 return false;
5854 6231
5855 if (dt == vect_nested_cycle) 6232 if (dt == vect_nested_cycle)
5856 { 6233 {
5857 found_nested_cycle_def = true; 6234 found_nested_cycle_def = true;
5858 reduc_def_stmt = def_stmt; 6235 reduc_def_info = def_stmt_info;
5859 reduc_index = i; 6236 reduc_index = i;
5860 } 6237 }
5861 6238
5862 if (i == 1 && code == COND_EXPR) 6239 if (i == 1 && code == COND_EXPR)
5863 { 6240 {
5864 /* Record how value of COND_EXPR is defined. */ 6241 /* Record how value of COND_EXPR is defined. */
5865 if (dt == vect_constant_def) 6242 if (dt == vect_constant_def)
5866 { 6243 {
5867 cond_reduc_dt = dt; 6244 cond_reduc_dt = dt;
5868 cond_reduc_val = ops[i]; 6245 cond_reduc_val = ops[i];
5869 } 6246 }
5870 if (dt == vect_induction_def && def_stmt != NULL 6247 if (dt == vect_induction_def
5871 && is_nonwrapping_integer_induction (def_stmt, loop)) 6248 && def_stmt_info
5872 cond_reduc_dt = dt; 6249 && is_nonwrapping_integer_induction (def_stmt_info, loop))
6250 {
6251 cond_reduc_dt = dt;
6252 cond_stmt_vinfo = def_stmt_info;
6253 }
5873 } 6254 }
5874 } 6255 }
5875 6256
5876 if (!vectype_in) 6257 if (!vectype_in)
5877 vectype_in = vectype_out; 6258 vectype_in = vectype_out;
5878 6259
5879 /* When vectorizing a reduction chain w/o SLP the reduction PHI is not 6260 /* When vectorizing a reduction chain w/o SLP the reduction PHI is not
5880 directly used in stmt. */ 6261 directly used in stmt. */
5881 if (reduc_index == -1) 6262 if (reduc_index == -1)
5882 { 6263 {
5883 if (orig_stmt) 6264 if (STMT_VINFO_REDUC_TYPE (stmt_info) == FOLD_LEFT_REDUCTION)
5884 reduc_def_stmt = STMT_VINFO_REDUC_DEF (orig_stmt_info); 6265 {
6266 if (dump_enabled_p ())
6267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6268 "in-order reduction chain without SLP.\n");
6269 return false;
6270 }
6271
6272 if (orig_stmt_info)
6273 reduc_def_info = STMT_VINFO_REDUC_DEF (orig_stmt_info);
5885 else 6274 else
5886 reduc_def_stmt = STMT_VINFO_REDUC_DEF (stmt_info); 6275 reduc_def_info = STMT_VINFO_REDUC_DEF (stmt_info);
5887 } 6276 }
5888 6277
5889 if (! reduc_def_stmt || gimple_code (reduc_def_stmt) != GIMPLE_PHI) 6278 if (! reduc_def_info)
6279 return false;
6280
6281 gphi *reduc_def_phi = dyn_cast <gphi *> (reduc_def_info->stmt);
6282 if (!reduc_def_phi)
5890 return false; 6283 return false;
5891 6284
5892 if (!(reduc_index == -1 6285 if (!(reduc_index == -1
5893 || dts[reduc_index] == vect_reduction_def 6286 || dts[reduc_index] == vect_reduction_def
5894 || dts[reduc_index] == vect_nested_cycle 6287 || dts[reduc_index] == vect_nested_cycle
5899 && nested_cycle && found_nested_cycle_def))) 6292 && nested_cycle && found_nested_cycle_def)))
5900 { 6293 {
5901 /* For pattern recognized stmts, orig_stmt might be a reduction, 6294 /* For pattern recognized stmts, orig_stmt might be a reduction,
5902 but some helper statements for the pattern might not, or 6295 but some helper statements for the pattern might not, or
5903 might be COND_EXPRs with reduction uses in the condition. */ 6296 might be COND_EXPRs with reduction uses in the condition. */
5904 gcc_assert (orig_stmt); 6297 gcc_assert (orig_stmt_info);
5905 return false; 6298 return false;
5906 } 6299 }
5907 6300
5908 stmt_vec_info reduc_def_info = vinfo_for_stmt (reduc_def_stmt); 6301 /* PHIs should not participate in patterns. */
6302 gcc_assert (!STMT_VINFO_RELATED_STMT (reduc_def_info));
5909 enum vect_reduction_type v_reduc_type 6303 enum vect_reduction_type v_reduc_type
5910 = STMT_VINFO_REDUC_TYPE (reduc_def_info); 6304 = STMT_VINFO_REDUC_TYPE (reduc_def_info);
5911 gimple *tmp = STMT_VINFO_REDUC_DEF (reduc_def_info); 6305 stmt_vec_info tmp = STMT_VINFO_REDUC_DEF (reduc_def_info);
5912 6306
5913 STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = v_reduc_type; 6307 STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = v_reduc_type;
5914 /* If we have a condition reduction, see if we can simplify it further. */ 6308 /* If we have a condition reduction, see if we can simplify it further. */
5915 if (v_reduc_type == COND_REDUCTION) 6309 if (v_reduc_type == COND_REDUCTION)
5916 { 6310 {
5917 if (cond_reduc_dt == vect_induction_def) 6311 /* TODO: We can't yet handle reduction chains, since we need to treat
6312 each COND_EXPR in the chain specially, not just the last one.
6313 E.g. for:
6314
6315 x_1 = PHI <x_3, ...>
6316 x_2 = a_2 ? ... : x_1;
6317 x_3 = a_3 ? ... : x_2;
6318
6319 we're interested in the last element in x_3 for which a_2 || a_3
6320 is true, whereas the current reduction chain handling would
6321 vectorize x_2 as a normal VEC_COND_EXPR and only treat x_3
6322 as a reduction operation. */
6323 if (reduc_index == -1)
5918 { 6324 {
5919 if (dump_enabled_p ()) 6325 if (dump_enabled_p ())
5920 dump_printf_loc (MSG_NOTE, vect_location, 6326 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
5921 "condition expression based on " 6327 "conditional reduction chains not supported\n");
5922 "integer induction.\n"); 6328 return false;
5923 STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) 6329 }
5924 = INTEGER_INDUC_COND_REDUCTION; 6330
5925 } 6331 /* vect_is_simple_reduction ensured that operand 2 is the
6332 loop-carried operand. */
6333 gcc_assert (reduc_index == 2);
5926 6334
5927 /* Loop peeling modifies initial value of reduction PHI, which 6335 /* Loop peeling modifies initial value of reduction PHI, which
5928 makes the reduction stmt to be transformed different to the 6336 makes the reduction stmt to be transformed different to the
5929 original stmt analyzed. We need to record reduction code for 6337 original stmt analyzed. We need to record reduction code for
5930 CONST_COND_REDUCTION type reduction at analyzing stage, thus 6338 CONST_COND_REDUCTION type reduction at analyzing stage, thus
5934 { 6342 {
5935 /* Also set the reduction type to CONST_COND_REDUCTION. */ 6343 /* Also set the reduction type to CONST_COND_REDUCTION. */
5936 gcc_assert (cond_reduc_dt == vect_constant_def); 6344 gcc_assert (cond_reduc_dt == vect_constant_def);
5937 STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = CONST_COND_REDUCTION; 6345 STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = CONST_COND_REDUCTION;
5938 } 6346 }
6347 else if (direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST,
6348 vectype_in, OPTIMIZE_FOR_SPEED))
6349 {
6350 if (dump_enabled_p ())
6351 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6352 "optimizing condition reduction with"
6353 " FOLD_EXTRACT_LAST.\n");
6354 STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = EXTRACT_LAST_REDUCTION;
6355 }
6356 else if (cond_reduc_dt == vect_induction_def)
6357 {
6358 tree base
6359 = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (cond_stmt_vinfo);
6360 tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (cond_stmt_vinfo);
6361
6362 gcc_assert (TREE_CODE (base) == INTEGER_CST
6363 && TREE_CODE (step) == INTEGER_CST);
6364 cond_reduc_val = NULL_TREE;
6365 /* Find a suitable value, for MAX_EXPR below base, for MIN_EXPR
6366 above base; punt if base is the minimum value of the type for
6367 MAX_EXPR or maximum value of the type for MIN_EXPR for now. */
6368 if (tree_int_cst_sgn (step) == -1)
6369 {
6370 cond_reduc_op_code = MIN_EXPR;
6371 if (tree_int_cst_sgn (base) == -1)
6372 cond_reduc_val = build_int_cst (TREE_TYPE (base), 0);
6373 else if (tree_int_cst_lt (base,
6374 TYPE_MAX_VALUE (TREE_TYPE (base))))
6375 cond_reduc_val
6376 = int_const_binop (PLUS_EXPR, base, integer_one_node);
6377 }
6378 else
6379 {
6380 cond_reduc_op_code = MAX_EXPR;
6381 if (tree_int_cst_sgn (base) == 1)
6382 cond_reduc_val = build_int_cst (TREE_TYPE (base), 0);
6383 else if (tree_int_cst_lt (TYPE_MIN_VALUE (TREE_TYPE (base)),
6384 base))
6385 cond_reduc_val
6386 = int_const_binop (MINUS_EXPR, base, integer_one_node);
6387 }
6388 if (cond_reduc_val)
6389 {
6390 if (dump_enabled_p ())
6391 dump_printf_loc (MSG_NOTE, vect_location,
6392 "condition expression based on "
6393 "integer induction.\n");
6394 STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info)
6395 = INTEGER_INDUC_COND_REDUCTION;
6396 }
6397 }
5939 else if (cond_reduc_dt == vect_constant_def) 6398 else if (cond_reduc_dt == vect_constant_def)
5940 { 6399 {
5941 enum vect_def_type cond_initial_dt; 6400 enum vect_def_type cond_initial_dt;
5942 gimple *def_stmt = SSA_NAME_DEF_STMT (ops[reduc_index]); 6401 gimple *def_stmt = SSA_NAME_DEF_STMT (ops[reduc_index]);
5943 tree cond_initial_val 6402 tree cond_initial_val
5944 = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop)); 6403 = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop));
5945 6404
5946 gcc_assert (cond_reduc_val != NULL_TREE); 6405 gcc_assert (cond_reduc_val != NULL_TREE);
5947 vect_is_simple_use (cond_initial_val, loop_vinfo, 6406 vect_is_simple_use (cond_initial_val, loop_vinfo, &cond_initial_dt);
5948 &def_stmt, &cond_initial_dt);
5949 if (cond_initial_dt == vect_constant_def 6407 if (cond_initial_dt == vect_constant_def
5950 && types_compatible_p (TREE_TYPE (cond_initial_val), 6408 && types_compatible_p (TREE_TYPE (cond_initial_val),
5951 TREE_TYPE (cond_reduc_val))) 6409 TREE_TYPE (cond_reduc_val)))
5952 { 6410 {
5953 tree e = fold_binary (LE_EXPR, boolean_type_node, 6411 tree e = fold_binary (LE_EXPR, boolean_type_node,
5966 } 6424 }
5967 } 6425 }
5968 } 6426 }
5969 } 6427 }
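A worked example of the sentinel value computed above (an editorial illustration, not text from the patch):

/* Illustration: if the condition's IV starts at base = 5 with step = 1,
   the code above picks MAX_EXPR and cond_reduc_val = 0, a value below
   every IV value seen in the loop, so a final MAX that still yields 0
   signals that the condition never held.  For base = -3, step = 1 it
   picks base - 1 = -4 instead; for a decreasing IV the logic mirrors
   this with MIN_EXPR and a value just above base.  */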
5970 6428
5971 if (orig_stmt) 6429 if (orig_stmt_info)
5972 gcc_assert (tmp == orig_stmt 6430 gcc_assert (tmp == orig_stmt_info
5973 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt); 6431 || REDUC_GROUP_FIRST_ELEMENT (tmp) == orig_stmt_info);
5974 else 6432 else
5975 /* We changed STMT to be the first stmt in reduction chain, hence we 6433 /* We changed STMT to be the first stmt in reduction chain, hence we
5976 check that in this case the first element in the chain is STMT. */ 6434 check that in this case the first element in the chain is STMT. */
5977 gcc_assert (stmt == tmp 6435 gcc_assert (tmp == stmt_info
5978 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt); 6436 || REDUC_GROUP_FIRST_ELEMENT (tmp) == stmt_info);
5979 6437
5980 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt))) 6438 if (STMT_VINFO_LIVE_P (reduc_def_info))
5981 return false; 6439 return false;
5982 6440
5983 if (slp_node) 6441 if (slp_node)
5984 ncopies = 1; 6442 ncopies = 1;
5985 else 6443 else
5986 ncopies = vect_get_num_copies (loop_vinfo, vectype_in); 6444 ncopies = vect_get_num_copies (loop_vinfo, vectype_in);
5987 6445
5988 gcc_assert (ncopies >= 1); 6446 gcc_assert (ncopies >= 1);
5989 6447
5990 vec_mode = TYPE_MODE (vectype_in); 6448 vec_mode = TYPE_MODE (vectype_in);
6449 poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out);
5991 6450
5992 if (code == COND_EXPR) 6451 if (code == COND_EXPR)
5993 { 6452 {
5994 /* Only call during the analysis stage, otherwise we'll lose 6453 /* Only call during the analysis stage, otherwise we'll lose
5995 STMT_VINFO_TYPE. */ 6454 STMT_VINFO_TYPE. */
5996 if (!vec_stmt && !vectorizable_condition (stmt, gsi, NULL, 6455 if (!vec_stmt && !vectorizable_condition (stmt_info, gsi, NULL,
5997 ops[reduc_index], 0, NULL)) 6456 ops[reduc_index], 0, NULL,
6457 cost_vec))
5998 { 6458 {
5999 if (dump_enabled_p ()) 6459 if (dump_enabled_p ())
6000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6460 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6001 "unsupported condition in reduction\n"); 6461 "unsupported condition in reduction\n");
6002 return false; 6462 return false;
6031 if (optab_handler (optab, vec_mode) == CODE_FOR_nothing) 6491 if (optab_handler (optab, vec_mode) == CODE_FOR_nothing)
6032 { 6492 {
6033 if (dump_enabled_p ()) 6493 if (dump_enabled_p ())
6034 dump_printf (MSG_NOTE, "op not supported by target.\n"); 6494 dump_printf (MSG_NOTE, "op not supported by target.\n");
6035 6495
6036 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD 6496 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD)
6037 || !vect_worthwhile_without_simd_p (loop_vinfo, code)) 6497 || !vect_worthwhile_without_simd_p (loop_vinfo, code))
6038 return false; 6498 return false;
6039 6499
6040 if (dump_enabled_p ()) 6500 if (dump_enabled_p ())
6041 dump_printf (MSG_NOTE, "proceeding using word mode.\n"); 6501 dump_printf (MSG_NOTE, "proceeding using word mode.\n");
6086 the arguments are the same as the type of the reduction variable. 6546 the arguments are the same as the type of the reduction variable.
6087 For "regular" reductions we can therefore use the same vector type 6547 For "regular" reductions we can therefore use the same vector type
6088 (and also the same tree-code) when generating the epilog code and 6548 (and also the same tree-code) when generating the epilog code and
6089 when generating the code inside the loop. */ 6549 when generating the code inside the loop. */
6090 6550
6091 if (orig_stmt) 6551 vect_reduction_type reduction_type
6552 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info);
6553 if (orig_stmt_info
6554 && (reduction_type == TREE_CODE_REDUCTION
6555 || reduction_type == FOLD_LEFT_REDUCTION))
6092 { 6556 {
6093 /* This is a reduction pattern: get the vectype from the type of the 6557 /* This is a reduction pattern: get the vectype from the type of the
6094 reduction variable, and get the tree-code from orig_stmt. */ 6558 reduction variable, and get the tree-code from orig_stmt. */
6095 gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) 6559 orig_code = gimple_assign_rhs_code (orig_stmt_info->stmt);
6096 == TREE_CODE_REDUCTION);
6097 orig_code = gimple_assign_rhs_code (orig_stmt);
6098 gcc_assert (vectype_out); 6560 gcc_assert (vectype_out);
6099 vec_mode = TYPE_MODE (vectype_out); 6561 vec_mode = TYPE_MODE (vectype_out);
6100 } 6562 }
6101 else 6563 else
6102 { 6564 {
6107 if (code == MINUS_EXPR) 6569 if (code == MINUS_EXPR)
6108 orig_code = PLUS_EXPR; 6570 orig_code = PLUS_EXPR;
6109 6571
6110 /* For simple condition reductions, replace with the actual expression 6572 /* For simple condition reductions, replace with the actual expression
6111 we want to base our reduction around. */ 6573 we want to base our reduction around. */
6112 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == CONST_COND_REDUCTION) 6574 if (reduction_type == CONST_COND_REDUCTION)
6113 { 6575 {
6114 orig_code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info); 6576 orig_code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info);
6115 gcc_assert (orig_code == MAX_EXPR || orig_code == MIN_EXPR); 6577 gcc_assert (orig_code == MAX_EXPR || orig_code == MIN_EXPR);
6116 } 6578 }
6117 else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) 6579 else if (reduction_type == INTEGER_INDUC_COND_REDUCTION)
6118 == INTEGER_INDUC_COND_REDUCTION) 6580 orig_code = cond_reduc_op_code;
6119 orig_code = MAX_EXPR;
6120 } 6581 }
6121 6582
6122 if (nested_cycle) 6583 if (nested_cycle)
6123 { 6584 {
6124 def_bb = gimple_bb (reduc_def_stmt); 6585 def_bb = gimple_bb (reduc_def_phi);
6125 def_stmt_loop = def_bb->loop_father; 6586 def_stmt_loop = def_bb->loop_father;
6126 def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt, 6587 def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi,
6127 loop_preheader_edge (def_stmt_loop)); 6588 loop_preheader_edge (def_stmt_loop));
6128 if (TREE_CODE (def_arg) == SSA_NAME 6589 stmt_vec_info def_arg_stmt_info = loop_vinfo->lookup_def (def_arg);
6129 && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg)) 6590 if (def_arg_stmt_info
6130 && gimple_code (def_arg_stmt) == GIMPLE_PHI 6591 && (STMT_VINFO_DEF_TYPE (def_arg_stmt_info)
6131 && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt)) 6592 == vect_double_reduction_def))
6132 && vinfo_for_stmt (def_arg_stmt)
6133 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt))
6134 == vect_double_reduction_def)
6135 double_reduc = true; 6593 double_reduc = true;
6136 } 6594 }
6137 6595
6138 epilog_reduc_code = ERROR_MARK; 6596 reduc_fn = IFN_LAST;
6139 6597
6140 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != COND_REDUCTION) 6598 if (reduction_type == TREE_CODE_REDUCTION
6141 { 6599 || reduction_type == FOLD_LEFT_REDUCTION
6142 if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code)) 6600 || reduction_type == INTEGER_INDUC_COND_REDUCTION
6143 { 6601 || reduction_type == CONST_COND_REDUCTION)
6144 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out, 6602 {
6145 optab_default); 6603 if (reduction_type == FOLD_LEFT_REDUCTION
6146 if (!reduc_optab) 6604 ? fold_left_reduction_fn (orig_code, &reduc_fn)
6147 { 6605 : reduction_fn_for_scalar_code (orig_code, &reduc_fn))
6148 if (dump_enabled_p ()) 6606 {
6149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6607 if (reduc_fn != IFN_LAST
6150 "no optab for reduction.\n"); 6608 && !direct_internal_fn_supported_p (reduc_fn, vectype_out,
6151 6609 OPTIMIZE_FOR_SPEED))
6152 epilog_reduc_code = ERROR_MARK;
6153 }
6154 else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing)
6155 { 6610 {
6156 if (dump_enabled_p ()) 6611 if (dump_enabled_p ())
6157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6612 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6158 "reduc op not supported by target.\n"); 6613 "reduc op not supported by target.\n");
6159 6614
6160 epilog_reduc_code = ERROR_MARK; 6615 reduc_fn = IFN_LAST;
6161 } 6616 }
6162 } 6617 }
6163 else 6618 else
6164 { 6619 {
6165 if (!nested_cycle || double_reduc) 6620 if (!nested_cycle || double_reduc)
6170 6625
6171 return false; 6626 return false;
6172 } 6627 }
6173 } 6628 }
6174 } 6629 }
6175 else 6630 else if (reduction_type == COND_REDUCTION)
6176 { 6631 {
6177 int scalar_precision 6632 int scalar_precision
6178 = GET_MODE_PRECISION (SCALAR_TYPE_MODE (scalar_type)); 6633 = GET_MODE_PRECISION (SCALAR_TYPE_MODE (scalar_type));
6179 cr_index_scalar_type = make_unsigned_type (scalar_precision); 6634 cr_index_scalar_type = make_unsigned_type (scalar_precision);
6180 cr_index_vector_type = build_vector_type 6635 cr_index_vector_type = build_vector_type (cr_index_scalar_type,
6181 (cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype_out)); 6636 nunits_out);
6182 6637
6183 optab = optab_for_tree_code (REDUC_MAX_EXPR, cr_index_vector_type, 6638 if (direct_internal_fn_supported_p (IFN_REDUC_MAX, cr_index_vector_type,
6184 optab_default); 6639 OPTIMIZE_FOR_SPEED))
6185 if (optab_handler (optab, TYPE_MODE (cr_index_vector_type)) 6640 reduc_fn = IFN_REDUC_MAX;
6186 != CODE_FOR_nothing) 6641 }
6187 epilog_reduc_code = REDUC_MAX_EXPR; 6642
6188 } 6643 if (reduction_type != EXTRACT_LAST_REDUCTION
6189 6644 && (!nested_cycle || double_reduc)
6190 if ((double_reduc 6645 && reduc_fn == IFN_LAST
6191 || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != TREE_CODE_REDUCTION) 6646 && !nunits_out.is_constant ())
6647 {
6648 if (dump_enabled_p ())
6649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6650 "missing target support for reduction on"
6651 " variable-length vectors.\n");
6652 return false;
6653 }
6654
6655 if ((double_reduc || reduction_type != TREE_CODE_REDUCTION)
6192 && ncopies > 1) 6656 && ncopies > 1)
6193 { 6657 {
6194 if (dump_enabled_p ()) 6658 if (dump_enabled_p ())
6195 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 6659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6196 "multiple types in double reduction or condition " 6660 "multiple types in double reduction or condition "
6197 "reduction.\n"); 6661 "reduction.\n");
6198 return false; 6662 return false;
6663 }
6664
6665 /* For SLP reductions, see if there is a neutral value we can use. */
6666 tree neutral_op = NULL_TREE;
6667 if (slp_node)
6668 neutral_op = neutral_op_for_slp_reduction
6669 (slp_node_instance->reduc_phis, code,
6670 REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL);
6671
6672 if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION)
6673 {
6674 /* We can't support in-order reductions of code such as this:
6675
6676 for (int i = 0; i < n1; ++i)
6677 for (int j = 0; j < n2; ++j)
6678 l += a[j];
6679
6680 since GCC effectively transforms the loop when vectorizing:
6681
6682 for (int i = 0; i < n1 / VF; ++i)
6683 for (int j = 0; j < n2; ++j)
6684 for (int k = 0; k < VF; ++k)
6685 l += a[j];
6686
6687 which is a reassociation of the original operation. */
6688 if (dump_enabled_p ())
6689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6690 "in-order double reduction not supported.\n");
6691
6692 return false;
6693 }
6694
6695 if (reduction_type == FOLD_LEFT_REDUCTION
6696 && slp_node
6697 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info))
6698 {
6699 /* We cannot use in-order reductions in this case because there is
6700 an implicit reassociation of the operations involved. */
6701 if (dump_enabled_p ())
6702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6703 "in-order unchained SLP reductions not supported.\n");
6704 return false;
6705 }
6706
6707 /* For double reductions, and for SLP reductions with a neutral value,
6708 we construct a variable-length initial vector by loading a vector
6709 full of the neutral value and then shift-and-inserting the start
6710 values into the low-numbered elements. */
6711 if ((double_reduc || neutral_op)
6712 && !nunits_out.is_constant ()
6713 && !direct_internal_fn_supported_p (IFN_VEC_SHL_INSERT,
6714 vectype_out, OPTIMIZE_FOR_SPEED))
6715 {
6716 if (dump_enabled_p ())
6717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6718 "reduction on variable-length vectors requires"
6719 " target support for a vector-shift-and-insert"
6720 " operation.\n");
6721 return false;
6722 }
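As a hedged illustration of the shift-and-insert scheme the comment above describes (the element ordering is the editor's reading of IFN_VEC_SHL_INSERT, not spelled out here):

/* Illustration: for a PLUS reduction the neutral value is 0, so the
   initial vector starts out as { 0, 0, ..., 0 }.  Shift-and-inserting
   a start value s gives { s, 0, ..., 0 }: s sits in a low-numbered
   lane and every other lane contributes nothing, which works for any
   (possibly runtime-variable) vector length.  */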
6723
6724 /* Check extra constraints for variable-length unchained SLP reductions. */
6725 if (STMT_SLP_TYPE (stmt_info)
6726 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)
6727 && !nunits_out.is_constant ())
6728 {
6729 /* We checked above that we could build the initial vector when
6730 there's a neutral element value. Check here for the case in
6731 which each SLP statement has its own initial value and in which
6732 that value needs to be repeated for every instance of the
6733 statement within the initial vector. */
6734 unsigned int group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
6735 scalar_mode elt_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype_out));
6736 if (!neutral_op
6737 && !can_duplicate_and_interleave_p (group_size, elt_mode))
6738 {
6739 if (dump_enabled_p ())
6740 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6741 "unsupported form of SLP reduction for"
6742 " variable-length vectors: cannot build"
6743 " initial vector.\n");
6744 return false;
6745 }
6746 /* The epilogue code relies on the number of elements being a multiple
6747 of the group size. The duplicate-and-interleave approach to setting
6748 up the initial vector does too. */
6749 if (!multiple_p (nunits_out, group_size))
6750 {
6751 if (dump_enabled_p ())
6752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6753 "unsupported form of SLP reduction for"
6754 " variable-length vectors: the vector size"
6755 " is not a multiple of the number of results.\n");
6756 return false;
6757 }
6199 } 6758 }
6200 6759
6201 /* In case of widening multiplication by a constant, we update the type 6760 /* In case of widening multiplication by a constant, we update the type
6202 of the constant to be the type of the other operand. We check that the 6761 of the constant to be the type of the other operand. We check that the
6203 constant fits the type in the pattern recognition pass. */ 6762 constant fits the type in the pattern recognition pass. */
6216 6775
6217 return false; 6776 return false;
6218 } 6777 }
6219 } 6778 }
6220 6779
6221 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION) 6780 if (reduction_type == COND_REDUCTION)
6222 { 6781 {
6223 widest_int ni; 6782 widest_int ni;
6224 6783
6225 if (! max_loop_iterations (loop, &ni)) 6784 if (! max_loop_iterations (loop, &ni))
6226 { 6785 {
6273 from the vectorized reduction operation generated in the previous iteration. 6832 from the vectorized reduction operation generated in the previous iteration.
6274 6833
6275 This only works when we see both the reduction PHI and its only consumer 6834 This only works when we see both the reduction PHI and its only consumer
6276 in vectorizable_reduction and there are no intermediate stmts 6835 in vectorizable_reduction and there are no intermediate stmts
6277 participating. */ 6836 participating. */
6278 use_operand_p use_p; 6837 stmt_vec_info use_stmt_info;
6279 gimple *use_stmt; 6838 tree reduc_phi_result = gimple_phi_result (reduc_def_phi);
6280 if (ncopies > 1 6839 if (ncopies > 1
6281 && (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live) 6840 && (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live)
6282 && single_imm_use (gimple_phi_result (reduc_def_stmt), &use_p, &use_stmt) 6841 && (use_stmt_info = loop_vinfo->lookup_single_use (reduc_phi_result))
6283 && (use_stmt == stmt 6842 && vect_stmt_to_vectorize (use_stmt_info) == stmt_info)
6284 || STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) == stmt))
6285 { 6843 {
6286 single_defuse_cycle = true; 6844 single_defuse_cycle = true;
6287 epilog_copies = 1; 6845 epilog_copies = 1;
6288 } 6846 }
6289 else 6847 else
6302 "multi def-use cycle not possible for lane-reducing " 6860 "multi def-use cycle not possible for lane-reducing "
6303 "reduction operation\n"); 6861 "reduction operation\n");
6304 return false; 6862 return false;
6305 } 6863 }
6306 6864
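To make the single def-use cycle concrete, here is an editorial sketch (with invented SSA names) of the code shape this selects when ncopies == 2:

/* Sketch:
     vsum_0 = PHI <vinit, vsum_2>
     vsum_1 = vsum_0 + va_0;    copy 0
     vsum_2 = vsum_1 + va_1;    copy 1 feeds the back edge
   Chaining the copies this way needs only one reduction PHI and one
   epilogue reduction (epilog_copies == 1); otherwise each copy keeps
   its own PHI and all ncopies partial results are reduced in the
   epilogue.  */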
6865 if (slp_node)
6866 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6867 else
6868 vec_num = 1;
6869
6870 internal_fn cond_fn = get_conditional_internal_fn (code);
6871 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
6872
6307 if (!vec_stmt) /* transformation not required. */ 6873 if (!vec_stmt) /* transformation not required. */
6308 { 6874 {
6309 if (first_p) 6875 vect_model_reduction_cost (stmt_info, reduc_fn, ncopies, cost_vec);
6310 vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies); 6876 if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
6877 {
6878 if (reduction_type != FOLD_LEFT_REDUCTION
6879 && (cond_fn == IFN_LAST
6880 || !direct_internal_fn_supported_p (cond_fn, vectype_in,
6881 OPTIMIZE_FOR_SPEED)))
6882 {
6883 if (dump_enabled_p ())
6884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6885 "can't use a fully-masked loop because no"
6886 " conditional operation is available.\n");
6887 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
6888 }
6889 else if (reduc_index == -1)
6890 {
6891 if (dump_enabled_p ())
6892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6893 "can't use a fully-masked loop for chained"
6894 " reductions.\n");
6895 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
6896 }
6897 else
6898 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num,
6899 vectype_in);
6900 }
6901 if (dump_enabled_p ()
6902 && reduction_type == FOLD_LEFT_REDUCTION)
6903 dump_printf_loc (MSG_NOTE, vect_location,
6904 "using an in-order (fold-left) reduction.\n");
6311 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; 6905 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type;
6312 return true; 6906 return true;
6313 } 6907 }
6314 6908
6315 /* Transform. */ 6909 /* Transform. */
6319 6913
6320 /* FORNOW: Multiple types are not supported for condition. */ 6914 /* FORNOW: Multiple types are not supported for condition. */
6321 if (code == COND_EXPR) 6915 if (code == COND_EXPR)
6322 gcc_assert (ncopies == 1); 6916 gcc_assert (ncopies == 1);
6323 6917
6918 bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
6919
6920 if (reduction_type == FOLD_LEFT_REDUCTION)
6921 return vectorize_fold_left_reduction
6922 (stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi, code,
6923 reduc_fn, ops, vectype_in, reduc_index, masks);
6924
6925 if (reduction_type == EXTRACT_LAST_REDUCTION)
6926 {
6927 gcc_assert (!slp_node);
6928 return vectorizable_condition (stmt_info, gsi, vec_stmt,
6929 NULL, reduc_index, NULL, NULL);
6930 }
6931
6324 /* Create the destination vector */ 6932 /* Create the destination vector */
6325 vec_dest = vect_create_destination_var (scalar_dest, vectype_out); 6933 vec_dest = vect_create_destination_var (scalar_dest, vectype_out);
6326 6934
6327 prev_stmt_info = NULL; 6935 prev_stmt_info = NULL;
6328 prev_phi_info = NULL; 6936 prev_phi_info = NULL;
6329 if (slp_node) 6937 if (!slp_node)
6330 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 6938 {
6331 else
6332 {
6333 vec_num = 1;
6334 vec_oprnds0.create (1); 6939 vec_oprnds0.create (1);
6335 vec_oprnds1.create (1); 6940 vec_oprnds1.create (1);
6336 if (op_type == ternary_op) 6941 if (op_type == ternary_op)
6337 vec_oprnds2.create (1); 6942 vec_oprnds2.create (1);
6338 } 6943 }
6343 vect_defs.quick_push (NULL_TREE); 6948 vect_defs.quick_push (NULL_TREE);
6344 6949
6345 if (slp_node) 6950 if (slp_node)
6346 phis.splice (SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis)); 6951 phis.splice (SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis));
6347 else 6952 else
6348 phis.quick_push (STMT_VINFO_VEC_STMT (vinfo_for_stmt (reduc_def_stmt))); 6953 phis.quick_push (STMT_VINFO_VEC_STMT (reduc_def_info));
6349 6954
6350 for (j = 0; j < ncopies; j++) 6955 for (j = 0; j < ncopies; j++)
6351 { 6956 {
6352 if (code == COND_EXPR) 6957 if (code == COND_EXPR)
6353 { 6958 {
6354 gcc_assert (!slp_node); 6959 gcc_assert (!slp_node);
6355 vectorizable_condition (stmt, gsi, vec_stmt, 6960 vectorizable_condition (stmt_info, gsi, vec_stmt,
6356 PHI_RESULT (phis[0]), 6961 PHI_RESULT (phis[0]->stmt),
6357 reduc_index, NULL); 6962 reduc_index, NULL, NULL);
6358 /* Multiple types are not supported for condition. */ 6963 /* Multiple types are not supported for condition. */
6359 break; 6964 break;
6360 } 6965 }
6361 6966
6362 /* Handle uses. */ 6967 /* Handle uses. */
6387 } 6992 }
6388 } 6993 }
6389 else 6994 else
6390 { 6995 {
6391 vec_oprnds0.quick_push 6996 vec_oprnds0.quick_push
6392 (vect_get_vec_def_for_operand (ops[0], stmt)); 6997 (vect_get_vec_def_for_operand (ops[0], stmt_info));
6393 vec_oprnds1.quick_push 6998 vec_oprnds1.quick_push
6394 (vect_get_vec_def_for_operand (ops[1], stmt)); 6999 (vect_get_vec_def_for_operand (ops[1], stmt_info));
6395 if (op_type == ternary_op) 7000 if (op_type == ternary_op)
6396 vec_oprnds2.quick_push 7001 vec_oprnds2.quick_push
6397 (vect_get_vec_def_for_operand (ops[2], stmt)); 7002 (vect_get_vec_def_for_operand (ops[2], stmt_info));
6398 } 7003 }
6399 } 7004 }
6400 else 7005 else
6401 { 7006 {
6402 if (!slp_node) 7007 if (!slp_node)
6403 { 7008 {
6404 gcc_assert (reduc_index != -1 || ! single_defuse_cycle); 7009 gcc_assert (reduc_index != -1 || ! single_defuse_cycle);
6405 7010
6406 if (single_defuse_cycle && reduc_index == 0) 7011 if (single_defuse_cycle && reduc_index == 0)
6407 vec_oprnds0[0] = gimple_assign_lhs (new_stmt); 7012 vec_oprnds0[0] = gimple_get_lhs (new_stmt_info->stmt);
6408 else 7013 else
6409 vec_oprnds0[0] 7014 vec_oprnds0[0]
6410 = vect_get_vec_def_for_stmt_copy (dts[0], vec_oprnds0[0]); 7015 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
7016 vec_oprnds0[0]);
6411 if (single_defuse_cycle && reduc_index == 1) 7017 if (single_defuse_cycle && reduc_index == 1)
6412 vec_oprnds1[0] = gimple_assign_lhs (new_stmt); 7018 vec_oprnds1[0] = gimple_get_lhs (new_stmt_info->stmt);
6413 else 7019 else
6414 vec_oprnds1[0] 7020 vec_oprnds1[0]
6415 = vect_get_vec_def_for_stmt_copy (dts[1], vec_oprnds1[0]); 7021 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
7022 vec_oprnds1[0]);
6416 if (op_type == ternary_op) 7023 if (op_type == ternary_op)
6417 { 7024 {
6418 if (single_defuse_cycle && reduc_index == 2) 7025 if (single_defuse_cycle && reduc_index == 2)
6419 vec_oprnds2[0] = gimple_assign_lhs (new_stmt); 7026 vec_oprnds2[0] = gimple_get_lhs (new_stmt_info->stmt);
6420 else 7027 else
6421 vec_oprnds2[0] 7028 vec_oprnds2[0]
6422 = vect_get_vec_def_for_stmt_copy (dts[2], vec_oprnds2[0]); 7029 = vect_get_vec_def_for_stmt_copy (loop_vinfo,
7030 vec_oprnds2[0]);
6423 } 7031 }
6424 } 7032 }
6425 } 7033 }
6426 7034
6427 FOR_EACH_VEC_ELT (vec_oprnds0, i, def0) 7035 FOR_EACH_VEC_ELT (vec_oprnds0, i, def0)
6428 { 7036 {
6429 tree vop[3] = { def0, vec_oprnds1[i], NULL_TREE }; 7037 tree vop[3] = { def0, vec_oprnds1[i], NULL_TREE };
6430 if (op_type == ternary_op) 7038 if (masked_loop_p)
6431 vop[2] = vec_oprnds2[i]; 7039 {
6432 7040 /* Make sure that the reduction accumulator is vop[0]. */
6433 new_temp = make_ssa_name (vec_dest, new_stmt); 7041 if (reduc_index == 1)
6434 new_stmt = gimple_build_assign (new_temp, code, 7042 {
6435 vop[0], vop[1], vop[2]); 7043 gcc_assert (commutative_tree_code (code));
6436 vect_finish_stmt_generation (stmt, new_stmt, gsi); 7044 std::swap (vop[0], vop[1]);
7045 }
7046 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies,
7047 vectype_in, i * ncopies + j);
7048 gcall *call = gimple_build_call_internal (cond_fn, 4, mask,
7049 vop[0], vop[1],
7050 vop[0]);
7051 new_temp = make_ssa_name (vec_dest, call);
7052 gimple_call_set_lhs (call, new_temp);
7053 gimple_call_set_nothrow (call, true);
7054 new_stmt_info
7055 = vect_finish_stmt_generation (stmt_info, call, gsi);
7056 }
7057 else
7058 {
7059 if (op_type == ternary_op)
7060 vop[2] = vec_oprnds2[i];
7061
7062 gassign *new_stmt = gimple_build_assign (vec_dest, code,
7063 vop[0], vop[1], vop[2]);
7064 new_temp = make_ssa_name (vec_dest, new_stmt);
7065 gimple_assign_set_lhs (new_stmt, new_temp);
7066 new_stmt_info
7067 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi);
7068 }
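The intent of the masked path just above can be summarised as follows (an editorial note; the semantics stated are inferred from the operand order of the call):

/* new_temp = cond_fn (mask, vop[0], vop[1], vop[0]) computes
   vop[0] CODE vop[1] in the lanes where mask is set and passes the
   accumulator vop[0] through unchanged elsewhere, so lanes beyond the
   remaining scalar iterations cannot disturb the reduction result.  */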
6437 7069
6438 if (slp_node) 7070 if (slp_node)
6439 { 7071 {
6440 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); 7072 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info);
6441 vect_defs.quick_push (new_temp); 7073 vect_defs.quick_push (new_temp);
6442 } 7074 }
6443 else 7075 else
6444 vect_defs[0] = new_temp; 7076 vect_defs[0] = new_temp;
6445 } 7077 }
6446 7078
6447 if (slp_node) 7079 if (slp_node)
6448 continue; 7080 continue;
6449 7081
6450 if (j == 0) 7082 if (j == 0)
6451 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; 7083 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info;
6452 else 7084 else
6453 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; 7085 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info;
6454 7086
6455 prev_stmt_info = vinfo_for_stmt (new_stmt); 7087 prev_stmt_info = new_stmt_info;
6456 } 7088 }
6457 7089
6458 /* Finalize the reduction-phi (set its arguments) and create the 7090 /* Finalize the reduction-phi (set its arguments) and create the
6459 epilog reduction code. */ 7091 epilog reduction code. */
6460 if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node) 7092 if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node)
6461 vect_defs[0] = gimple_assign_lhs (*vec_stmt); 7093 vect_defs[0] = gimple_get_lhs ((*vec_stmt)->stmt);
6462 7094
6463 vect_create_epilog_for_reduction (vect_defs, stmt, reduc_def_stmt, 7095 vect_create_epilog_for_reduction (vect_defs, stmt_info, reduc_def_phi,
6464 epilog_copies, 7096 epilog_copies, reduc_fn, phis,
6465 epilog_reduc_code, phis, 7097 double_reduc, slp_node, slp_node_instance,
6466 double_reduc, slp_node, slp_node_instance); 7098 cond_reduc_val, cond_reduc_op_code,
7099 neutral_op);
6467 7100
6468 return true; 7101 return true;
6469 } 7102 }
6470 7103
6471 /* Function vect_min_worthwhile_factor. 7104 /* Function vect_min_worthwhile_factor.
6472 7105
6473 For a loop where we could vectorize the operation indicated by CODE, 7106 For a loop where we could vectorize the operation indicated by CODE,
6474 return the minimum vectorization factor that makes it worthwhile 7107 return the minimum vectorization factor that makes it worthwhile
6475 to use generic vectors. */ 7108 to use generic vectors. */
6476 int 7109 static unsigned int
6477 vect_min_worthwhile_factor (enum tree_code code) 7110 vect_min_worthwhile_factor (enum tree_code code)
6478 { 7111 {
6479 switch (code) 7112 switch (code)
6480 { 7113 {
6481 case PLUS_EXPR: 7114 case PLUS_EXPR:
6500 7133
6501 bool 7134 bool
6502 vect_worthwhile_without_simd_p (vec_info *vinfo, tree_code code) 7135 vect_worthwhile_without_simd_p (vec_info *vinfo, tree_code code)
6503 { 7136 {
6504 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); 7137 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo);
7138 unsigned HOST_WIDE_INT value;
6505 return (loop_vinfo 7139 return (loop_vinfo
6506 && (LOOP_VINFO_VECT_FACTOR (loop_vinfo) 7140 && LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&value)
6507 >= vect_min_worthwhile_factor (code))); 7141 && value >= vect_min_worthwhile_factor (code));
6508 } 7142 }
6509 7143
6510 /* Function vectorizable_induction 7144 /* Function vectorizable_induction
6511 7145
6512 Check if PHI performs an induction computation that can be vectorized. 7146 Check if STMT_INFO performs an induction computation that can be vectorized.
6513 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized 7147 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized
6514 phi to replace it, put it in VEC_STMT, and add it to the same basic block. 7148 phi to replace it, put it in VEC_STMT, and add it to the same basic block.
6515 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ 7149 Return true if STMT_INFO is vectorizable in this way. */
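For orientation, a minimal C sketch (editorial, not from the patch) of the kind of induction this function vectorizes:

/* Sketch only: i is defined by a loop-header PHI and advances by a
   constant step, so it is an induction.  Its vector form starts as
   { i, i+1, i+2, ... } and is bumped by VF * step per vector
   iteration.  */
void
fill_iota (int *a, int n)
{
  for (int i = 0; i < n; i++)
    a[i] = i;
}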
6516 7150
6517 bool 7151 bool
6518 vectorizable_induction (gimple *phi, 7152 vectorizable_induction (stmt_vec_info stmt_info,
6519 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED, 7153 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
6520 gimple **vec_stmt, slp_tree slp_node) 7154 stmt_vec_info *vec_stmt, slp_tree slp_node,
7155 stmt_vector_for_cost *cost_vec)
6521 { 7156 {
6522 stmt_vec_info stmt_info = vinfo_for_stmt (phi);
6523 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 7157 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
6524 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 7158 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
6525 unsigned ncopies; 7159 unsigned ncopies;
6526 bool nested_in_vect_loop = false; 7160 bool nested_in_vect_loop = false;
6527 struct loop *iv_loop; 7161 struct loop *iv_loop;
6532 tree new_name; 7166 tree new_name;
6533 gimple *new_stmt; 7167 gimple *new_stmt;
6534 gphi *induction_phi; 7168 gphi *induction_phi;
6535 tree induc_def, vec_dest; 7169 tree induc_def, vec_dest;
6536 tree init_expr, step_expr; 7170 tree init_expr, step_expr;
6537 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 7171 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
6538 unsigned i; 7172 unsigned i;
6539 tree expr; 7173 tree expr;
6540 gimple_seq stmts; 7174 gimple_seq stmts;
6541 imm_use_iterator imm_iter; 7175 imm_use_iterator imm_iter;
6542 use_operand_p use_p; 7176 use_operand_p use_p;
6543 gimple *exit_phi; 7177 gimple *exit_phi;
6544 edge latch_e; 7178 edge latch_e;
6545 tree loop_arg; 7179 tree loop_arg;
6546 gimple_stmt_iterator si; 7180 gimple_stmt_iterator si;
6547 basic_block bb = gimple_bb (phi); 7181
6548 7182 gphi *phi = dyn_cast <gphi *> (stmt_info->stmt);
6549 if (gimple_code (phi) != GIMPLE_PHI) 7183 if (!phi)
6550 return false; 7184 return false;
6551 7185
6552 if (!STMT_VINFO_RELEVANT_P (stmt_info)) 7186 if (!STMT_VINFO_RELEVANT_P (stmt_info))
6553 return false; 7187 return false;
6554 7188
6555 /* Make sure it was recognized as induction computation. */ 7189 /* Make sure it was recognized as induction computation. */
6556 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def) 7190 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def)
6557 return false; 7191 return false;
6558 7192
6559 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 7193 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
6560 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype); 7194 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
6561 7195
6562 if (slp_node) 7196 if (slp_node)
6563 ncopies = 1; 7197 ncopies = 1;
6564 else 7198 else
6565 ncopies = vect_get_num_copies (loop_vinfo, vectype); 7199 ncopies = vect_get_num_copies (loop_vinfo, vectype);
6566 gcc_assert (ncopies >= 1); 7200 gcc_assert (ncopies >= 1);
6567 7201
6568 /* FORNOW. These restrictions should be relaxed. */ 7202 /* FORNOW. These restrictions should be relaxed. */
6569 if (nested_in_vect_loop_p (loop, phi)) 7203 if (nested_in_vect_loop_p (loop, stmt_info))
6570 { 7204 {
6571 imm_use_iterator imm_iter; 7205 imm_use_iterator imm_iter;
6572 use_operand_p use_p; 7206 use_operand_p use_p;
6573 gimple *exit_phi; 7207 gimple *exit_phi;
6574 edge latch_e; 7208 edge latch_e;
6601 break; 7235 break;
6602 } 7236 }
6603 } 7237 }
6604 if (exit_phi) 7238 if (exit_phi)
6605 { 7239 {
6606 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi); 7240 stmt_vec_info exit_phi_vinfo = loop_vinfo->lookup_stmt (exit_phi);
6607 if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo) 7241 if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo)
6608 && !STMT_VINFO_LIVE_P (exit_phi_vinfo))) 7242 && !STMT_VINFO_LIVE_P (exit_phi_vinfo)))
6609 { 7243 {
6610 if (dump_enabled_p ()) 7244 if (dump_enabled_p ())
6611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, 7245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
6620 } 7254 }
6621 else 7255 else
6622 iv_loop = loop; 7256 iv_loop = loop;
6623 gcc_assert (iv_loop == (gimple_bb (phi))->loop_father); 7257 gcc_assert (iv_loop == (gimple_bb (phi))->loop_father);
6624 7258
7259 if (slp_node && !nunits.is_constant ())
7260 {
7261 /* The current SLP code creates the initial value element-by-element. */
7262 if (dump_enabled_p ())
7263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7264 "SLP induction not supported for variable-length"
7265 " vectors.\n");
7266 return false;
7267 }
7268
6625 if (!vec_stmt) /* transformation not required. */ 7269 if (!vec_stmt) /* transformation not required. */
6626 { 7270 {
6627 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type; 7271 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type;
6628 if (dump_enabled_p ()) 7272 DUMP_VECT_SCOPE ("vectorizable_induction");
6629 dump_printf_loc (MSG_NOTE, vect_location, 7273 vect_model_induction_cost (stmt_info, ncopies, cost_vec);
6630 "=== vectorizable_induction ===\n");
6631 vect_model_induction_cost (stmt_info, ncopies);
6632 return true; 7274 return true;
6633 } 7275 }
6634 7276
6635 /* Transform. */ 7277 /* Transform. */
6636 7278
6650 7292
6651 pe = loop_preheader_edge (iv_loop); 7293 pe = loop_preheader_edge (iv_loop);
6652 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, 7294 init_expr = PHI_ARG_DEF_FROM_EDGE (phi,
6653 loop_preheader_edge (iv_loop)); 7295 loop_preheader_edge (iv_loop));
6654 7296
7297 stmts = NULL;
7298 if (!nested_in_vect_loop)
7299 {
7300 /* Convert the initial value to the desired type. */
7301 tree new_type = TREE_TYPE (vectype);
7302 init_expr = gimple_convert (&stmts, new_type, init_expr);
7303
7304 /* If we are using the loop mask to "peel" for alignment then we need
7305 to adjust the start value here. */
7306 tree skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
7307 if (skip_niters != NULL_TREE)
7308 {
7309 if (FLOAT_TYPE_P (vectype))
7310 skip_niters = gimple_build (&stmts, FLOAT_EXPR, new_type,
7311 skip_niters);
7312 else
7313 skip_niters = gimple_convert (&stmts, new_type, skip_niters);
7314 tree skip_step = gimple_build (&stmts, MULT_EXPR, new_type,
7315 skip_niters, step_expr);
7316 init_expr = gimple_build (&stmts, MINUS_EXPR, new_type,
7317 init_expr, skip_step);
7318 }
7319 }
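A worked numeric sketch (plain C with made-up values, not GCC code) of the adjustment made just above: when the loop mask is used to "peel" the first LOOP_VINFO_MASK_SKIP_NITERS scalar iterations, the IV start value is moved back by skip_niters * step so that the first active lane still sees the original initial value.

    #include <stdio.h>

    int main (void)
    {
      /* Assumed values: initial value 10, step 2, first 3 lanes masked off.  */
      int init = 10, step = 2, skip_niters = 3, nunits = 8;
      int adjusted = init - skip_niters * step;   /* init_expr - skip_step */
      for (int lane = 0; lane < nunits; lane++)
        printf ("lane %d: %d%s\n", lane, adjusted + lane * step,
                lane < skip_niters ? " (masked off)" : "");
      /* Lane skip_niters, the first active lane, prints the original 10.  */
      return 0;
    }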
7320
6655 /* Convert the step to the desired type. */ 7321 /* Convert the step to the desired type. */
6656 stmts = NULL;
6657 step_expr = gimple_convert (&stmts, TREE_TYPE (vectype), step_expr); 7322 step_expr = gimple_convert (&stmts, TREE_TYPE (vectype), step_expr);
7323
6658 if (stmts) 7324 if (stmts)
6659 { 7325 {
6660 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); 7326 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
6661 gcc_assert (!new_bb); 7327 gcc_assert (!new_bb);
6662 } 7328 }
6663 7329
6664 /* Find the first insertion point in the BB. */ 7330 /* Find the first insertion point in the BB. */
7331 basic_block bb = gimple_bb (phi);
6665 si = gsi_after_labels (bb); 7332 si = gsi_after_labels (bb);
6666 7333
6667 /* For SLP induction we have to generate several IVs as for example 7334 /* For SLP induction we have to generate several IVs as for example
6668 with group size 3 we need [i, i, i, i + S] [i + S, i + S, i + 2*S, i + 2*S] 7335 with group size 3 we need [i, i, i, i + S] [i + S, i + S, i + 2*S, i + 2*S]
6669 [i + 2*S, i + 3*S, i + 3*S, i + 3*S]. The step is the same uniform 7336 [i + 2*S, i + 3*S, i + 3*S, i + 3*S]. The step is the same uniform
6670 [VF*S, VF*S, VF*S, VF*S] for all. */ 7337 [VF*S, VF*S, VF*S, VF*S] for all. */
6671 if (slp_node) 7338 if (slp_node)
6672 { 7339 {
6673 /* Convert the init to the desired type. */ 7340 /* Enforced above. */
6674 stmts = NULL; 7341 unsigned int const_nunits = nunits.to_constant ();
6675 init_expr = gimple_convert (&stmts, TREE_TYPE (vectype), init_expr);
6676 if (stmts)
6677 {
6678 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
6679 gcc_assert (!new_bb);
6680 }
6681 7342
6682 /* Generate [VF*S, VF*S, ... ]. */ 7343 /* Generate [VF*S, VF*S, ... ]. */
6683 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) 7344 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
6684 { 7345 {
6685 expr = build_int_cst (integer_type_node, vf); 7346 expr = build_int_cst (integer_type_node, vf);
6688 else 7349 else
6689 expr = build_int_cst (TREE_TYPE (step_expr), vf); 7350 expr = build_int_cst (TREE_TYPE (step_expr), vf);
6690 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr), 7351 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
6691 expr, step_expr); 7352 expr, step_expr);
6692 if (! CONSTANT_CLASS_P (new_name)) 7353 if (! CONSTANT_CLASS_P (new_name))
6693 new_name = vect_init_vector (phi, new_name, 7354 new_name = vect_init_vector (stmt_info, new_name,
6694 TREE_TYPE (step_expr), NULL); 7355 TREE_TYPE (step_expr), NULL);
6695 new_vec = build_vector_from_val (vectype, new_name); 7356 new_vec = build_vector_from_val (vectype, new_name);
6696 vec_step = vect_init_vector (phi, new_vec, vectype, NULL); 7357 vec_step = vect_init_vector (stmt_info, new_vec, vectype, NULL);
6697 7358
6698 /* Now generate the IVs. */ 7359 /* Now generate the IVs. */
6699 unsigned group_size = SLP_TREE_SCALAR_STMTS (slp_node).length (); 7360 unsigned group_size = SLP_TREE_SCALAR_STMTS (slp_node).length ();
6700 unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); 7361 unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
6701 unsigned elts = nunits * nvects; 7362 unsigned elts = const_nunits * nvects;
6702 unsigned nivs = least_common_multiple (group_size, nunits) / nunits; 7363 unsigned nivs = least_common_multiple (group_size,
7364 const_nunits) / const_nunits;
6703 gcc_assert (elts % group_size == 0); 7365 gcc_assert (elts % group_size == 0);
6704 tree elt = init_expr; 7366 tree elt = init_expr;
6705 unsigned ivn; 7367 unsigned ivn;
6706 for (ivn = 0; ivn < nivs; ++ivn) 7368 for (ivn = 0; ivn < nivs; ++ivn)
6707 { 7369 {
6708 auto_vec<tree, 32> elts (nunits); 7370 tree_vector_builder elts (vectype, const_nunits, 1);
6709 stmts = NULL; 7371 stmts = NULL;
6710 for (unsigned eltn = 0; eltn < nunits; ++eltn) 7372 for (unsigned eltn = 0; eltn < const_nunits; ++eltn)
6711 { 7373 {
6712 if (ivn*nunits + eltn >= group_size 7374 if (ivn*const_nunits + eltn >= group_size
6713 && (ivn*nunits + eltn) % group_size == 0) 7375 && (ivn * const_nunits + eltn) % group_size == 0)
6714 elt = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (elt), 7376 elt = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (elt),
6715 elt, step_expr); 7377 elt, step_expr);
6716 elts.quick_push (elt); 7378 elts.quick_push (elt);
6717 } 7379 }
6718 vec_init = gimple_build_vector (&stmts, vectype, elts); 7380 vec_init = gimple_build_vector (&stmts, &elts);
6719 if (stmts) 7381 if (stmts)
6720 { 7382 {
6721 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); 7383 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
6722 gcc_assert (!new_bb); 7384 gcc_assert (!new_bb);
6723 } 7385 }
6724 7386
6725 /* Create the induction-phi that defines the induction-operand. */ 7387 /* Create the induction-phi that defines the induction-operand. */
6726 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_"); 7388 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
6727 induction_phi = create_phi_node (vec_dest, iv_loop->header); 7389 induction_phi = create_phi_node (vec_dest, iv_loop->header);
6728 set_vinfo_for_stmt (induction_phi, 7390 stmt_vec_info induction_phi_info
6729 new_stmt_vec_info (induction_phi, loop_vinfo)); 7391 = loop_vinfo->add_stmt (induction_phi);
6730 induc_def = PHI_RESULT (induction_phi); 7392 induc_def = PHI_RESULT (induction_phi);
6731 7393
6732 /* Create the iv update inside the loop */ 7394 /* Create the iv update inside the loop */
6733 vec_def = make_ssa_name (vec_dest); 7395 vec_def = make_ssa_name (vec_dest);
6734 new_stmt = gimple_build_assign (vec_def, PLUS_EXPR, induc_def, vec_step); 7396 new_stmt = gimple_build_assign (vec_def, PLUS_EXPR, induc_def, vec_step);
6735 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT); 7397 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
6736 set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo)); 7398 loop_vinfo->add_stmt (new_stmt);
6737 7399
6738 /* Set the arguments of the phi node: */ 7400 /* Set the arguments of the phi node: */
6739 add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION); 7401 add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
6740 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), 7402 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
6741 UNKNOWN_LOCATION); 7403 UNKNOWN_LOCATION);
6742 7404
6743 SLP_TREE_VEC_STMTS (slp_node).quick_push (induction_phi); 7405 SLP_TREE_VEC_STMTS (slp_node).quick_push (induction_phi_info);
6744 } 7406 }
6745 7407
6746 /* Re-use IVs when we can. */ 7408 /* Re-use IVs when we can. */
6747 if (ivn < nvects) 7409 if (ivn < nvects)
6748 { 7410 {
6749 unsigned vfp 7411 unsigned vfp
6750 = least_common_multiple (group_size, nunits) / group_size; 7412 = least_common_multiple (group_size, const_nunits) / group_size;
6751 /* Generate [VF'*S, VF'*S, ... ]. */ 7413 /* Generate [VF'*S, VF'*S, ... ]. */
6752 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) 7414 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)))
6753 { 7415 {
6754 expr = build_int_cst (integer_type_node, vfp); 7416 expr = build_int_cst (integer_type_node, vfp);
6755 expr = fold_convert (TREE_TYPE (step_expr), expr); 7417 expr = fold_convert (TREE_TYPE (step_expr), expr);
6757 else 7419 else
6758 expr = build_int_cst (TREE_TYPE (step_expr), vfp); 7420 expr = build_int_cst (TREE_TYPE (step_expr), vfp);
6759 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr), 7421 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
6760 expr, step_expr); 7422 expr, step_expr);
6761 if (! CONSTANT_CLASS_P (new_name)) 7423 if (! CONSTANT_CLASS_P (new_name))
6762 new_name = vect_init_vector (phi, new_name, 7424 new_name = vect_init_vector (stmt_info, new_name,
6763 TREE_TYPE (step_expr), NULL); 7425 TREE_TYPE (step_expr), NULL);
6764 new_vec = build_vector_from_val (vectype, new_name); 7426 new_vec = build_vector_from_val (vectype, new_name);
6765 vec_step = vect_init_vector (phi, new_vec, vectype, NULL); 7427 vec_step = vect_init_vector (stmt_info, new_vec, vectype, NULL);
6766 for (; ivn < nvects; ++ivn) 7428 for (; ivn < nvects; ++ivn)
6767 { 7429 {
6768 gimple *iv = SLP_TREE_VEC_STMTS (slp_node)[ivn - nivs]; 7430 gimple *iv = SLP_TREE_VEC_STMTS (slp_node)[ivn - nivs]->stmt;
6769 tree def; 7431 tree def;
6770 if (gimple_code (iv) == GIMPLE_PHI) 7432 if (gimple_code (iv) == GIMPLE_PHI)
6771 def = gimple_phi_result (iv); 7433 def = gimple_phi_result (iv);
6772 else 7434 else
6773 def = gimple_assign_lhs (iv); 7435 def = gimple_assign_lhs (iv);
6779 else 7441 else
6780 { 7442 {
6781 gimple_stmt_iterator tgsi = gsi_for_stmt (iv); 7443 gimple_stmt_iterator tgsi = gsi_for_stmt (iv);
6782 gsi_insert_after (&tgsi, new_stmt, GSI_CONTINUE_LINKING); 7444 gsi_insert_after (&tgsi, new_stmt, GSI_CONTINUE_LINKING);
6783 } 7445 }
6784 set_vinfo_for_stmt (new_stmt, 7446 SLP_TREE_VEC_STMTS (slp_node).quick_push
6785 new_stmt_vec_info (new_stmt, loop_vinfo)); 7447 (loop_vinfo->add_stmt (new_stmt));
6786 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt);
6787 } 7448 }
6788 } 7449 }
6789 7450
6790 return true; 7451 return true;
6791 } 7452 }
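To make the SLP layout in the comment above concrete, here is a standalone sketch (plain C, hypothetical group size 3 and 4-lane vectors; lane values are written as multiples of the step S above the initial value i). It mirrors the elt/eltn walk in the SLP branch and prints the three initial vectors [i, i, i, i+S], [i+S, i+S, i+2*S, i+2*S], [i+2*S, i+3*S, i+3*S, i+3*S].

    #include <stdio.h>

    int main (void)
    {
      int group_size = 3, nunits = 4;      /* assumed const_nunits */
      int nivs = 12 / nunits;              /* least_common_multiple (3, 4) / 4 */
      int elt = 0;                         /* offset from init, in units of S */
      for (int ivn = 0; ivn < nivs; ivn++)
        {
          printf ("vec_init %d:", ivn);
          for (int eltn = 0; eltn < nunits; eltn++)
            {
              int idx = ivn * nunits + eltn;
              if (idx >= group_size && idx % group_size == 0)
                elt++;                     /* elt = elt + S */
              printf (" [i+%d*S]", elt);
            }
          printf ("\n");
        }
      return 0;
    }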
6794 if (nested_in_vect_loop) 7455 if (nested_in_vect_loop)
6795 { 7456 {
6796 /* iv_loop is nested in the loop to be vectorized. init_expr had already 7457 /* iv_loop is nested in the loop to be vectorized. init_expr had already
6797 been created during vectorization of previous stmts. We obtain it 7458 been created during vectorization of previous stmts. We obtain it
6798 from the STMT_VINFO_VEC_STMT of the defining stmt. */ 7459 from the STMT_VINFO_VEC_STMT of the defining stmt. */
6799 vec_init = vect_get_vec_def_for_operand (init_expr, phi); 7460 vec_init = vect_get_vec_def_for_operand (init_expr, stmt_info);
6800 /* If the initial value is not of proper type, convert it. */ 7461 /* If the initial value is not of proper type, convert it. */
6801 if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init))) 7462 if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init)))
6802 { 7463 {
6803 new_stmt 7464 new_stmt
6804 = gimple_build_assign (vect_get_new_ssa_name (vectype, 7465 = gimple_build_assign (vect_get_new_ssa_name (vectype,
6809 vec_init)); 7470 vec_init));
6810 vec_init = gimple_assign_lhs (new_stmt); 7471 vec_init = gimple_assign_lhs (new_stmt);
6811 new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop), 7472 new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop),
6812 new_stmt); 7473 new_stmt);
6813 gcc_assert (!new_bb); 7474 gcc_assert (!new_bb);
6814 set_vinfo_for_stmt (new_stmt, 7475 loop_vinfo->add_stmt (new_stmt);
6815 new_stmt_vec_info (new_stmt, loop_vinfo));
6816 } 7476 }
6817 } 7477 }
6818 else 7478 else
6819 { 7479 {
6820 /* iv_loop is the loop to be vectorized. Create: 7480 /* iv_loop is the loop to be vectorized. Create:
6821 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */ 7481 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */
6822 stmts = NULL; 7482 stmts = NULL;
6823 new_name = gimple_convert (&stmts, TREE_TYPE (vectype), init_expr); 7483 new_name = gimple_convert (&stmts, TREE_TYPE (vectype), init_expr);
6824 7484
6825 auto_vec<tree, 32> elts (nunits); 7485 unsigned HOST_WIDE_INT const_nunits;
6826 elts.quick_push (new_name); 7486 if (nunits.is_constant (&const_nunits))
6827 for (i = 1; i < nunits; i++) 7487 {
6828 { 7488 tree_vector_builder elts (vectype, const_nunits, 1);
6829 /* Create: new_name_i = new_name + step_expr */
6830 new_name = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (new_name),
6831 new_name, step_expr);
6832 elts.quick_push (new_name); 7489 elts.quick_push (new_name);
6833 } 7490 for (i = 1; i < const_nunits; i++)
6834 /* Create a vector from [new_name_0, new_name_1, ..., 7491 {
6835 new_name_nunits-1] */ 7492 /* Create: new_name_i = new_name + step_expr */
6836 vec_init = gimple_build_vector (&stmts, vectype, elts); 7493 new_name = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (new_name),
7494 new_name, step_expr);
7495 elts.quick_push (new_name);
7496 }
7497 /* Create a vector from [new_name_0, new_name_1, ...,
7498 new_name_nunits-1] */
7499 vec_init = gimple_build_vector (&stmts, &elts);
7500 }
7501 else if (INTEGRAL_TYPE_P (TREE_TYPE (step_expr)))
7502 /* Build the initial value directly from a VEC_SERIES_EXPR. */
7503 vec_init = gimple_build (&stmts, VEC_SERIES_EXPR, vectype,
7504 new_name, step_expr);
7505 else
7506 {
7507 /* Build:
7508 [base, base, base, ...]
7509 + (vectype) [0, 1, 2, ...] * [step, step, step, ...]. */
7510 gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr)));
7511 gcc_assert (flag_associative_math);
7512 tree index = build_index_vector (vectype, 0, 1);
7513 tree base_vec = gimple_build_vector_from_val (&stmts, vectype,
7514 new_name);
7515 tree step_vec = gimple_build_vector_from_val (&stmts, vectype,
7516 step_expr);
7517 vec_init = gimple_build (&stmts, FLOAT_EXPR, vectype, index);
7518 vec_init = gimple_build (&stmts, MULT_EXPR, vectype,
7519 vec_init, step_vec);
7520 vec_init = gimple_build (&stmts, PLUS_EXPR, vectype,
7521 vec_init, base_vec);
7522 }
7523
6837 if (stmts) 7524 if (stmts)
6838 { 7525 {
6839 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); 7526 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts);
6840 gcc_assert (!new_bb); 7527 gcc_assert (!new_bb);
6841 } 7528 }
6870 7557
6871 t = unshare_expr (new_name); 7558 t = unshare_expr (new_name);
6872 gcc_assert (CONSTANT_CLASS_P (new_name) 7559 gcc_assert (CONSTANT_CLASS_P (new_name)
6873 || TREE_CODE (new_name) == SSA_NAME); 7560 || TREE_CODE (new_name) == SSA_NAME);
6874 new_vec = build_vector_from_val (vectype, t); 7561 new_vec = build_vector_from_val (vectype, t);
6875 vec_step = vect_init_vector (phi, new_vec, vectype, NULL); 7562 vec_step = vect_init_vector (stmt_info, new_vec, vectype, NULL);
6876 7563
6877 7564
6878 /* Create the following def-use cycle: 7565 /* Create the following def-use cycle:
6879 loop prolog: 7566 loop prolog:
6880 vec_init = ... 7567 vec_init = ...
6887 vec_loop = vec_iv + vec_step; */ 7574 vec_loop = vec_iv + vec_step; */
6888 7575
6889 /* Create the induction-phi that defines the induction-operand. */ 7576 /* Create the induction-phi that defines the induction-operand. */
6890 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_"); 7577 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_");
6891 induction_phi = create_phi_node (vec_dest, iv_loop->header); 7578 induction_phi = create_phi_node (vec_dest, iv_loop->header);
6892 set_vinfo_for_stmt (induction_phi, 7579 stmt_vec_info induction_phi_info = loop_vinfo->add_stmt (induction_phi);
6893 new_stmt_vec_info (induction_phi, loop_vinfo));
6894 induc_def = PHI_RESULT (induction_phi); 7580 induc_def = PHI_RESULT (induction_phi);
6895 7581
6896 /* Create the iv update inside the loop */ 7582 /* Create the iv update inside the loop */
6897 vec_def = make_ssa_name (vec_dest); 7583 vec_def = make_ssa_name (vec_dest);
6898 new_stmt = gimple_build_assign (vec_def, PLUS_EXPR, induc_def, vec_step); 7584 new_stmt = gimple_build_assign (vec_def, PLUS_EXPR, induc_def, vec_step);
6899 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT); 7585 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
6900 set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo)); 7586 stmt_vec_info new_stmt_info = loop_vinfo->add_stmt (new_stmt);
6901 7587
6902 /* Set the arguments of the phi node: */ 7588 /* Set the arguments of the phi node: */
6903 add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION); 7589 add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION);
6904 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), 7590 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop),
6905 UNKNOWN_LOCATION); 7591 UNKNOWN_LOCATION);
6906 7592
6907 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = induction_phi; 7593 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = induction_phi_info;
6908 7594
6909 /* In case the vectorization factor (VF) is bigger than the number 7595 /* In case the vectorization factor (VF) is bigger than the number
6910 of elements that we can fit in a vectype (nunits), we have to generate 7596 of elements that we can fit in a vectype (nunits), we have to generate
6911 more than one vector stmt - i.e. - we need to "unroll" the 7597 more than one vector stmt - i.e. - we need to "unroll" the
6912 vector stmt by a factor VF/nunits. For more details see documentation 7598 vector stmt by a factor VF/nunits. For more details see documentation

6937 7623
6938 t = unshare_expr (new_name); 7624 t = unshare_expr (new_name);
6939 gcc_assert (CONSTANT_CLASS_P (new_name) 7625 gcc_assert (CONSTANT_CLASS_P (new_name)
6940 || TREE_CODE (new_name) == SSA_NAME); 7626 || TREE_CODE (new_name) == SSA_NAME);
6941 new_vec = build_vector_from_val (vectype, t); 7627 new_vec = build_vector_from_val (vectype, t);
6942 vec_step = vect_init_vector (phi, new_vec, vectype, NULL); 7628 vec_step = vect_init_vector (stmt_info, new_vec, vectype, NULL);
6943 7629
6944 vec_def = induc_def; 7630 vec_def = induc_def;
6945 prev_stmt_vinfo = vinfo_for_stmt (induction_phi); 7631 prev_stmt_vinfo = induction_phi_info;
6946 for (i = 1; i < ncopies; i++) 7632 for (i = 1; i < ncopies; i++)
6947 { 7633 {
6948 /* vec_i = vec_prev + vec_step */ 7634 /* vec_i = vec_prev + vec_step */
6949 new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR, 7635 new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR,
6950 vec_def, vec_step); 7636 vec_def, vec_step);
6951 vec_def = make_ssa_name (vec_dest, new_stmt); 7637 vec_def = make_ssa_name (vec_dest, new_stmt);
6952 gimple_assign_set_lhs (new_stmt, vec_def); 7638 gimple_assign_set_lhs (new_stmt, vec_def);
6953 7639
6954 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT); 7640 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT);
6955 set_vinfo_for_stmt (new_stmt, 7641 new_stmt_info = loop_vinfo->add_stmt (new_stmt);
6956 new_stmt_vec_info (new_stmt, loop_vinfo)); 7642 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt_info;
6957 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt; 7643 prev_stmt_vinfo = new_stmt_info;
6958 prev_stmt_vinfo = vinfo_for_stmt (new_stmt);
6959 } 7644 }
6960 } 7645 }
6961 7646
6962 if (nested_in_vect_loop) 7647 if (nested_in_vect_loop)
6963 { 7648 {
6976 break; 7661 break;
6977 } 7662 }
6978 } 7663 }
6979 if (exit_phi) 7664 if (exit_phi)
6980 { 7665 {
6981 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi); 7666 stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (exit_phi);
6982 /* FORNOW. Currently not supporting the case that an inner-loop induction 7667 /* FORNOW. Currently not supporting the case that an inner-loop induction
6983 is not used in the outer-loop (i.e. only outside the outer-loop). */ 7668 is not used in the outer-loop (i.e. only outside the outer-loop). */
6984 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo) 7669 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo)
6985 && !STMT_VINFO_LIVE_P (stmt_vinfo)); 7670 && !STMT_VINFO_LIVE_P (stmt_vinfo));
6986 7671
6987 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt; 7672 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt_info;
6988 if (dump_enabled_p ()) 7673 if (dump_enabled_p ())
6989 { 7674 dump_printf_loc (MSG_NOTE, vect_location,
6990 dump_printf_loc (MSG_NOTE, vect_location, 7675 "vector of inductions after inner-loop:%G",
6991 "vector of inductions after inner-loop:"); 7676 new_stmt);
6992 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0);
6993 }
6994 } 7677 }
6995 } 7678 }
6996 7679
6997 7680
6998 if (dump_enabled_p ()) 7681 if (dump_enabled_p ())
6999 { 7682 dump_printf_loc (MSG_NOTE, vect_location,
7000 dump_printf_loc (MSG_NOTE, vect_location, 7683 "transform induction: created def-use cycle: %G%G",
7001 "transform induction: created def-use cycle: "); 7684 induction_phi, SSA_NAME_DEF_STMT (vec_def));
7002 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0);
7003 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
7004 SSA_NAME_DEF_STMT (vec_def), 0);
7005 }
7006 7685
7007 return true; 7686 return true;
7008 } 7687 }
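A minimal model (plain C arrays, assumed 4-lane vectors and made-up values) of the def-use cycle built in the non-SLP path above: vec_init = [X, X+S, X+2*S, X+3*S] in the preheader, vec_step = [VF*S, ...], and each vector iteration feeds vec_iv + vec_step back into the phi.

    #include <stdio.h>

    #define NUNITS 4

    int main (void)
    {
      int X = 5, S = 3, vector_iters = 3;   /* made-up values */
      int vec_iv[NUNITS], vec_step[NUNITS];

      /* loop prolog: vec_init and the invariant vec_step.  */
      for (int l = 0; l < NUNITS; l++)
        {
          vec_iv[l] = X + l * S;
          vec_step[l] = NUNITS * S;
        }

      /* loop body: "vec_loop = vec_iv + vec_step" feeds the next iteration.  */
      for (int it = 0; it < vector_iters; it++)
        {
          printf ("vector iteration %d:", it);
          for (int l = 0; l < NUNITS; l++)
            printf (" %d", vec_iv[l]);
          printf ("\n");
          for (int l = 0; l < NUNITS; l++)
            vec_iv[l] += vec_step[l];
        }
      return 0;
    }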
7009 7688
7010 /* Function vectorizable_live_operation. 7689 /* Function vectorizable_live_operation.
7011 7690
7012 STMT computes a value that is used outside the loop. Check if 7691 STMT_INFO computes a value that is used outside the loop. Check if
7013 it can be supported. */ 7692 it can be supported. */
7014 7693
7015 bool 7694 bool
7016 vectorizable_live_operation (gimple *stmt, 7695 vectorizable_live_operation (stmt_vec_info stmt_info,
7017 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED, 7696 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED,
7018 slp_tree slp_node, int slp_index, 7697 slp_tree slp_node, int slp_index,
7019 gimple **vec_stmt) 7698 stmt_vec_info *vec_stmt,
7699 stmt_vector_for_cost *)
7020 { 7700 {
7021 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
7022 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); 7701 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info);
7023 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 7702 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7024 imm_use_iterator imm_iter; 7703 imm_use_iterator imm_iter;
7025 tree lhs, lhs_type, bitsize, vec_bitsize; 7704 tree lhs, lhs_type, bitsize, vec_bitsize;
7026 tree vectype = STMT_VINFO_VECTYPE (stmt_info); 7705 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
7027 int nunits = TYPE_VECTOR_SUBPARTS (vectype); 7706 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype);
7028 int ncopies; 7707 int ncopies;
7029 gimple *use_stmt; 7708 gimple *use_stmt;
7030 auto_vec<tree> vec_oprnds; 7709 auto_vec<tree> vec_oprnds;
7710 int vec_entry = 0;
7711 poly_uint64 vec_index = 0;
7031 7712
7032 gcc_assert (STMT_VINFO_LIVE_P (stmt_info)); 7713 gcc_assert (STMT_VINFO_LIVE_P (stmt_info));
7033 7714
7034 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def) 7715 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def)
7035 return false; 7716 return false;
7036 7717
7037 /* FORNOW. CHECKME. */ 7718 /* FORNOW. CHECKME. */
7038 if (nested_in_vect_loop_p (loop, stmt)) 7719 if (nested_in_vect_loop_p (loop, stmt_info))
7039 return false; 7720 return false;
7040 7721
7041 /* If STMT is not relevant and it is a simple assignment and its inputs are 7722 /* If STMT is not relevant and it is a simple assignment and its inputs are
7042 invariant then it can remain in place, unvectorized. The original last 7723 invariant then it can remain in place, unvectorized. The original last
7043 scalar value that it computes will be used. */ 7724 scalar value that it computes will be used. */
7044 if (!STMT_VINFO_RELEVANT_P (stmt_info)) 7725 if (!STMT_VINFO_RELEVANT_P (stmt_info))
7045 { 7726 {
7046 gcc_assert (is_simple_and_all_uses_invariant (stmt, loop_vinfo)); 7727 gcc_assert (is_simple_and_all_uses_invariant (stmt_info, loop_vinfo));
7047 if (dump_enabled_p ()) 7728 if (dump_enabled_p ())
7048 dump_printf_loc (MSG_NOTE, vect_location, 7729 dump_printf_loc (MSG_NOTE, vect_location,
7049 "statement is simple and uses invariant. Leaving in " 7730 "statement is simple and uses invariant. Leaving in "
7050 "place.\n"); 7731 "place.\n");
7051 return true; 7732 return true;
7054 if (slp_node) 7735 if (slp_node)
7055 ncopies = 1; 7736 ncopies = 1;
7056 else 7737 else
7057 ncopies = vect_get_num_copies (loop_vinfo, vectype); 7738 ncopies = vect_get_num_copies (loop_vinfo, vectype);
7058 7739
7740 if (slp_node)
7741 {
7742 gcc_assert (slp_index >= 0);
7743
7744 int num_scalar = SLP_TREE_SCALAR_STMTS (slp_node).length ();
7745 int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7746
7747 /* Get the last occurrence of the scalar index from the concatenation of
7748 all the slp vectors. Calculate which slp vector it is and the index
7749 within. */
7750 poly_uint64 pos = (num_vec * nunits) - num_scalar + slp_index;
7751
7752 /* Calculate which vector contains the result, and which lane of
7753 that vector we need. */
7754 if (!can_div_trunc_p (pos, nunits, &vec_entry, &vec_index))
7755 {
7756 if (dump_enabled_p ())
7757 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7758 "Cannot determine which vector holds the"
7759 " final result.\n");
7760 return false;
7761 }
7762 }
7763
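The position arithmetic above is easiest to see with concrete (assumed) numbers; plain integer division stands in for can_div_trunc_p here. With 4 scalar statements in the SLP node, 2 vector statements of 4 lanes each and slp_index 1, the last occurrence of that scalar sits in vector 1, lane 1.

    #include <stdio.h>

    int main (void)
    {
      /* Assumed: 4 scalar stmts, 2 vector stmts, 4 lanes per vector.  */
      int num_scalar = 4, num_vec = 2, nunits = 4, slp_index = 1;

      /* Last occurrence of scalar SLP_INDEX in the concatenated vectors.  */
      int pos = num_vec * nunits - num_scalar + slp_index;
      int vec_entry = pos / nunits;   /* which SLP vector statement */
      int vec_index = pos % nunits;   /* which lane within it */

      printf ("pos %d -> vector %d, lane %d\n", pos, vec_entry, vec_index);
      return 0;
    }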
7059 if (!vec_stmt) 7764 if (!vec_stmt)
7060 /* No transformation required. */ 7765 {
7061 return true; 7766 /* No transformation required. */
7062 7767 if (LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo))
7063 /* If stmt has a related stmt, then use that for getting the lhs. */ 7768 {
7064 if (is_pattern_stmt_p (stmt_info)) 7769 if (!direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype,
7065 stmt = STMT_VINFO_RELATED_STMT (stmt_info); 7770 OPTIMIZE_FOR_SPEED))
7771 {
7772 if (dump_enabled_p ())
7773 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7774 "can't use a fully-masked loop because "
7775 "the target doesn't support extract last "
7776 "reduction.\n");
7777 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
7778 }
7779 else if (slp_node)
7780 {
7781 if (dump_enabled_p ())
7782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7783 "can't use a fully-masked loop because an "
7784 "SLP statement is live after the loop.\n");
7785 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
7786 }
7787 else if (ncopies > 1)
7788 {
7789 if (dump_enabled_p ())
7790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
7791 "can't use a fully-masked loop because"
7792 " ncopies is greater than 1.\n");
7793 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false;
7794 }
7795 else
7796 {
7797 gcc_assert (ncopies == 1 && !slp_node);
7798 vect_record_loop_mask (loop_vinfo,
7799 &LOOP_VINFO_MASKS (loop_vinfo),
7800 1, vectype);
7801 }
7802 }
7803 return true;
7804 }
7805
7806 /* Use the lhs of the original scalar statement. */
7807 gimple *stmt = vect_orig_stmt (stmt_info)->stmt;
7066 7808
7067 lhs = (is_a <gphi *> (stmt)) ? gimple_phi_result (stmt) 7809 lhs = (is_a <gphi *> (stmt)) ? gimple_phi_result (stmt)
7068 : gimple_get_lhs (stmt); 7810 : gimple_get_lhs (stmt);
7069 lhs_type = TREE_TYPE (lhs); 7811 lhs_type = TREE_TYPE (lhs);
7070 7812
7075 7817
7076 /* Get the vectorized lhs of STMT and the lane to use (counted in bits). */ 7818 /* Get the vectorized lhs of STMT and the lane to use (counted in bits). */
7077 tree vec_lhs, bitstart; 7819 tree vec_lhs, bitstart;
7078 if (slp_node) 7820 if (slp_node)
7079 { 7821 {
7080 gcc_assert (slp_index >= 0); 7822 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7081
7082 int num_scalar = SLP_TREE_SCALAR_STMTS (slp_node).length ();
7083 int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node);
7084
7085 /* Get the last occurrence of the scalar index from the concatenation of
7086 all the slp vectors. Calculate which slp vector it is and the index
7087 within. */
7088 int pos = (num_vec * nunits) - num_scalar + slp_index;
7089 int vec_entry = pos / nunits;
7090 int vec_index = pos % nunits;
7091 7823
7092 /* Get the correct slp vectorized stmt. */ 7824 /* Get the correct slp vectorized stmt. */
7093 vec_lhs = gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[vec_entry]); 7825 gimple *vec_stmt = SLP_TREE_VEC_STMTS (slp_node)[vec_entry]->stmt;
7826 if (gphi *phi = dyn_cast <gphi *> (vec_stmt))
7827 vec_lhs = gimple_phi_result (phi);
7828 else
7829 vec_lhs = gimple_get_lhs (vec_stmt);
7094 7830
7095 /* Get entry to use. */ 7831 /* Get entry to use. */
7096 bitstart = bitsize_int (vec_index); 7832 bitstart = bitsize_int (vec_index);
7097 bitstart = int_const_binop (MULT_EXPR, bitsize, bitstart); 7833 bitstart = int_const_binop (MULT_EXPR, bitsize, bitstart);
7098 } 7834 }
7099 else 7835 else
7100 { 7836 {
7101 enum vect_def_type dt = STMT_VINFO_DEF_TYPE (stmt_info); 7837 enum vect_def_type dt = STMT_VINFO_DEF_TYPE (stmt_info);
7102 vec_lhs = vect_get_vec_def_for_operand_1 (stmt, dt); 7838 vec_lhs = vect_get_vec_def_for_operand_1 (stmt_info, dt);
7839 gcc_checking_assert (ncopies == 1
7840 || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo));
7103 7841
7104 /* For multiple copies, get the last copy. */ 7842 /* For multiple copies, get the last copy. */
7105 for (int i = 1; i < ncopies; ++i) 7843 for (int i = 1; i < ncopies; ++i)
7106 vec_lhs = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, 7844 vec_lhs = vect_get_vec_def_for_stmt_copy (loop_vinfo, vec_lhs);
7107 vec_lhs);
7108 7845
7109 /* Get the last lane in the vector. */ 7846 /* Get the last lane in the vector. */
7110 bitstart = int_const_binop (MINUS_EXPR, vec_bitsize, bitsize); 7847 bitstart = int_const_binop (MINUS_EXPR, vec_bitsize, bitsize);
7111 } 7848 }
7112 7849
7113 /* Create a new vectorized stmt for the uses of STMT and insert outside the
7114 loop. */
7115 gimple_seq stmts = NULL; 7850 gimple_seq stmts = NULL;
7116 tree bftype = TREE_TYPE (vectype); 7851 tree new_tree;
7117 if (VECTOR_BOOLEAN_TYPE_P (vectype)) 7852 if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
7118 bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1); 7853 {
7119 tree new_tree = build3 (BIT_FIELD_REF, bftype, vec_lhs, bitsize, bitstart); 7854 /* Emit:
7120 new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree), &stmts, 7855
7121 true, NULL_TREE); 7856 SCALAR_RES = EXTRACT_LAST <VEC_LHS, MASK>
7857
7858 where VEC_LHS is the vectorized live-out result and MASK is
7859 the loop mask for the final iteration. */
7860 gcc_assert (ncopies == 1 && !slp_node);
7861 tree scalar_type = TREE_TYPE (STMT_VINFO_VECTYPE (stmt_info));
7862 tree mask = vect_get_loop_mask (gsi, &LOOP_VINFO_MASKS (loop_vinfo),
7863 1, vectype, 0);
7864 tree scalar_res = gimple_build (&stmts, CFN_EXTRACT_LAST,
7865 scalar_type, mask, vec_lhs);
7866
7867 /* Convert the extracted vector element to the required scalar type. */
7868 new_tree = gimple_convert (&stmts, lhs_type, scalar_res);
7869 }
7870 else
7871 {
7872 tree bftype = TREE_TYPE (vectype);
7873 if (VECTOR_BOOLEAN_TYPE_P (vectype))
7874 bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1);
7875 new_tree = build3 (BIT_FIELD_REF, bftype, vec_lhs, bitsize, bitstart);
7876 new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree),
7877 &stmts, true, NULL_TREE);
7878 }
7879
7122 if (stmts) 7880 if (stmts)
7123 gsi_insert_seq_on_edge_immediate (single_exit (loop), stmts); 7881 gsi_insert_seq_on_edge_immediate (single_exit (loop), stmts);
7124 7882
7125 /* Replace use of lhs with newly computed result. If the use stmt is a 7883 /* Replace use of lhs with newly computed result. If the use stmt is a
7126 single arg PHI, just replace all uses of PHI result. It's necessary 7884 single arg PHI, just replace all uses of PHI result. It's necessary
7144 } 7902 }
7145 7903
7146 return true; 7904 return true;
7147 } 7905 }
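For the fully-masked path handled above, a rough scalar model (assumptions only, not the IFN_EXTRACT_LAST implementation) of what SCALAR_RES = EXTRACT_LAST <VEC_LHS, MASK> computes: the element of VEC_LHS in the highest-numbered lane whose mask bit is set, i.e. the live-out value of the final active scalar iteration.

    #include <stdio.h>
    #include <stdbool.h>

    #define NUNITS 8

    /* Model of an "extract last" reduction.  */
    static int
    extract_last (const bool mask[NUNITS], const int vec[NUNITS])
    {
      int res = 0;
      for (int l = 0; l < NUNITS; l++)
        if (mask[l])
          res = vec[l];
      return res;
    }

    int main (void)
    {
      /* A loop mask for a final, partial iteration: first 5 lanes active.  */
      bool mask[NUNITS] = { 1, 1, 1, 1, 1, 0, 0, 0 };
      int vec_lhs[NUNITS] = { 10, 11, 12, 13, 14, 99, 99, 99 };
      printf ("last live value: %d\n", extract_last (mask, vec_lhs)); /* 14 */
      return 0;
    }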
7148 7906
7149 /* Kill any debug uses outside LOOP of SSA names defined in STMT. */ 7907 /* Kill any debug uses outside LOOP of SSA names defined in STMT_INFO. */
7150 7908
7151 static void 7909 static void
7152 vect_loop_kill_debug_uses (struct loop *loop, gimple *stmt) 7910 vect_loop_kill_debug_uses (struct loop *loop, stmt_vec_info stmt_info)
7153 { 7911 {
7154 ssa_op_iter op_iter; 7912 ssa_op_iter op_iter;
7155 imm_use_iterator imm_iter; 7913 imm_use_iterator imm_iter;
7156 def_operand_p def_p; 7914 def_operand_p def_p;
7157 gimple *ustmt; 7915 gimple *ustmt;
7158 7916
7159 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF) 7917 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF)
7160 { 7918 {
7161 FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p)) 7919 FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p))
7162 { 7920 {
7163 basic_block bb; 7921 basic_block bb;
7164 7922
7216 return true; 7974 return true;
7217 } 7975 }
7218 return false; 7976 return false;
7219 } 7977 }
7220 7978
7979 /* Return a mask type with half the number of elements as TYPE. */
7980
7981 tree
7982 vect_halve_mask_nunits (tree type)
7983 {
7984 poly_uint64 nunits = exact_div (TYPE_VECTOR_SUBPARTS (type), 2);
7985 return build_truth_vector_type (nunits, current_vector_size);
7986 }
7987
7988 /* Return a mask type with twice as many elements as TYPE. */
7989
7990 tree
7991 vect_double_mask_nunits (tree type)
7992 {
7993 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (type) * 2;
7994 return build_truth_vector_type (nunits, current_vector_size);
7995 }
7996
7997 /* Record that a fully-masked version of LOOP_VINFO would need MASKS to
7998 contain a sequence of NVECTORS masks that each control a vector of type
7999 VECTYPE. */
8000
8001 void
8002 vect_record_loop_mask (loop_vec_info loop_vinfo, vec_loop_masks *masks,
8003 unsigned int nvectors, tree vectype)
8004 {
8005 gcc_assert (nvectors != 0);
8006 if (masks->length () < nvectors)
8007 masks->safe_grow_cleared (nvectors);
8008 rgroup_masks *rgm = &(*masks)[nvectors - 1];
8009 /* The number of scalars per iteration and the number of vectors are
8010 both compile-time constants. */
8011 unsigned int nscalars_per_iter
8012 = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype),
8013 LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant ();
8014 if (rgm->max_nscalars_per_iter < nscalars_per_iter)
8015 {
8016 rgm->max_nscalars_per_iter = nscalars_per_iter;
8017 rgm->mask_type = build_same_sized_truth_vector_type (vectype);
8018 }
8019 }
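A quick numeric check of the exact_div above, with assumed numbers: a vectorization factor of 16 scalar iterations and an rgroup of 2 masks for an 8-lane vectype gives 2 * 8 / 16 = 1 scalar per iteration controlled by that rgroup.

    #include <stdio.h>

    int main (void)
    {
      /* Assumed: VF = 16, rgroup of 2 vectors, 8 lanes per vector.  */
      unsigned nvectors = 2, nunits = 8, vf = 16;
      unsigned nscalars_per_iter = nvectors * nunits / vf;  /* exact_div */
      printf ("masks in rgroup: %u, scalars per iteration: %u\n",
              nvectors, nscalars_per_iter);
      return 0;
    }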
8020
8021 /* Given a complete set of masks MASKS, extract mask number INDEX
8022 for an rgroup that operates on NVECTORS vectors of type VECTYPE,
8023 where 0 <= INDEX < NVECTORS. Insert any set-up statements before GSI.
8024
8025 See the comment above vec_loop_masks for more details about the mask
8026 arrangement. */
8027
8028 tree
8029 vect_get_loop_mask (gimple_stmt_iterator *gsi, vec_loop_masks *masks,
8030 unsigned int nvectors, tree vectype, unsigned int index)
8031 {
8032 rgroup_masks *rgm = &(*masks)[nvectors - 1];
8033 tree mask_type = rgm->mask_type;
8034
8035 /* Populate the rgroup's mask array, if this is the first time we've
8036 used it. */
8037 if (rgm->masks.is_empty ())
8038 {
8039 rgm->masks.safe_grow_cleared (nvectors);
8040 for (unsigned int i = 0; i < nvectors; ++i)
8041 {
8042 tree mask = make_temp_ssa_name (mask_type, NULL, "loop_mask");
8043 /* Provide a dummy definition until the real one is available. */
8044 SSA_NAME_DEF_STMT (mask) = gimple_build_nop ();
8045 rgm->masks[i] = mask;
8046 }
8047 }
8048
8049 tree mask = rgm->masks[index];
8050 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type),
8051 TYPE_VECTOR_SUBPARTS (vectype)))
8052 {
8053 /* A loop mask for data type X can be reused for data type Y
8054 if X has N times more elements than Y and if Y's elements
8055 are N times bigger than X's. In this case each sequence
8056 of N elements in the loop mask will be all-zero or all-one.
8057 We can then view-convert the mask so that each sequence of
8058 N elements is replaced by a single element. */
8059 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (mask_type),
8060 TYPE_VECTOR_SUBPARTS (vectype)));
8061 gimple_seq seq = NULL;
8062 mask_type = build_same_sized_truth_vector_type (vectype);
8063 mask = gimple_build (&seq, VIEW_CONVERT_EXPR, mask_type, mask);
8064 if (seq)
8065 gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT);
8066 }
8067 return mask;
8068 }
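A sketch, outside GCC, of the reuse property described in the comment above: a mask built for a 16-lane type can serve an 8-lane type with elements twice as wide, because (as assumed here and as the rgroup construction guarantees) every aligned pair of narrow lanes is all-zero or all-one, so the pair collapses to one wide lane, which is the effect of the VIEW_CONVERT_EXPR in the code.

    #include <stdio.h>
    #include <stdbool.h>

    int main (void)
    {
      /* Loop mask for 16 narrow lanes: a prefix of 10 active lanes.  */
      bool mask16[16];
      for (int i = 0; i < 16; i++)
        mask16[i] = i < 10;

      /* View it as 8 wide lanes: both narrow lanes of a pair agree,
         so either one gives the wide lane's value.  */
      bool mask8[8];
      for (int i = 0; i < 8; i++)
        mask8[i] = mask16[2 * i];

      for (int i = 0; i < 8; i++)
        printf ("wide lane %d: %s\n", i, mask8[i] ? "active" : "inactive");
      return 0;
    }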
8069
7221 /* Scale profiling counters by estimation for LOOP which is vectorized 8070 /* Scale profiling counters by estimation for LOOP which is vectorized
7222 by factor VF. */ 8071 by factor VF. */
7223 8072
7224 static void 8073 static void
7225 scale_profile_for_vect_loop (struct loop *loop, unsigned vf) 8074 scale_profile_for_vect_loop (struct loop *loop, unsigned vf)
7227 edge preheader = loop_preheader_edge (loop); 8076 edge preheader = loop_preheader_edge (loop);
7228 /* Reduce loop iterations by the vectorization factor. */ 8077 /* Reduce loop iterations by the vectorization factor. */
7229 gcov_type new_est_niter = niter_for_unrolled_loop (loop, vf); 8078 gcov_type new_est_niter = niter_for_unrolled_loop (loop, vf);
7230 profile_count freq_h = loop->header->count, freq_e = preheader->count (); 8079 profile_count freq_h = loop->header->count, freq_e = preheader->count ();
7231 8080
7232 /* Use frequency only if counts are zero. */ 8081 if (freq_h.nonzero_p ())
7233 if (!(freq_h > 0) && !(freq_e > 0))
7234 {
7235 freq_h = profile_count::from_gcov_type (loop->header->frequency);
7236 freq_e = profile_count::from_gcov_type (EDGE_FREQUENCY (preheader));
7237 }
7238 if (freq_h > 0)
7239 { 8082 {
7240 profile_probability p; 8083 profile_probability p;
7241 8084
7242 /* Avoid dropping loop body profile counter to 0 because of zero count 8085 /* Avoid dropping loop body profile counter to 0 because of zero count
7243 in loop's preheader. */ 8086 in loop's preheader. */
7244 if (!(freq_e > profile_count::from_gcov_type (1))) 8087 if (!(freq_e == profile_count::zero ()))
7245 freq_e = profile_count::from_gcov_type (1); 8088 freq_e = freq_e.force_nonzero ();
7246 p = freq_e.apply_scale (new_est_niter + 1, 1).probability_in (freq_h); 8089 p = freq_e.apply_scale (new_est_niter + 1, 1).probability_in (freq_h);
7247 scale_loop_frequencies (loop, p); 8090 scale_loop_frequencies (loop, p);
7248 } 8091 }
7249 8092
7250 edge exit_e = single_exit (loop); 8093 edge exit_e = single_exit (loop);
7254 edge exit_l = single_pred_edge (loop->latch); 8097 edge exit_l = single_pred_edge (loop->latch);
7255 profile_probability prob = exit_l->probability; 8098 profile_probability prob = exit_l->probability;
7256 exit_l->probability = exit_e->probability.invert (); 8099 exit_l->probability = exit_e->probability.invert ();
7257 if (prob.initialized_p () && exit_l->probability.initialized_p ()) 8100 if (prob.initialized_p () && exit_l->probability.initialized_p ())
7258 scale_bbs_frequencies (&loop->latch, 1, exit_l->probability / prob); 8101 scale_bbs_frequencies (&loop->latch, 1, exit_l->probability / prob);
8102 }
8103
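A toy calculation (made-up counts, not from any profile) of the scaling performed just above: with a header count of 1000, a preheader count of 10 and an expected 24 latch iterations after vectorization, the body is scaled by p = 10 * (24 + 1) / 1000 = 0.25.

    #include <stdio.h>

    int main (void)
    {
      /* Made-up counts: header 1000, preheader 10, 24 estimated iterations.  */
      double freq_h = 1000.0, freq_e = 10.0;
      long new_est_niter = 24;

      /* p = freq_e * (new_est_niter + 1) / freq_h, as applied to the body.  */
      double p = freq_e * (new_est_niter + 1) / freq_h;
      printf ("scale body counts by p = %.2f (new header count %.0f)\n",
              p, freq_h * p);
      return 0;
    }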
8104 /* Vectorize STMT_INFO if relevant, inserting any new instructions before GSI.
8105 When vectorizing STMT_INFO as a store, set *SEEN_STORE to its
8106 stmt_vec_info. */
8107
8108 static void
8109 vect_transform_loop_stmt (loop_vec_info loop_vinfo, stmt_vec_info stmt_info,
8110 gimple_stmt_iterator *gsi, stmt_vec_info *seen_store)
8111 {
8112 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
8113 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
8114
8115 if (dump_enabled_p ())
8116 dump_printf_loc (MSG_NOTE, vect_location,
8117 "------>vectorizing statement: %G", stmt_info->stmt);
8118
8119 if (MAY_HAVE_DEBUG_BIND_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
8120 vect_loop_kill_debug_uses (loop, stmt_info);
8121
8122 if (!STMT_VINFO_RELEVANT_P (stmt_info)
8123 && !STMT_VINFO_LIVE_P (stmt_info))
8124 return;
8125
8126 if (STMT_VINFO_VECTYPE (stmt_info))
8127 {
8128 poly_uint64 nunits
8129 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
8130 if (!STMT_SLP_TYPE (stmt_info)
8131 && maybe_ne (nunits, vf)
8132 && dump_enabled_p ())
8133 /* For SLP VF is set according to unrolling factor, and not
8134 to vector size, hence for SLP this print is not valid. */
8135 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
8136 }
8137
8138 /* Pure SLP statements have already been vectorized. We still need
8139 to apply loop vectorization to hybrid SLP statements. */
8140 if (PURE_SLP_STMT (stmt_info))
8141 return;
8142
8143 if (dump_enabled_p ())
8144 dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
8145
8146 if (vect_transform_stmt (stmt_info, gsi, NULL, NULL))
8147 *seen_store = stmt_info;
7259 } 8148 }
7260 8149
7261 /* Function vect_transform_loop. 8150 /* Function vect_transform_loop.
7262 8151
7263 The analysis phase has determined that the loop is vectorizable. 8152 The analysis phase has determined that the loop is vectorizable.
7271 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); 8160 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
7272 struct loop *epilogue = NULL; 8161 struct loop *epilogue = NULL;
7273 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); 8162 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo);
7274 int nbbs = loop->num_nodes; 8163 int nbbs = loop->num_nodes;
7275 int i; 8164 int i;
7276 tree niters_vector = NULL; 8165 tree niters_vector = NULL_TREE;
7277 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); 8166 tree step_vector = NULL_TREE;
7278 bool grouped_store; 8167 tree niters_vector_mult_vf = NULL_TREE;
7279 bool slp_scheduled = false; 8168 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
7280 gimple *stmt, *pattern_stmt; 8169 unsigned int lowest_vf = constant_lower_bound (vf);
7281 gimple_seq pattern_def_seq = NULL; 8170 gimple *stmt;
7282 gimple_stmt_iterator pattern_def_si = gsi_none ();
7283 bool transform_pattern_stmt = false;
7284 bool check_profitability = false; 8171 bool check_profitability = false;
7285 int th; 8172 unsigned int th;
7286 8173
7287 if (dump_enabled_p ()) 8174 DUMP_VECT_SCOPE ("vec_transform_loop");
7288 dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n"); 8175
8176 loop_vinfo->shared->check_datarefs ();
7289 8177
7290 /* Use the more conservative vectorization threshold. If the number 8178 /* Use the more conservative vectorization threshold. If the number
7291 of iterations is constant assume the cost check has been performed 8179 of iterations is constant assume the cost check has been performed
7292 by our caller. If the threshold makes all loops profitable that 8180 by our caller. If the threshold makes all loops profitable that
7293 run at least the vectorization factor number of times checking 8181 run at least the (estimated) vectorization factor number of times
7294 is pointless, too. */ 8182 checking is pointless, too. */
7295 th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); 8183 th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo);
7296 if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) 8184 if (th >= vect_vf_for_cost (loop_vinfo)
7297 && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) 8185 && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
7298 { 8186 {
7299 if (dump_enabled_p ()) 8187 if (dump_enabled_p ())
7300 dump_printf_loc (MSG_NOTE, vect_location, 8188 dump_printf_loc (MSG_NOTE, vect_location,
7301 "Profitability threshold is %d loop iterations.\n", 8189 "Profitability threshold is %d loop iterations.\n",
7316 /* Version the loop first, if required, so the profitability check 8204 /* Version the loop first, if required, so the profitability check
7317 comes first. */ 8205 comes first. */
7318 8206
7319 if (LOOP_REQUIRES_VERSIONING (loop_vinfo)) 8207 if (LOOP_REQUIRES_VERSIONING (loop_vinfo))
7320 { 8208 {
7321 vect_loop_versioning (loop_vinfo, th, check_profitability); 8209 poly_uint64 versioning_threshold
8210 = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo);
8211 if (check_profitability
8212 && ordered_p (poly_uint64 (th), versioning_threshold))
8213 {
8214 versioning_threshold = ordered_max (poly_uint64 (th),
8215 versioning_threshold);
8216 check_profitability = false;
8217 }
8218 vect_loop_versioning (loop_vinfo, th, check_profitability,
8219 versioning_threshold);
7322 check_profitability = false; 8220 check_profitability = false;
7323 } 8221 }
7324 8222
7325 /* Make sure there exists a single-predecessor exit bb also on the 8223 /* Make sure there exists a single-predecessor exit bb also on the
7326 scalar loop copy. Do this after versioning but before peeling 8224 scalar loop copy. Do this after versioning but before peeling
7340 8238
7341 tree niters = vect_build_loop_niters (loop_vinfo); 8239 tree niters = vect_build_loop_niters (loop_vinfo);
7342 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters; 8240 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters;
7343 tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo)); 8241 tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo));
7344 bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo); 8242 bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo);
7345 epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, th, 8243 epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector,
8244 &step_vector, &niters_vector_mult_vf, th,
7346 check_profitability, niters_no_overflow); 8245 check_profitability, niters_no_overflow);
8246
7347 if (niters_vector == NULL_TREE) 8247 if (niters_vector == NULL_TREE)
7348 { 8248 {
7349 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) 8249 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
7350 niters_vector 8250 && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
7351 = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)), 8251 && known_eq (lowest_vf, vf))
7352 LOOP_VINFO_INT_NITERS (loop_vinfo) / vf); 8252 {
8253 niters_vector
8254 = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)),
8255 LOOP_VINFO_INT_NITERS (loop_vinfo) / lowest_vf);
8256 step_vector = build_one_cst (TREE_TYPE (niters));
8257 }
7353 else 8258 else
7354 vect_gen_vector_loop_niters (loop_vinfo, niters, &niters_vector, 8259 vect_gen_vector_loop_niters (loop_vinfo, niters, &niters_vector,
7355 niters_no_overflow); 8260 &step_vector, niters_no_overflow);
7356 } 8261 }
7357 8262
7358 /* 1) Make sure the loop header has exactly two entries 8263 /* 1) Make sure the loop header has exactly two entries
7359 2) Make sure we have a preheader basic block. */ 8264 2) Make sure we have a preheader basic block. */
7360 8265
7361 gcc_assert (EDGE_COUNT (loop->header->preds) == 2); 8266 gcc_assert (EDGE_COUNT (loop->header->preds) == 2);
7362 8267
7363 split_edge (loop_preheader_edge (loop)); 8268 split_edge (loop_preheader_edge (loop));
8269
8270 if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
8271 && vect_use_loop_mask_for_alignment_p (loop_vinfo))
8272 /* This will deal with any possible peeling. */
8273 vect_prepare_for_masked_peels (loop_vinfo);
8274
8275 /* Schedule the SLP instances first, then handle loop vectorization
8276 below. */
8277 if (!loop_vinfo->slp_instances.is_empty ())
8278 {
8279 DUMP_VECT_SCOPE ("scheduling SLP instances");
8280 vect_schedule_slp (loop_vinfo);
8281 }
7364 8282
7365 /* FORNOW: the vectorizer supports only loops whose body consists 8283 /* FORNOW: the vectorizer supports only loops whose body consists
7366 of one basic block (header + empty latch). When the vectorizer 8284 of one basic block (header + empty latch). When the vectorizer
7367 supports more involved loop forms, the order in which the BBs are 8285 supports more involved loop forms, the order in which the BBs are
7368 traversed will need to be reconsidered. */ 8286 traversed will need to be reconsidered. */
7375 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); 8293 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si);
7376 gsi_next (&si)) 8294 gsi_next (&si))
7377 { 8295 {
7378 gphi *phi = si.phi (); 8296 gphi *phi = si.phi ();
7379 if (dump_enabled_p ()) 8297 if (dump_enabled_p ())
7380 { 8298 dump_printf_loc (MSG_NOTE, vect_location,
7381 dump_printf_loc (MSG_NOTE, vect_location, 8299 "------>vectorizing phi: %G", phi);
7382 "------>vectorizing phi: "); 8300 stmt_info = loop_vinfo->lookup_stmt (phi);
7383 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0);
7384 }
7385 stmt_info = vinfo_for_stmt (phi);
7386 if (!stmt_info) 8301 if (!stmt_info)
7387 continue; 8302 continue;
7388 8303
7389 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info)) 8304 if (MAY_HAVE_DEBUG_BIND_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
7390 vect_loop_kill_debug_uses (loop, phi); 8305 vect_loop_kill_debug_uses (loop, stmt_info);
7391 8306
7392 if (!STMT_VINFO_RELEVANT_P (stmt_info) 8307 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7393 && !STMT_VINFO_LIVE_P (stmt_info)) 8308 && !STMT_VINFO_LIVE_P (stmt_info))
7394 continue; 8309 continue;
7395 8310
7396 if (STMT_VINFO_VECTYPE (stmt_info) 8311 if (STMT_VINFO_VECTYPE (stmt_info)
7397 && (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)) 8312 && (maybe_ne
7398 != (unsigned HOST_WIDE_INT) vf) 8313 (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)), vf))
7399 && dump_enabled_p ()) 8314 && dump_enabled_p ())
7400 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n"); 8315 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
7401 8316
7402 if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def 8317 if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def
7403 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def 8318 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
7404 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) 8319 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle)
7405 && ! PURE_SLP_STMT (stmt_info)) 8320 && ! PURE_SLP_STMT (stmt_info))
7406 { 8321 {
7407 if (dump_enabled_p ()) 8322 if (dump_enabled_p ())
7408 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n"); 8323 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n");
7409 vect_transform_stmt (phi, NULL, NULL, NULL, NULL); 8324 vect_transform_stmt (stmt_info, NULL, NULL, NULL);
7410 } 8325 }
7411 } 8326 }
7412 8327
7413 pattern_stmt = NULL;
7414 for (gimple_stmt_iterator si = gsi_start_bb (bb); 8328 for (gimple_stmt_iterator si = gsi_start_bb (bb);
7415 !gsi_end_p (si) || transform_pattern_stmt;) 8329 !gsi_end_p (si);)
7416 { 8330 {
7417 bool is_store; 8331 stmt = gsi_stmt (si);
7418 8332 /* During vectorization remove existing clobber stmts. */
7419 if (transform_pattern_stmt) 8333 if (gimple_clobber_p (stmt))
7420 stmt = pattern_stmt;
7421 else
7422 { 8334 {
7423 stmt = gsi_stmt (si); 8335 unlink_stmt_vdef (stmt);
7424 /* During vectorization remove existing clobber stmts. */ 8336 gsi_remove (&si, true);
7425 if (gimple_clobber_p (stmt)) 8337 release_defs (stmt);
8338 }
8339 else
8340 {
8341 stmt_info = loop_vinfo->lookup_stmt (stmt);
8342
8343 /* vector stmts created in the outer-loop during vectorization of
8344 stmts in an inner-loop may not have a stmt_info, and do not
8345 need to be vectorized. */
8346 stmt_vec_info seen_store = NULL;
8347 if (stmt_info)
7426 { 8348 {
7427 unlink_stmt_vdef (stmt); 8349 if (STMT_VINFO_IN_PATTERN_P (stmt_info))
7428 gsi_remove (&si, true); 8350 {
7429 release_defs (stmt); 8351 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info);
7430 continue; 8352 for (gimple_stmt_iterator subsi = gsi_start (def_seq);
8353 !gsi_end_p (subsi); gsi_next (&subsi))
8354 {
8355 stmt_vec_info pat_stmt_info
8356 = loop_vinfo->lookup_stmt (gsi_stmt (subsi));
8357 vect_transform_loop_stmt (loop_vinfo, pat_stmt_info,
8358 &si, &seen_store);
8359 }
8360 stmt_vec_info pat_stmt_info
8361 = STMT_VINFO_RELATED_STMT (stmt_info);
8362 vect_transform_loop_stmt (loop_vinfo, pat_stmt_info, &si,
8363 &seen_store);
8364 }
8365 vect_transform_loop_stmt (loop_vinfo, stmt_info, &si,
8366 &seen_store);
8367 }
8368 gsi_next (&si);
8369 if (seen_store)
8370 {
8371 if (STMT_VINFO_GROUPED_ACCESS (seen_store))
8372 /* Interleaving. If SEEN_STORE is set, the
8373 vectorization of the interleaving chain was
8374 completed - free all the stores in the chain. */
8375 vect_remove_stores (DR_GROUP_FIRST_ELEMENT (seen_store));
8376 else
8377 /* Free the attached stmt_vec_info and remove the stmt. */
8378 loop_vinfo->remove_stmt (stmt_info);
7431 } 8379 }
7432 } 8380 }
7433 8381 }
7434 if (dump_enabled_p ()) 8382
8383 /* Stub out scalar statements that must not survive vectorization.
8384 Doing this here helps with grouped statements, or statements that
8385 are involved in patterns. */
8386 for (gimple_stmt_iterator gsi = gsi_start_bb (bb);
8387 !gsi_end_p (gsi); gsi_next (&gsi))
8388 {
8389 gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi));
8390 if (call && gimple_call_internal_p (call, IFN_MASK_LOAD))
7435 { 8391 {
7436 dump_printf_loc (MSG_NOTE, vect_location, 8392 tree lhs = gimple_get_lhs (call);
7437 "------>vectorizing statement: "); 8393 if (!VECTOR_TYPE_P (TREE_TYPE (lhs)))
7438 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0);
7439 }
7440
7441 stmt_info = vinfo_for_stmt (stmt);
7442
7443 /* vector stmts created in the outer-loop during vectorization of
7444 stmts in an inner-loop may not have a stmt_info, and do not
7445 need to be vectorized. */
7446 if (!stmt_info)
7447 {
7448 gsi_next (&si);
7449 continue;
7450 }
7451
7452 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info))
7453 vect_loop_kill_debug_uses (loop, stmt);
7454
7455 if (!STMT_VINFO_RELEVANT_P (stmt_info)
7456 && !STMT_VINFO_LIVE_P (stmt_info))
7457 {
7458 if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7459 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
7460 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7461 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7462 {
7463 stmt = pattern_stmt;
7464 stmt_info = vinfo_for_stmt (stmt);
7465 }
7466 else
7467 {
7468 gsi_next (&si);
7469 continue;
7470 }
7471 }
7472 else if (STMT_VINFO_IN_PATTERN_P (stmt_info)
7473 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info))
7474 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt))
7475 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt))))
7476 transform_pattern_stmt = true;
7477
7478 /* If pattern statement has def stmts, vectorize them too. */
7479 if (is_pattern_stmt_p (stmt_info))
7480 {
7481 if (pattern_def_seq == NULL)
7482 { 8394 {
7483 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); 8395 tree zero = build_zero_cst (TREE_TYPE (lhs));
7484 pattern_def_si = gsi_start (pattern_def_seq); 8396 gimple *new_stmt = gimple_build_assign (lhs, zero);
7485 } 8397 gsi_replace (&gsi, new_stmt, true);
7486 else if (!gsi_end_p (pattern_def_si))
7487 gsi_next (&pattern_def_si);
7488 if (pattern_def_seq != NULL)
7489 {
7490 gimple *pattern_def_stmt = NULL;
7491 stmt_vec_info pattern_def_stmt_info = NULL;
7492
7493 while (!gsi_end_p (pattern_def_si))
7494 {
7495 pattern_def_stmt = gsi_stmt (pattern_def_si);
7496 pattern_def_stmt_info
7497 = vinfo_for_stmt (pattern_def_stmt);
7498 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info)
7499 || STMT_VINFO_LIVE_P (pattern_def_stmt_info))
7500 break;
7501 gsi_next (&pattern_def_si);
7502 }
7503
7504 if (!gsi_end_p (pattern_def_si))
7505 {
7506 if (dump_enabled_p ())
7507 {
7508 dump_printf_loc (MSG_NOTE, vect_location,
7509 "==> vectorizing pattern def "
7510 "stmt: ");
7511 dump_gimple_stmt (MSG_NOTE, TDF_SLIM,
7512 pattern_def_stmt, 0);
7513 }
7514
7515 stmt = pattern_def_stmt;
7516 stmt_info = pattern_def_stmt_info;
7517 }
7518 else
7519 {
7520 pattern_def_si = gsi_none ();
7521 transform_pattern_stmt = false;
7522 }
7523 }
7524 else
7525 transform_pattern_stmt = false;
7526 }
7527
7528 if (STMT_VINFO_VECTYPE (stmt_info))
7529 {
7530 unsigned int nunits
7531 = (unsigned int)
7532 TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info));
7533 if (!STMT_SLP_TYPE (stmt_info)
7534 && nunits != (unsigned int) vf
7535 && dump_enabled_p ())
7536 /* For SLP VF is set according to unrolling factor, and not
7537 to vector size, hence for SLP this print is not valid. */
7538 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n");
7539 }
7540
7541 /* SLP. Schedule all the SLP instances when the first SLP stmt is
7542 reached. */
7543 if (STMT_SLP_TYPE (stmt_info))
7544 {
7545 if (!slp_scheduled)
7546 {
7547 slp_scheduled = true;
7548
7549 if (dump_enabled_p ())
7550 dump_printf_loc (MSG_NOTE, vect_location,
7551 "=== scheduling SLP instances ===\n");
7552
7553 vect_schedule_slp (loop_vinfo);
7554 }
7555
7556 /* Hybrid SLP stmts must be vectorized in addition to SLP. */
7557 if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info))
7558 {
7559 if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
7560 {
7561 pattern_def_seq = NULL;
7562 gsi_next (&si);
7563 }
7564 continue;
7565 } 8398 }
7566 } 8399 }
7567 8400 }
7568 /* -------- vectorize statement ------------ */
7569 if (dump_enabled_p ())
7570 dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n");
7571
7572 grouped_store = false;
7573 is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL);
7574 if (is_store)
7575 {
7576 if (STMT_VINFO_GROUPED_ACCESS (stmt_info))
7577 {
7578 /* Interleaving. If IS_STORE is TRUE, the vectorization of the
7579 interleaving chain was completed - free all the stores in
7580 the chain. */
7581 gsi_next (&si);
7582 vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info));
7583 }
7584 else
7585 {
7586 /* Free the attached stmt_vec_info and remove the stmt. */
7587 gimple *store = gsi_stmt (si);
7588 free_stmt_vec_info (store);
7589 unlink_stmt_vdef (store);
7590 gsi_remove (&si, true);
7591 release_defs (store);
7592 }
7593
7594 /* Stores can only appear at the end of pattern statements. */
7595 gcc_assert (!transform_pattern_stmt);
7596 pattern_def_seq = NULL;
7597 }
7598 else if (!transform_pattern_stmt && gsi_end_p (pattern_def_si))
7599 {
7600 pattern_def_seq = NULL;
7601 gsi_next (&si);
7602 }
7603 } /* stmts in BB */
7604 } /* BBs in loop */ 8401 } /* BBs in loop */
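The IFN_MASK_LOAD hunk added above appears to stub out scalar IFN_MASK_LOAD calls left behind after vectorization: a call whose result is not a vector type has no remaining consumers and no scalar expansion, so it is rewritten as lhs = 0 and left for later cleanup to remove. A minimal source-level illustration of the conditional-load idiom such a call represents (plain C with made-up data, not the GIMPLE the pass manipulates):

/* Sketch only: element i is loaded where cond[i] holds; inactive lanes get
   a defined fallback value (zero), which is also the value the leftover
   scalar call is stubbed out with above.  */
#include <stdio.h>

#define N 8

int main (void)
{
  int a[N]    = {1, 2, 3, 4, 5, 6, 7, 8};
  int cond[N] = {1, 0, 1, 0, 1, 1, 0, 1};
  int out[N];

  for (int i = 0; i < N; i++)
    out[i] = cond[i] ? a[i] : 0;   /* masked load with zero fill */

  for (int i = 0; i < N; i++)
    printf ("%d%c", out[i], i + 1 == N ? '\n' : ' ');
  return 0;
}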
7605 8402
7606 slpeel_make_loop_iterate_ntimes (loop, niters_vector); 8403 /* The vectorization factor is always > 1, so if we use an IV increment of 1,
7607 8404 a zero NITERS becomes a nonzero NITERS_VECTOR. */
7608 scale_profile_for_vect_loop (loop, vf); 8405 if (integer_onep (step_vector))
7609 8406 niters_no_overflow = true;
8407 vect_set_loop_condition (loop, loop_vinfo, niters_vector, step_vector,
8408 niters_vector_mult_vf, !niters_no_overflow);
8409
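On the comment just above: with an IV step of one vector iteration and VF > 1, a NITERS that has wrapped to zero at the type's full range still yields a nonzero, representable NITERS_VECTOR = NITERS / VF, which is presumably why integer_onep (step_vector) lets niters_no_overflow be set. A minimal arithmetic sketch with made-up numbers (plain C, not vectorizer code):

/* Sketch: a wrapped-to-zero NITERS still gives a nonzero NITERS_VECTOR.  */
#include <stdint.h>
#include <stdio.h>

int main (void)
{
  uint64_t scalar_iters = UINT64_C (1) << 32;  /* 2^32 scalar iterations */
  uint32_t niters = (uint32_t) scalar_iters;   /* wraps to 0 in 32 bits */
  uint32_t vf = 4;
  uint32_t niters_vector = (uint32_t) (scalar_iters / vf); /* 2^30, nonzero */

  printf ("niters (32-bit) = %u, niters_vector = %u\n", niters, niters_vector);
  return 0;
}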
8410 unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo);
8411 scale_profile_for_vect_loop (loop, assumed_vf);
8412
8413 /* True if the final iteration might not handle a full vector's
8414 worth of scalar iterations. */
8415 bool final_iter_may_be_partial = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo);
7610 /* The minimum number of iterations performed by the epilogue. This 8416 /* The minimum number of iterations performed by the epilogue. This
7611 is 1 when peeling for gaps because we always need a final scalar 8417 is 1 when peeling for gaps because we always need a final scalar
7612 iteration. */ 8418 iteration. */
7613 int min_epilogue_iters = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 0; 8419 int min_epilogue_iters = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 0;
7614 /* +1 to convert latch counts to loop iteration counts, 8420 /* +1 to convert latch counts to loop iteration counts,
7615 -min_epilogue_iters to remove iterations that cannot be performed 8421 -min_epilogue_iters to remove iterations that cannot be performed
7616 by the vector code. */ 8422 by the vector code. */
7617 int bias = 1 - min_epilogue_iters; 8423 int bias_for_lowest = 1 - min_epilogue_iters;
8424 int bias_for_assumed = bias_for_lowest;
8425 int alignment_npeels = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
8426 if (alignment_npeels && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
8427 {
8428 /* When the amount of peeling is known at compile time, the first
8429 iteration will have exactly alignment_npeels active elements.
8430 In the worst case it will have at least one. */
8431 int min_first_active = (alignment_npeels > 0 ? alignment_npeels : 1);
8432 bias_for_lowest += lowest_vf - min_first_active;
8433 bias_for_assumed += assumed_vf - min_first_active;
8434 }
7618 /* In these calculations the "- 1" converts loop iteration counts 8435 /* In these calculations the "- 1" converts loop iteration counts
7619 back to latch counts. */ 8436 back to latch counts. */
7620 if (loop->any_upper_bound) 8437 if (loop->any_upper_bound)
7621 loop->nb_iterations_upper_bound 8438 loop->nb_iterations_upper_bound
7622 = wi::udiv_floor (loop->nb_iterations_upper_bound + bias, vf) - 1; 8439 = (final_iter_may_be_partial
8440 ? wi::udiv_ceil (loop->nb_iterations_upper_bound + bias_for_lowest,
8441 lowest_vf) - 1
8442 : wi::udiv_floor (loop->nb_iterations_upper_bound + bias_for_lowest,
8443 lowest_vf) - 1);
7623 if (loop->any_likely_upper_bound) 8444 if (loop->any_likely_upper_bound)
7624 loop->nb_iterations_likely_upper_bound 8445 loop->nb_iterations_likely_upper_bound
7625 = wi::udiv_floor (loop->nb_iterations_likely_upper_bound + bias, vf) - 1; 8446 = (final_iter_may_be_partial
8447 ? wi::udiv_ceil (loop->nb_iterations_likely_upper_bound
8448 + bias_for_lowest, lowest_vf) - 1
8449 : wi::udiv_floor (loop->nb_iterations_likely_upper_bound
8450 + bias_for_lowest, lowest_vf) - 1);
7626 if (loop->any_estimate) 8451 if (loop->any_estimate)
7627 loop->nb_iterations_estimate 8452 loop->nb_iterations_estimate
7628 = wi::udiv_floor (loop->nb_iterations_estimate + bias, vf) - 1; 8453 = (final_iter_may_be_partial
8454 ? wi::udiv_ceil (loop->nb_iterations_estimate + bias_for_assumed,
8455 assumed_vf) - 1
8456 : wi::udiv_floor (loop->nb_iterations_estimate + bias_for_assumed,
8457 assumed_vf) - 1);
7629 8458
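A worked example of the three bound updates above, using made-up numbers and plain C in place of widest_int: the +bias term converts a latch count to an iteration count and removes iterations the vector body cannot perform, the division by the vectorization factor gives vector iterations, and the trailing -1 converts back to a latch count; fully-masked loops round up because the final vector iteration may be partial.

/* Sketch of the upper-bound arithmetic, with made-up numbers.  */
#include <stdio.h>

static unsigned udiv_floor (unsigned a, unsigned b) { return a / b; }
static unsigned udiv_ceil  (unsigned a, unsigned b) { return (a + b - 1) / b; }

int main (void)
{
  unsigned vf = 8;

  /* Latch bound 103 -> at most 104 scalar iterations.  */
  unsigned plain = udiv_floor (103 + 1, vf) - 1;   /* no gaps, bias 1: 12 */
  unsigned gaps  = udiv_floor (103 + 0, vf) - 1;   /* peeling for gaps, bias 0: 11 */

  /* Latch bound 100 -> 101 iterations; a fully-masked loop rounds up.  */
  unsigned floor_b = udiv_floor (100 + 1, vf) - 1; /* 11 */
  unsigned masked  = udiv_ceil  (100 + 1, vf) - 1; /* 12: last vector iteration partial */

  printf ("%u %u %u %u\n", plain, gaps, floor_b, masked);
  return 0;
}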
7630 if (dump_enabled_p ()) 8459 if (dump_enabled_p ())
7631 { 8460 {
7632 if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)) 8461 if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo))
7633 { 8462 {
7637 dump_printf_loc (MSG_NOTE, vect_location, 8466 dump_printf_loc (MSG_NOTE, vect_location,
7638 "OUTER LOOP VECTORIZED\n"); 8467 "OUTER LOOP VECTORIZED\n");
7639 dump_printf (MSG_NOTE, "\n"); 8468 dump_printf (MSG_NOTE, "\n");
7640 } 8469 }
7641 else 8470 else
7642 dump_printf_loc (MSG_NOTE, vect_location, 8471 {
7643 "LOOP EPILOGUE VECTORIZED (VS=%d)\n", 8472 dump_printf_loc (MSG_NOTE, vect_location,
7644 current_vector_size); 8473 "LOOP EPILOGUE VECTORIZED (VS=");
8474 dump_dec (MSG_NOTE, current_vector_size);
8475 dump_printf (MSG_NOTE, ")\n");
8476 }
7645 } 8477 }
7646 8478
7647 /* Free SLP instances here because otherwise stmt reference counting 8479 /* Free SLP instances here because otherwise stmt reference counting
7648 won't work. */ 8480 won't work. */
7649 slp_instance instance; 8481 slp_instance instance;
7650 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance) 8482 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance)
7651 vect_free_slp_instance (instance); 8483 vect_free_slp_instance (instance, true);
7652 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release (); 8484 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release ();
7653 /* Clear the safelen field since its value is invalid after vectorization, 8485 /* Clear the safelen field since its value is invalid after vectorization,
7654 as the vectorized loop can have loop-carried dependencies. */ 8486 as the vectorized loop can have loop-carried dependencies. */
7655 loop->safelen = 0; 8487 loop->safelen = 0;
7656 8488
7657 /* Don't vectorize the epilogue of an epilogue loop. */ 8489 /* Don't vectorize the epilogue of an epilogue loop. */
7658 if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)) 8490 if (LOOP_VINFO_EPILOGUE_P (loop_vinfo))
7659 epilogue = NULL; 8491 epilogue = NULL;
7660 8492
8493 if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK))
8494 epilogue = NULL;
8495
7661 if (epilogue) 8496 if (epilogue)
7662 { 8497 {
7663 unsigned int vector_sizes 8498 auto_vector_sizes vector_sizes;
7664 = targetm.vectorize.autovectorize_vector_sizes (); 8499 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes);
7665 vector_sizes &= current_vector_size - 1; 8500 unsigned int next_size = 0;
7666 8501
7667 if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK)) 8502 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
7668 epilogue = NULL; 8503 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0
7669 else if (!vector_sizes) 8504 && known_eq (vf, lowest_vf))
7670 epilogue = NULL; 8505 {
7671 else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) 8506 unsigned int eiters
7672 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0) 8507 = (LOOP_VINFO_INT_NITERS (loop_vinfo)
7673 { 8508 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
7674 int smallest_vec_size = 1 << ctz_hwi (vector_sizes); 8509 eiters = eiters % lowest_vf;
7675 int ratio = current_vector_size / smallest_vec_size; 8510 epilogue->nb_iterations_upper_bound = eiters - 1;
7676 int eiters = LOOP_VINFO_INT_NITERS (loop_vinfo) 8511
7677 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); 8512 unsigned int ratio;
7678 eiters = eiters % vf; 8513 while (next_size < vector_sizes.length ()
7679 8514 && !(constant_multiple_p (current_vector_size,
7680 epilogue->nb_iterations_upper_bound = eiters - 1; 8515 vector_sizes[next_size], &ratio)
7681 8516 && eiters >= lowest_vf / ratio))
7682 if (eiters < vf / ratio) 8517 next_size += 1;
7683 epilogue = NULL; 8518 }
7684 } 8519 else
8520 while (next_size < vector_sizes.length ()
8521 && maybe_lt (current_vector_size, vector_sizes[next_size]))
8522 next_size += 1;
8523
8524 if (next_size == vector_sizes.length ())
8525 epilogue = NULL;
7685 } 8526 }
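A sketch of the epilogue vector-size search above, under an assumed target offering 64-, 32-, 16- and 8-byte vectors: when the iteration count is known, the loop keeps advancing next_size until it finds a size that evenly divides current_vector_size (giving ratio) and whose implied epilogue VF, lowest_vf / ratio, still fits in the eiters iterations that remain.

/* Sketch of the size selection; target sizes and counts are made up.  */
#include <stdio.h>

int main (void)
{
  unsigned vector_sizes[] = {64, 32, 16, 8};
  unsigned n_sizes = sizeof (vector_sizes) / sizeof (vector_sizes[0]);
  unsigned current_vector_size = 32;   /* size used by the main loop */
  unsigned lowest_vf = 8;              /* VF of the main loop */
  unsigned niters = 1005, peel = 2;
  unsigned eiters = (niters - peel) % lowest_vf;   /* 3 iterations left */

  unsigned next_size = 0;
  while (next_size < n_sizes)
    {
      unsigned cand = vector_sizes[next_size];
      /* Models constant_multiple_p: CAND must divide the current size.  */
      if (cand <= current_vector_size && current_vector_size % cand == 0)
        {
          unsigned ratio = current_vector_size / cand;
          if (eiters >= lowest_vf / ratio)
            break;                     /* this size can still be useful */
        }
      next_size++;
    }

  if (next_size == n_sizes)
    printf ("no epilogue vectorization\n");
  else
    printf ("epilogue vector size: %u bytes (eiters = %u)\n",
            vector_sizes[next_size], eiters);
  return 0;
}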
7686 8527
7687 if (epilogue) 8528 if (epilogue)
7688 { 8529 {
7689 epilogue->force_vectorize = loop->force_vectorize; 8530 epilogue->force_vectorize = loop->force_vectorize;
7779 add_bb_to_loop (store_bb, bb_loop); 8620 add_bb_to_loop (store_bb, bb_loop);
7780 e->flags = EDGE_TRUE_VALUE; 8621 e->flags = EDGE_TRUE_VALUE;
7781 efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE); 8622 efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE);
7782 /* Give the edge into STORE_BB an unlikely probability. */ 8623 /* Give the edge into STORE_BB an unlikely probability. */
7783 efalse->probability = profile_probability::unlikely (); 8624 efalse->probability = profile_probability::unlikely ();
7784 store_bb->frequency = PROB_ALWAYS - EDGE_FREQUENCY (efalse); 8625 store_bb->count = efalse->count ();
7785 make_single_succ_edge (store_bb, join_bb, EDGE_FALLTHRU); 8626 make_single_succ_edge (store_bb, join_bb, EDGE_FALLTHRU);
7786 if (dom_info_available_p (CDI_DOMINATORS)) 8627 if (dom_info_available_p (CDI_DOMINATORS))
7787 set_immediate_dominator (CDI_DOMINATORS, store_bb, bb); 8628 set_immediate_dominator (CDI_DOMINATORS, store_bb, bb);
7788 if (dump_enabled_p ()) 8629 if (dump_enabled_p ())
7789 dump_printf_loc (MSG_NOTE, vect_location, 8630 dump_printf_loc (MSG_NOTE, vect_location,
7823 gsi_to = gsi_start_bb (store_bb); 8664 gsi_to = gsi_start_bb (store_bb);
7824 gsi_move_before (&gsi_from, &gsi_to); 8665 gsi_move_before (&gsi_from, &gsi_to);
7825 /* Set GSI_TO to the start of the now non-empty block. */ 8666 /* Set GSI_TO to the start of the now non-empty block. */
7826 gsi_to = gsi_start_bb (store_bb); 8667 gsi_to = gsi_start_bb (store_bb);
7827 if (dump_enabled_p ()) 8668 if (dump_enabled_p ())
7828 { 8669 dump_printf_loc (MSG_NOTE, vect_location,
7829 dump_printf_loc (MSG_NOTE, vect_location, 8670 "Move stmt to created bb\n%G", last);
7830 "Move stmt to created bb\n");
7831 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, last, 0);
7832 }
7833 /* Move all stored value producers if possible. */ 8671 /* Move all stored value producers if possible. */
7834 while (!gsi_end_p (gsi)) 8672 while (!gsi_end_p (gsi))
7835 { 8673 {
7836 tree lhs; 8674 tree lhs;
7837 imm_use_iterator imm_iter; 8675 imm_use_iterator imm_iter;
7891 && gimple_vuse (stmt1) != gimple_vuse (last_store)) 8729 && gimple_vuse (stmt1) != gimple_vuse (last_store))
7892 break; 8730 break;
7893 8731
7894 /* Can move STMT1 to STORE_BB. */ 8732 /* Can move STMT1 to STORE_BB. */
7895 if (dump_enabled_p ()) 8733 if (dump_enabled_p ())
7896 { 8734 dump_printf_loc (MSG_NOTE, vect_location,
7897 dump_printf_loc (MSG_NOTE, vect_location, 8735 "Move stmt to created bb\n%G", stmt1);
7898 "Move stmt to created bb\n");
7899 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0);
7900 }
7901 gsi_move_before (&gsi_from, &gsi_to); 8736 gsi_move_before (&gsi_from, &gsi_to);
7902 /* Shift GSI_TO for further insertion. */ 8737 /* Shift GSI_TO for further insertion. */
7903 gsi_prev (&gsi_to); 8738 gsi_prev (&gsi_to);
7904 } 8739 }
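The hunks in this region belong to the optimization that sinks masked stores (and, where possible, the statements producing the stored values) behind a runtime test of the mask, so that a chunk whose mask is entirely zero skips the store block. A rough scalar model of the resulting control flow, with made-up data (the real transformation rewrites GIMPLE MASK_STORE calls, not C):

/* Sketch: guard a block of conditional stores with a cheap
   "is the mask all zero?" test per vector-sized chunk (chunks of 4).  */
#include <stdio.h>

#define N 12
#define VF 4

int main (void)
{
  int a[N], b[N];
  for (int i = 0; i < N; i++)
    {
      a[i] = 0;
      b[i] = (i >= 8) ? i : -1;      /* only the last chunk stores anything */
    }

  for (int i = 0; i < N; i += VF)
    {
      int mask[VF], any = 0;
      for (int j = 0; j < VF; j++)
        {
          mask[j] = b[i + j] > 0;
          any |= mask[j];
        }
      if (any)                       /* equivalent of the STORE_BB guard */
        for (int j = 0; j < VF; j++)
          if (mask[j])
            a[i + j] = b[i + j];     /* the masked store itself */
    }

  for (int i = 0; i < N; i++)
    printf ("%d%c", a[i], i + 1 == N ? '\n' : ' ');
  return 0;
}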
7905 /* Move other masked stores with the same mask into STORE_BB. */ 8740 /* Move other masked stores with the same mask into STORE_BB. */