Mercurial > hg > CbC > CbC_gcc
comparison gcc/tree-vect-loop.c @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
comparison
equal
deleted
inserted
replaced
111:04ced10e8804 | 131:84e7813d76e9 |
---|---|
1 /* Loop Vectorization | 1 /* Loop Vectorization |
2 Copyright (C) 2003-2017 Free Software Foundation, Inc. | 2 Copyright (C) 2003-2018 Free Software Foundation, Inc. |
3 Contributed by Dorit Naishlos <dorit@il.ibm.com> and | 3 Contributed by Dorit Naishlos <dorit@il.ibm.com> and |
4 Ira Rosen <irar@il.ibm.com> | 4 Ira Rosen <irar@il.ibm.com> |
5 | 5 |
6 This file is part of GCC. | 6 This file is part of GCC. |
7 | 7 |
48 #include "tree-vectorizer.h" | 48 #include "tree-vectorizer.h" |
49 #include "gimple-fold.h" | 49 #include "gimple-fold.h" |
50 #include "cgraph.h" | 50 #include "cgraph.h" |
51 #include "tree-cfg.h" | 51 #include "tree-cfg.h" |
52 #include "tree-if-conv.h" | 52 #include "tree-if-conv.h" |
53 #include "internal-fn.h" | |
54 #include "tree-vector-builder.h" | |
55 #include "vec-perm-indices.h" | |
56 #include "tree-eh.h" | |
53 | 57 |
54 /* Loop Vectorization Pass. | 58 /* Loop Vectorization Pass. |
55 | 59 |
56 This pass tries to vectorize loops. | 60 This pass tries to vectorize loops. |
57 | 61 |
149 http://gcc.gnu.org/projects/tree-ssa/vectorization.html | 153 http://gcc.gnu.org/projects/tree-ssa/vectorization.html |
150 */ | 154 */ |
151 | 155 |
152 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *); | 156 static void vect_estimate_min_profitable_iters (loop_vec_info, int *, int *); |
153 | 157 |
158 /* Subroutine of vect_determine_vf_for_stmt that handles only one | |
159 statement. VECTYPE_MAYBE_SET_P is true if STMT_VINFO_VECTYPE | |
160 may already be set for general statements (not just data refs). */ | |
161 | |
162 static opt_result | |
163 vect_determine_vf_for_stmt_1 (stmt_vec_info stmt_info, | |
164 bool vectype_maybe_set_p, | |
165 poly_uint64 *vf, | |
166 vec<stmt_vec_info > *mask_producers) | |
167 { | |
168 gimple *stmt = stmt_info->stmt; | |
169 | |
170 if ((!STMT_VINFO_RELEVANT_P (stmt_info) | |
171 && !STMT_VINFO_LIVE_P (stmt_info)) | |
172 || gimple_clobber_p (stmt)) | |
173 { | |
174 if (dump_enabled_p ()) | |
175 dump_printf_loc (MSG_NOTE, vect_location, "skip.\n"); | |
176 return opt_result::success (); | |
177 } | |
178 | |
179 tree stmt_vectype, nunits_vectype; | |
180 opt_result res = vect_get_vector_types_for_stmt (stmt_info, &stmt_vectype, | |
181 &nunits_vectype); | |
182 if (!res) | |
183 return res; | |
184 | |
185 if (stmt_vectype) | |
186 { | |
187 if (STMT_VINFO_VECTYPE (stmt_info)) | |
188 /* The only case when a vectype had been already set is for stmts | |
189 that contain a data ref, or for "pattern-stmts" (stmts generated | |
190 by the vectorizer to represent/replace a certain idiom). */ | |
191 gcc_assert ((STMT_VINFO_DATA_REF (stmt_info) | |
192 || vectype_maybe_set_p) | |
193 && STMT_VINFO_VECTYPE (stmt_info) == stmt_vectype); | |
194 else if (stmt_vectype == boolean_type_node) | |
195 mask_producers->safe_push (stmt_info); | |
196 else | |
197 STMT_VINFO_VECTYPE (stmt_info) = stmt_vectype; | |
198 } | |
199 | |
200 if (nunits_vectype) | |
201 vect_update_max_nunits (vf, nunits_vectype); | |
202 | |
203 return opt_result::success (); | |
204 } | |
205 | |
206 /* Subroutine of vect_determine_vectorization_factor. Set the vector | |
207 types of STMT_INFO and all attached pattern statements and update | |
208 the vectorization factor VF accordingly. If some of the statements | |
209 produce a mask result whose vector type can only be calculated later, | |
210 add them to MASK_PRODUCERS. Return true on success or false if | |
211 something prevented vectorization. */ | |
212 | |
213 static opt_result | |
214 vect_determine_vf_for_stmt (stmt_vec_info stmt_info, poly_uint64 *vf, | |
215 vec<stmt_vec_info > *mask_producers) | |
216 { | |
217 vec_info *vinfo = stmt_info->vinfo; | |
218 if (dump_enabled_p ()) | |
219 dump_printf_loc (MSG_NOTE, vect_location, "==> examining statement: %G", | |
220 stmt_info->stmt); | |
221 opt_result res | |
222 = vect_determine_vf_for_stmt_1 (stmt_info, false, vf, mask_producers); | |
223 if (!res) | |
224 return res; | |
225 | |
226 if (STMT_VINFO_IN_PATTERN_P (stmt_info) | |
227 && STMT_VINFO_RELATED_STMT (stmt_info)) | |
228 { | |
229 gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); | |
230 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info); | |
231 | |
232 /* If a pattern statement has def stmts, analyze them too. */ | |
233 for (gimple_stmt_iterator si = gsi_start (pattern_def_seq); | |
234 !gsi_end_p (si); gsi_next (&si)) | |
235 { | |
236 stmt_vec_info def_stmt_info = vinfo->lookup_stmt (gsi_stmt (si)); | |
237 if (dump_enabled_p ()) | |
238 dump_printf_loc (MSG_NOTE, vect_location, | |
239 "==> examining pattern def stmt: %G", | |
240 def_stmt_info->stmt); | |
241 if (!vect_determine_vf_for_stmt_1 (def_stmt_info, true, | |
242 vf, mask_producers)) | |
243 res = vect_determine_vf_for_stmt_1 (def_stmt_info, true, | |
244 vf, mask_producers); | |
245 if (!res) | |
246 return res; | |
247 } | |
248 | |
249 if (dump_enabled_p ()) | |
250 dump_printf_loc (MSG_NOTE, vect_location, | |
251 "==> examining pattern statement: %G", | |
252 stmt_info->stmt); | |
253 res = vect_determine_vf_for_stmt_1 (stmt_info, true, vf, mask_producers); | |
254 if (!res) | |
255 return res; | |
256 } | |
257 | |
258 return opt_result::success (); | |
259 } | |
260 | |
154 /* Function vect_determine_vectorization_factor | 261 /* Function vect_determine_vectorization_factor |
155 | 262 |
156 Determine the vectorization factor (VF). VF is the number of data elements | 263 Determine the vectorization factor (VF). VF is the number of data elements |
157 that are operated upon in parallel in a single iteration of the vectorized | 264 that are operated upon in parallel in a single iteration of the vectorized |
158 loop. For example, when vectorizing a loop that operates on 4byte elements, | 265 loop. For example, when vectorizing a loop that operates on 4byte elements, |
174 for (i=0; i<N; i+=VF){ | 281 for (i=0; i<N; i+=VF){ |
175 a[i:VF] = b[i:VF] + c[i:VF]; | 282 a[i:VF] = b[i:VF] + c[i:VF]; |
176 } | 283 } |
177 */ | 284 */ |
178 | 285 |
179 static bool | 286 static opt_result |
180 vect_determine_vectorization_factor (loop_vec_info loop_vinfo) | 287 vect_determine_vectorization_factor (loop_vec_info loop_vinfo) |
181 { | 288 { |
182 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | 289 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
183 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); | 290 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); |
184 unsigned nbbs = loop->num_nodes; | 291 unsigned nbbs = loop->num_nodes; |
185 unsigned int vectorization_factor = 0; | 292 poly_uint64 vectorization_factor = 1; |
186 tree scalar_type = NULL_TREE; | 293 tree scalar_type = NULL_TREE; |
187 gphi *phi; | 294 gphi *phi; |
188 tree vectype; | 295 tree vectype; |
189 unsigned int nunits; | |
190 stmt_vec_info stmt_info; | 296 stmt_vec_info stmt_info; |
191 unsigned i; | 297 unsigned i; |
192 HOST_WIDE_INT dummy; | |
193 gimple *stmt, *pattern_stmt = NULL; | |
194 gimple_seq pattern_def_seq = NULL; | |
195 gimple_stmt_iterator pattern_def_si = gsi_none (); | |
196 bool analyze_pattern_stmt = false; | |
197 bool bool_result; | |
198 auto_vec<stmt_vec_info> mask_producers; | 298 auto_vec<stmt_vec_info> mask_producers; |
199 | 299 |
200 if (dump_enabled_p ()) | 300 DUMP_VECT_SCOPE ("vect_determine_vectorization_factor"); |
201 dump_printf_loc (MSG_NOTE, vect_location, | |
202 "=== vect_determine_vectorization_factor ===\n"); | |
203 | 301 |
204 for (i = 0; i < nbbs; i++) | 302 for (i = 0; i < nbbs; i++) |
205 { | 303 { |
206 basic_block bb = bbs[i]; | 304 basic_block bb = bbs[i]; |
207 | 305 |
208 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); | 306 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); |
209 gsi_next (&si)) | 307 gsi_next (&si)) |
210 { | 308 { |
211 phi = si.phi (); | 309 phi = si.phi (); |
212 stmt_info = vinfo_for_stmt (phi); | 310 stmt_info = loop_vinfo->lookup_stmt (phi); |
213 if (dump_enabled_p ()) | 311 if (dump_enabled_p ()) |
214 { | 312 dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: %G", |
215 dump_printf_loc (MSG_NOTE, vect_location, "==> examining phi: "); | 313 phi); |
216 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0); | |
217 } | |
218 | 314 |
219 gcc_assert (stmt_info); | 315 gcc_assert (stmt_info); |
220 | 316 |
221 if (STMT_VINFO_RELEVANT_P (stmt_info) | 317 if (STMT_VINFO_RELEVANT_P (stmt_info) |
222 || STMT_VINFO_LIVE_P (stmt_info)) | 318 || STMT_VINFO_LIVE_P (stmt_info)) |
223 { | 319 { |
224 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info)); | 320 gcc_assert (!STMT_VINFO_VECTYPE (stmt_info)); |
225 scalar_type = TREE_TYPE (PHI_RESULT (phi)); | 321 scalar_type = TREE_TYPE (PHI_RESULT (phi)); |
226 | 322 |
227 if (dump_enabled_p ()) | 323 if (dump_enabled_p ()) |
228 { | 324 dump_printf_loc (MSG_NOTE, vect_location, |
229 dump_printf_loc (MSG_NOTE, vect_location, | 325 "get vectype for scalar type: %T\n", |
230 "get vectype for scalar type: "); | 326 scalar_type); |
231 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type); | |
232 dump_printf (MSG_NOTE, "\n"); | |
233 } | |
234 | 327 |
235 vectype = get_vectype_for_scalar_type (scalar_type); | 328 vectype = get_vectype_for_scalar_type (scalar_type); |
236 if (!vectype) | 329 if (!vectype) |
237 { | 330 return opt_result::failure_at (phi, |
238 if (dump_enabled_p ()) | 331 "not vectorized: unsupported " |
239 { | 332 "data-type %T\n", |
240 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 333 scalar_type); |
241 "not vectorized: unsupported " | |
242 "data-type "); | |
243 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, | |
244 scalar_type); | |
245 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); | |
246 } | |
247 return false; | |
248 } | |
249 STMT_VINFO_VECTYPE (stmt_info) = vectype; | 334 STMT_VINFO_VECTYPE (stmt_info) = vectype; |
335 | |
336 if (dump_enabled_p ()) | |
337 dump_printf_loc (MSG_NOTE, vect_location, "vectype: %T\n", | |
338 vectype); | |
250 | 339 |
251 if (dump_enabled_p ()) | 340 if (dump_enabled_p ()) |
252 { | 341 { |
253 dump_printf_loc (MSG_NOTE, vect_location, "vectype: "); | 342 dump_printf_loc (MSG_NOTE, vect_location, "nunits = "); |
254 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype); | 343 dump_dec (MSG_NOTE, TYPE_VECTOR_SUBPARTS (vectype)); |
255 dump_printf (MSG_NOTE, "\n"); | |
256 } | |
257 | |
258 nunits = TYPE_VECTOR_SUBPARTS (vectype); | |
259 if (dump_enabled_p ()) | |
260 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", | |
261 nunits); | |
262 | |
263 if (!vectorization_factor | |
264 || (nunits > vectorization_factor)) | |
265 vectorization_factor = nunits; | |
266 } | |
267 } | |
268 | |
269 for (gimple_stmt_iterator si = gsi_start_bb (bb); | |
270 !gsi_end_p (si) || analyze_pattern_stmt;) | |
271 { | |
272 tree vf_vectype; | |
273 | |
274 if (analyze_pattern_stmt) | |
275 stmt = pattern_stmt; | |
276 else | |
277 stmt = gsi_stmt (si); | |
278 | |
279 stmt_info = vinfo_for_stmt (stmt); | |
280 | |
281 if (dump_enabled_p ()) | |
282 { | |
283 dump_printf_loc (MSG_NOTE, vect_location, | |
284 "==> examining statement: "); | |
285 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); | |
286 } | |
287 | |
288 gcc_assert (stmt_info); | |
289 | |
290 /* Skip stmts which do not need to be vectorized. */ | |
291 if ((!STMT_VINFO_RELEVANT_P (stmt_info) | |
292 && !STMT_VINFO_LIVE_P (stmt_info)) | |
293 || gimple_clobber_p (stmt)) | |
294 { | |
295 if (STMT_VINFO_IN_PATTERN_P (stmt_info) | |
296 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) | |
297 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) | |
298 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) | |
299 { | |
300 stmt = pattern_stmt; | |
301 stmt_info = vinfo_for_stmt (pattern_stmt); | |
302 if (dump_enabled_p ()) | |
303 { | |
304 dump_printf_loc (MSG_NOTE, vect_location, | |
305 "==> examining pattern statement: "); | |
306 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); | |
307 } | |
308 } | |
309 else | |
310 { | |
311 if (dump_enabled_p ()) | |
312 dump_printf_loc (MSG_NOTE, vect_location, "skip.\n"); | |
313 gsi_next (&si); | |
314 continue; | |
315 } | |
316 } | |
317 else if (STMT_VINFO_IN_PATTERN_P (stmt_info) | |
318 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) | |
319 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) | |
320 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) | |
321 analyze_pattern_stmt = true; | |
322 | |
323 /* If a pattern statement has def stmts, analyze them too. */ | |
324 if (is_pattern_stmt_p (stmt_info)) | |
325 { | |
326 if (pattern_def_seq == NULL) | |
327 { | |
328 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); | |
329 pattern_def_si = gsi_start (pattern_def_seq); | |
330 } | |
331 else if (!gsi_end_p (pattern_def_si)) | |
332 gsi_next (&pattern_def_si); | |
333 if (pattern_def_seq != NULL) | |
334 { | |
335 gimple *pattern_def_stmt = NULL; | |
336 stmt_vec_info pattern_def_stmt_info = NULL; | |
337 | |
338 while (!gsi_end_p (pattern_def_si)) | |
339 { | |
340 pattern_def_stmt = gsi_stmt (pattern_def_si); | |
341 pattern_def_stmt_info | |
342 = vinfo_for_stmt (pattern_def_stmt); | |
343 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info) | |
344 || STMT_VINFO_LIVE_P (pattern_def_stmt_info)) | |
345 break; | |
346 gsi_next (&pattern_def_si); | |
347 } | |
348 | |
349 if (!gsi_end_p (pattern_def_si)) | |
350 { | |
351 if (dump_enabled_p ()) | |
352 { | |
353 dump_printf_loc (MSG_NOTE, vect_location, | |
354 "==> examining pattern def stmt: "); | |
355 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, | |
356 pattern_def_stmt, 0); | |
357 } | |
358 | |
359 stmt = pattern_def_stmt; | |
360 stmt_info = pattern_def_stmt_info; | |
361 } | |
362 else | |
363 { | |
364 pattern_def_si = gsi_none (); | |
365 analyze_pattern_stmt = false; | |
366 } | |
367 } | |
368 else | |
369 analyze_pattern_stmt = false; | |
370 } | |
371 | |
372 if (gimple_get_lhs (stmt) == NULL_TREE | |
373 /* MASK_STORE has no lhs, but is ok. */ | |
374 && (!is_gimple_call (stmt) | |
375 || !gimple_call_internal_p (stmt) | |
376 || gimple_call_internal_fn (stmt) != IFN_MASK_STORE)) | |
377 { | |
378 if (is_gimple_call (stmt)) | |
379 { | |
380 /* Ignore calls with no lhs. These must be calls to | |
381 #pragma omp simd functions, and what vectorization factor | |
382 it really needs can't be determined until | |
383 vectorizable_simd_clone_call. */ | |
384 if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si)) | |
385 { | |
386 pattern_def_seq = NULL; | |
387 gsi_next (&si); | |
388 } | |
389 continue; | |
390 } | |
391 if (dump_enabled_p ()) | |
392 { | |
393 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
394 "not vectorized: irregular stmt."); | |
395 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, | |
396 0); | |
397 } | |
398 return false; | |
399 } | |
400 | |
401 if (VECTOR_MODE_P (TYPE_MODE (gimple_expr_type (stmt)))) | |
402 { | |
403 if (dump_enabled_p ()) | |
404 { | |
405 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
406 "not vectorized: vector stmt in loop:"); | |
407 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, 0); | |
408 } | |
409 return false; | |
410 } | |
411 | |
412 bool_result = false; | |
413 | |
414 if (STMT_VINFO_VECTYPE (stmt_info)) | |
415 { | |
416 /* The only case when a vectype had been already set is for stmts | |
417 that contain a dataref, or for "pattern-stmts" (stmts | |
418 generated by the vectorizer to represent/replace a certain | |
419 idiom). */ | |
420 gcc_assert (STMT_VINFO_DATA_REF (stmt_info) | |
421 || is_pattern_stmt_p (stmt_info) | |
422 || !gsi_end_p (pattern_def_si)); | |
423 vectype = STMT_VINFO_VECTYPE (stmt_info); | |
424 } | |
425 else | |
426 { | |
427 gcc_assert (!STMT_VINFO_DATA_REF (stmt_info)); | |
428 if (gimple_call_internal_p (stmt, IFN_MASK_STORE)) | |
429 scalar_type = TREE_TYPE (gimple_call_arg (stmt, 3)); | |
430 else | |
431 scalar_type = TREE_TYPE (gimple_get_lhs (stmt)); | |
432 | |
433 /* Bool ops don't participate in vectorization factor | |
434 computation. For comparison use compared types to | |
435 compute a factor. */ | |
436 if (VECT_SCALAR_BOOLEAN_TYPE_P (scalar_type) | |
437 && is_gimple_assign (stmt) | |
438 && gimple_assign_rhs_code (stmt) != COND_EXPR) | |
439 { | |
440 if (STMT_VINFO_RELEVANT_P (stmt_info) | |
441 || STMT_VINFO_LIVE_P (stmt_info)) | |
442 mask_producers.safe_push (stmt_info); | |
443 bool_result = true; | |
444 | |
445 if (TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) | |
446 == tcc_comparison | |
447 && !VECT_SCALAR_BOOLEAN_TYPE_P | |
448 (TREE_TYPE (gimple_assign_rhs1 (stmt)))) | |
449 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); | |
450 else | |
451 { | |
452 if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si)) | |
453 { | |
454 pattern_def_seq = NULL; | |
455 gsi_next (&si); | |
456 } | |
457 continue; | |
458 } | |
459 } | |
460 | |
461 if (dump_enabled_p ()) | |
462 { | |
463 dump_printf_loc (MSG_NOTE, vect_location, | |
464 "get vectype for scalar type: "); | |
465 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type); | |
466 dump_printf (MSG_NOTE, "\n"); | |
467 } | |
468 vectype = get_vectype_for_scalar_type (scalar_type); | |
469 if (!vectype) | |
470 { | |
471 if (dump_enabled_p ()) | |
472 { | |
473 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
474 "not vectorized: unsupported " | |
475 "data-type "); | |
476 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, | |
477 scalar_type); | |
478 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); | |
479 } | |
480 return false; | |
481 } | |
482 | |
483 if (!bool_result) | |
484 STMT_VINFO_VECTYPE (stmt_info) = vectype; | |
485 | |
486 if (dump_enabled_p ()) | |
487 { | |
488 dump_printf_loc (MSG_NOTE, vect_location, "vectype: "); | |
489 dump_generic_expr (MSG_NOTE, TDF_SLIM, vectype); | |
490 dump_printf (MSG_NOTE, "\n"); | |
491 } | |
492 } | |
493 | |
494 /* Don't try to compute VF out scalar types if we stmt | |
495 produces boolean vector. Use result vectype instead. */ | |
496 if (VECTOR_BOOLEAN_TYPE_P (vectype)) | |
497 vf_vectype = vectype; | |
498 else | |
499 { | |
500 /* The vectorization factor is according to the smallest | |
501 scalar type (or the largest vector size, but we only | |
502 support one vector size per loop). */ | |
503 if (!bool_result) | |
504 scalar_type = vect_get_smallest_scalar_type (stmt, &dummy, | |
505 &dummy); | |
506 if (dump_enabled_p ()) | |
507 { | |
508 dump_printf_loc (MSG_NOTE, vect_location, | |
509 "get vectype for scalar type: "); | |
510 dump_generic_expr (MSG_NOTE, TDF_SLIM, scalar_type); | |
511 dump_printf (MSG_NOTE, "\n"); | 344 dump_printf (MSG_NOTE, "\n"); |
512 } | 345 } |
513 vf_vectype = get_vectype_for_scalar_type (scalar_type); | 346 |
347 vect_update_max_nunits (&vectorization_factor, vectype); | |
514 } | 348 } |
515 if (!vf_vectype) | 349 } |
516 { | 350 |
517 if (dump_enabled_p ()) | 351 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); |
518 { | 352 gsi_next (&si)) |
519 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 353 { |
520 "not vectorized: unsupported data-type "); | 354 stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); |
521 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, | 355 opt_result res |
522 scalar_type); | 356 = vect_determine_vf_for_stmt (stmt_info, &vectorization_factor, |
523 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); | 357 &mask_producers); |
524 } | 358 if (!res) |
525 return false; | 359 return res; |
526 } | |
527 | |
528 if ((GET_MODE_SIZE (TYPE_MODE (vectype)) | |
529 != GET_MODE_SIZE (TYPE_MODE (vf_vectype)))) | |
530 { | |
531 if (dump_enabled_p ()) | |
532 { | |
533 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
534 "not vectorized: different sized vector " | |
535 "types in statement, "); | |
536 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, | |
537 vectype); | |
538 dump_printf (MSG_MISSED_OPTIMIZATION, " and "); | |
539 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, | |
540 vf_vectype); | |
541 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); | |
542 } | |
543 return false; | |
544 } | |
545 | |
546 if (dump_enabled_p ()) | |
547 { | |
548 dump_printf_loc (MSG_NOTE, vect_location, "vectype: "); | |
549 dump_generic_expr (MSG_NOTE, TDF_SLIM, vf_vectype); | |
550 dump_printf (MSG_NOTE, "\n"); | |
551 } | |
552 | |
553 nunits = TYPE_VECTOR_SUBPARTS (vf_vectype); | |
554 if (dump_enabled_p ()) | |
555 dump_printf_loc (MSG_NOTE, vect_location, "nunits = %d\n", nunits); | |
556 if (!vectorization_factor | |
557 || (nunits > vectorization_factor)) | |
558 vectorization_factor = nunits; | |
559 | |
560 if (!analyze_pattern_stmt && gsi_end_p (pattern_def_si)) | |
561 { | |
562 pattern_def_seq = NULL; | |
563 gsi_next (&si); | |
564 } | |
565 } | 360 } |
566 } | 361 } |
567 | 362 |
568 /* TODO: Analyze cost. Decide if worth while to vectorize. */ | 363 /* TODO: Analyze cost. Decide if worth while to vectorize. */ |
569 if (dump_enabled_p ()) | 364 if (dump_enabled_p ()) |
570 dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = %d\n", | 365 { |
571 vectorization_factor); | 366 dump_printf_loc (MSG_NOTE, vect_location, "vectorization factor = "); |
572 if (vectorization_factor <= 1) | 367 dump_dec (MSG_NOTE, vectorization_factor); |
573 { | 368 dump_printf (MSG_NOTE, "\n"); |
574 if (dump_enabled_p ()) | 369 } |
575 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 370 |
576 "not vectorized: unsupported data-type\n"); | 371 if (known_le (vectorization_factor, 1U)) |
577 return false; | 372 return opt_result::failure_at (vect_location, |
578 } | 373 "not vectorized: unsupported data-type\n"); |
579 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; | 374 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; |
580 | 375 |
581 for (i = 0; i < mask_producers.length (); i++) | 376 for (i = 0; i < mask_producers.length (); i++) |
582 { | 377 { |
583 tree mask_type = NULL; | 378 stmt_info = mask_producers[i]; |
584 | 379 opt_tree mask_type = vect_get_mask_type_for_stmt (stmt_info); |
585 stmt = STMT_VINFO_STMT (mask_producers[i]); | |
586 | |
587 if (is_gimple_assign (stmt) | |
588 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison | |
589 && !VECT_SCALAR_BOOLEAN_TYPE_P | |
590 (TREE_TYPE (gimple_assign_rhs1 (stmt)))) | |
591 { | |
592 scalar_type = TREE_TYPE (gimple_assign_rhs1 (stmt)); | |
593 mask_type = get_mask_type_for_scalar_type (scalar_type); | |
594 | |
595 if (!mask_type) | |
596 { | |
597 if (dump_enabled_p ()) | |
598 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
599 "not vectorized: unsupported mask\n"); | |
600 return false; | |
601 } | |
602 } | |
603 else | |
604 { | |
605 tree rhs; | |
606 ssa_op_iter iter; | |
607 gimple *def_stmt; | |
608 enum vect_def_type dt; | |
609 | |
610 FOR_EACH_SSA_TREE_OPERAND (rhs, stmt, iter, SSA_OP_USE) | |
611 { | |
612 if (!vect_is_simple_use (rhs, mask_producers[i]->vinfo, | |
613 &def_stmt, &dt, &vectype)) | |
614 { | |
615 if (dump_enabled_p ()) | |
616 { | |
617 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
618 "not vectorized: can't compute mask type " | |
619 "for statement, "); | |
620 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, | |
621 0); | |
622 } | |
623 return false; | |
624 } | |
625 | |
626 /* No vectype probably means external definition. | |
627 Allow it in case there is another operand which | |
628 allows to determine mask type. */ | |
629 if (!vectype) | |
630 continue; | |
631 | |
632 if (!mask_type) | |
633 mask_type = vectype; | |
634 else if (TYPE_VECTOR_SUBPARTS (mask_type) | |
635 != TYPE_VECTOR_SUBPARTS (vectype)) | |
636 { | |
637 if (dump_enabled_p ()) | |
638 { | |
639 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
640 "not vectorized: different sized masks " | |
641 "types in statement, "); | |
642 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, | |
643 mask_type); | |
644 dump_printf (MSG_MISSED_OPTIMIZATION, " and "); | |
645 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, | |
646 vectype); | |
647 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); | |
648 } | |
649 return false; | |
650 } | |
651 else if (VECTOR_BOOLEAN_TYPE_P (mask_type) | |
652 != VECTOR_BOOLEAN_TYPE_P (vectype)) | |
653 { | |
654 if (dump_enabled_p ()) | |
655 { | |
656 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
657 "not vectorized: mixed mask and " | |
658 "nonmask vector types in statement, "); | |
659 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, | |
660 mask_type); | |
661 dump_printf (MSG_MISSED_OPTIMIZATION, " and "); | |
662 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, | |
663 vectype); | |
664 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); | |
665 } | |
666 return false; | |
667 } | |
668 } | |
669 | |
670 /* We may compare boolean value loaded as vector of integers. | |
671 Fix mask_type in such case. */ | |
672 if (mask_type | |
673 && !VECTOR_BOOLEAN_TYPE_P (mask_type) | |
674 && gimple_code (stmt) == GIMPLE_ASSIGN | |
675 && TREE_CODE_CLASS (gimple_assign_rhs_code (stmt)) == tcc_comparison) | |
676 mask_type = build_same_sized_truth_vector_type (mask_type); | |
677 } | |
678 | |
679 /* No mask_type should mean loop invariant predicate. | |
680 This is probably a subject for optimization in | |
681 if-conversion. */ | |
682 if (!mask_type) | 380 if (!mask_type) |
683 { | 381 return opt_result::propagate_failure (mask_type); |
684 if (dump_enabled_p ()) | 382 STMT_VINFO_VECTYPE (stmt_info) = mask_type; |
685 { | 383 } |
686 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 384 |
687 "not vectorized: can't compute mask type " | 385 return opt_result::success (); |
688 "for statement, "); | |
689 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, stmt, | |
690 0); | |
691 } | |
692 return false; | |
693 } | |
694 | |
695 STMT_VINFO_VECTYPE (mask_producers[i]) = mask_type; | |
696 } | |
697 | |
698 return true; | |
699 } | 386 } |
700 | 387 |
701 | 388 |
702 /* Function vect_is_simple_iv_evolution. | 389 /* Function vect_is_simple_iv_evolution. |
703 | 390 |
725 | 412 |
726 step_expr = evolution_part; | 413 step_expr = evolution_part; |
727 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb)); | 414 init_expr = unshare_expr (initial_condition_in_loop_num (access_fn, loop_nb)); |
728 | 415 |
729 if (dump_enabled_p ()) | 416 if (dump_enabled_p ()) |
730 { | 417 dump_printf_loc (MSG_NOTE, vect_location, "step: %T, init: %T\n", |
731 dump_printf_loc (MSG_NOTE, vect_location, "step: "); | 418 step_expr, init_expr); |
732 dump_generic_expr (MSG_NOTE, TDF_SLIM, step_expr); | |
733 dump_printf (MSG_NOTE, ", init: "); | |
734 dump_generic_expr (MSG_NOTE, TDF_SLIM, init_expr); | |
735 dump_printf (MSG_NOTE, "\n"); | |
736 } | |
737 | 419 |
738 *init = init_expr; | 420 *init = init_expr; |
739 *step = step_expr; | 421 *step = step_expr; |
740 | 422 |
741 if (TREE_CODE (step_expr) != INTEGER_CST | 423 if (TREE_CODE (step_expr) != INTEGER_CST |
755 } | 437 } |
756 | 438 |
757 return true; | 439 return true; |
758 } | 440 } |
759 | 441 |
442 /* Return true if PHI, described by STMT_INFO, is the inner PHI in | |
443 what we are assuming is a double reduction. For example, given | |
444 a structure like this: | |
445 | |
446 outer1: | |
447 x_1 = PHI <x_4(outer2), ...>; | |
448 ... | |
449 | |
450 inner: | |
451 x_2 = PHI <x_1(outer1), ...>; | |
452 ... | |
453 x_3 = ...; | |
454 ... | |
455 | |
456 outer2: | |
457 x_4 = PHI <x_3(inner)>; | |
458 ... | |
459 | |
460 outer loop analysis would treat x_1 as a double reduction phi and | |
461 this function would then return true for x_2. */ | |
462 | |
463 static bool | |
464 vect_inner_phi_in_double_reduction_p (stmt_vec_info stmt_info, gphi *phi) | |
465 { | |
466 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | |
467 use_operand_p use_p; | |
468 ssa_op_iter op_iter; | |
469 FOR_EACH_PHI_ARG (use_p, phi, op_iter, SSA_OP_USE) | |
470 if (stmt_vec_info def_info = loop_vinfo->lookup_def (USE_FROM_PTR (use_p))) | |
471 if (STMT_VINFO_DEF_TYPE (def_info) == vect_double_reduction_def) | |
472 return true; | |
473 return false; | |
474 } | |
475 | |
760 /* Function vect_analyze_scalar_cycles_1. | 476 /* Function vect_analyze_scalar_cycles_1. |
761 | 477 |
762 Examine the cross iteration def-use cycles of scalar variables | 478 Examine the cross iteration def-use cycles of scalar variables |
763 in LOOP. LOOP_VINFO represents the loop that is now being | 479 in LOOP. LOOP_VINFO represents the loop that is now being |
764 considered for vectorization (can be LOOP, or an outer-loop | 480 considered for vectorization (can be LOOP, or an outer-loop |
767 static void | 483 static void |
768 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop) | 484 vect_analyze_scalar_cycles_1 (loop_vec_info loop_vinfo, struct loop *loop) |
769 { | 485 { |
770 basic_block bb = loop->header; | 486 basic_block bb = loop->header; |
771 tree init, step; | 487 tree init, step; |
772 auto_vec<gimple *, 64> worklist; | 488 auto_vec<stmt_vec_info, 64> worklist; |
773 gphi_iterator gsi; | 489 gphi_iterator gsi; |
774 bool double_reduc; | 490 bool double_reduc; |
775 | 491 |
776 if (dump_enabled_p ()) | 492 DUMP_VECT_SCOPE ("vect_analyze_scalar_cycles"); |
777 dump_printf_loc (MSG_NOTE, vect_location, | |
778 "=== vect_analyze_scalar_cycles ===\n"); | |
779 | 493 |
780 /* First - identify all inductions. Reduction detection assumes that all the | 494 /* First - identify all inductions. Reduction detection assumes that all the |
781 inductions have been identified, therefore, this order must not be | 495 inductions have been identified, therefore, this order must not be |
782 changed. */ | 496 changed. */ |
783 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | 497 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
784 { | 498 { |
785 gphi *phi = gsi.phi (); | 499 gphi *phi = gsi.phi (); |
786 tree access_fn = NULL; | 500 tree access_fn = NULL; |
787 tree def = PHI_RESULT (phi); | 501 tree def = PHI_RESULT (phi); |
788 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi); | 502 stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (phi); |
789 | 503 |
790 if (dump_enabled_p ()) | 504 if (dump_enabled_p ()) |
791 { | 505 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G", phi); |
792 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: "); | |
793 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0); | |
794 } | |
795 | 506 |
796 /* Skip virtual phi's. The data dependences that are associated with | 507 /* Skip virtual phi's. The data dependences that are associated with |
797 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ | 508 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere. */ |
798 if (virtual_operand_p (def)) | 509 if (virtual_operand_p (def)) |
799 continue; | 510 continue; |
804 access_fn = analyze_scalar_evolution (loop, def); | 515 access_fn = analyze_scalar_evolution (loop, def); |
805 if (access_fn) | 516 if (access_fn) |
806 { | 517 { |
807 STRIP_NOPS (access_fn); | 518 STRIP_NOPS (access_fn); |
808 if (dump_enabled_p ()) | 519 if (dump_enabled_p ()) |
809 { | 520 dump_printf_loc (MSG_NOTE, vect_location, |
810 dump_printf_loc (MSG_NOTE, vect_location, | 521 "Access function of PHI: %T\n", access_fn); |
811 "Access function of PHI: "); | |
812 dump_generic_expr (MSG_NOTE, TDF_SLIM, access_fn); | |
813 dump_printf (MSG_NOTE, "\n"); | |
814 } | |
815 STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo) | 522 STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo) |
816 = initial_condition_in_loop_num (access_fn, loop->num); | 523 = initial_condition_in_loop_num (access_fn, loop->num); |
817 STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) | 524 STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo) |
818 = evolution_part_in_loop_num (access_fn, loop->num); | 525 = evolution_part_in_loop_num (access_fn, loop->num); |
819 } | 526 } |
820 | 527 |
821 if (!access_fn | 528 if (!access_fn |
529 || vect_inner_phi_in_double_reduction_p (stmt_vinfo, phi) | |
822 || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step) | 530 || !vect_is_simple_iv_evolution (loop->num, access_fn, &init, &step) |
823 || (LOOP_VINFO_LOOP (loop_vinfo) != loop | 531 || (LOOP_VINFO_LOOP (loop_vinfo) != loop |
824 && TREE_CODE (step) != INTEGER_CST)) | 532 && TREE_CODE (step) != INTEGER_CST)) |
825 { | 533 { |
826 worklist.safe_push (phi); | 534 worklist.safe_push (stmt_vinfo); |
827 continue; | 535 continue; |
828 } | 536 } |
829 | 537 |
830 gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo) | 538 gcc_assert (STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo) |
831 != NULL_TREE); | 539 != NULL_TREE); |
838 | 546 |
839 | 547 |
840 /* Second - identify all reductions and nested cycles. */ | 548 /* Second - identify all reductions and nested cycles. */ |
841 while (worklist.length () > 0) | 549 while (worklist.length () > 0) |
842 { | 550 { |
843 gimple *phi = worklist.pop (); | 551 stmt_vec_info stmt_vinfo = worklist.pop (); |
552 gphi *phi = as_a <gphi *> (stmt_vinfo->stmt); | |
844 tree def = PHI_RESULT (phi); | 553 tree def = PHI_RESULT (phi); |
845 stmt_vec_info stmt_vinfo = vinfo_for_stmt (phi); | |
846 gimple *reduc_stmt; | |
847 | 554 |
848 if (dump_enabled_p ()) | 555 if (dump_enabled_p ()) |
849 { | 556 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G", phi); |
850 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: "); | |
851 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0); | |
852 } | |
853 | 557 |
854 gcc_assert (!virtual_operand_p (def) | 558 gcc_assert (!virtual_operand_p (def) |
855 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type); | 559 && STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_unknown_def_type); |
856 | 560 |
857 reduc_stmt = vect_force_simple_reduction (loop_vinfo, phi, | 561 stmt_vec_info reduc_stmt_info |
858 &double_reduc, false); | 562 = vect_force_simple_reduction (loop_vinfo, stmt_vinfo, |
859 if (reduc_stmt) | 563 &double_reduc, false); |
564 if (reduc_stmt_info) | |
860 { | 565 { |
861 if (double_reduc) | 566 if (double_reduc) |
862 { | 567 { |
863 if (dump_enabled_p ()) | 568 if (dump_enabled_p ()) |
864 dump_printf_loc (MSG_NOTE, vect_location, | 569 dump_printf_loc (MSG_NOTE, vect_location, |
865 "Detected double reduction.\n"); | 570 "Detected double reduction.\n"); |
866 | 571 |
867 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def; | 572 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_double_reduction_def; |
868 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) = | 573 STMT_VINFO_DEF_TYPE (reduc_stmt_info) |
869 vect_double_reduction_def; | 574 = vect_double_reduction_def; |
870 } | 575 } |
871 else | 576 else |
872 { | 577 { |
873 if (loop != LOOP_VINFO_LOOP (loop_vinfo)) | 578 if (loop != LOOP_VINFO_LOOP (loop_vinfo)) |
874 { | 579 { |
875 if (dump_enabled_p ()) | 580 if (dump_enabled_p ()) |
876 dump_printf_loc (MSG_NOTE, vect_location, | 581 dump_printf_loc (MSG_NOTE, vect_location, |
877 "Detected vectorizable nested cycle.\n"); | 582 "Detected vectorizable nested cycle.\n"); |
878 | 583 |
879 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle; | 584 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_nested_cycle; |
880 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) = | 585 STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_nested_cycle; |
881 vect_nested_cycle; | |
882 } | 586 } |
883 else | 587 else |
884 { | 588 { |
885 if (dump_enabled_p ()) | 589 if (dump_enabled_p ()) |
886 dump_printf_loc (MSG_NOTE, vect_location, | 590 dump_printf_loc (MSG_NOTE, vect_location, |
887 "Detected reduction.\n"); | 591 "Detected reduction.\n"); |
888 | 592 |
889 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def; | 593 STMT_VINFO_DEF_TYPE (stmt_vinfo) = vect_reduction_def; |
890 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (reduc_stmt)) = | 594 STMT_VINFO_DEF_TYPE (reduc_stmt_info) = vect_reduction_def; |
891 vect_reduction_def; | |
892 /* Store the reduction cycles for possible vectorization in | 595 /* Store the reduction cycles for possible vectorization in |
893 loop-aware SLP if it was not detected as reduction | 596 loop-aware SLP if it was not detected as reduction |
894 chain. */ | 597 chain. */ |
895 if (! GROUP_FIRST_ELEMENT (vinfo_for_stmt (reduc_stmt))) | 598 if (! REDUC_GROUP_FIRST_ELEMENT (reduc_stmt_info)) |
896 LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push (reduc_stmt); | 599 LOOP_VINFO_REDUCTIONS (loop_vinfo).safe_push |
600 (reduc_stmt_info); | |
897 } | 601 } |
898 } | 602 } |
899 } | 603 } |
900 else | 604 else |
901 if (dump_enabled_p ()) | 605 if (dump_enabled_p ()) |
944 | 648 |
945 if (loop->inner) | 649 if (loop->inner) |
946 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner); | 650 vect_analyze_scalar_cycles_1 (loop_vinfo, loop->inner); |
947 } | 651 } |
948 | 652 |
949 /* Transfer group and reduction information from STMT to its pattern stmt. */ | 653 /* Transfer group and reduction information from STMT_INFO to its |
654 pattern stmt. */ | |
950 | 655 |
951 static void | 656 static void |
952 vect_fixup_reduc_chain (gimple *stmt) | 657 vect_fixup_reduc_chain (stmt_vec_info stmt_info) |
953 { | 658 { |
954 gimple *firstp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)); | 659 stmt_vec_info firstp = STMT_VINFO_RELATED_STMT (stmt_info); |
955 gimple *stmtp; | 660 stmt_vec_info stmtp; |
956 gcc_assert (!GROUP_FIRST_ELEMENT (vinfo_for_stmt (firstp)) | 661 gcc_assert (!REDUC_GROUP_FIRST_ELEMENT (firstp) |
957 && GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))); | 662 && REDUC_GROUP_FIRST_ELEMENT (stmt_info)); |
958 GROUP_SIZE (vinfo_for_stmt (firstp)) = GROUP_SIZE (vinfo_for_stmt (stmt)); | 663 REDUC_GROUP_SIZE (firstp) = REDUC_GROUP_SIZE (stmt_info); |
959 do | 664 do |
960 { | 665 { |
961 stmtp = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)); | 666 stmtp = STMT_VINFO_RELATED_STMT (stmt_info); |
962 GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmtp)) = firstp; | 667 REDUC_GROUP_FIRST_ELEMENT (stmtp) = firstp; |
963 stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmt)); | 668 stmt_info = REDUC_GROUP_NEXT_ELEMENT (stmt_info); |
964 if (stmt) | 669 if (stmt_info) |
965 GROUP_NEXT_ELEMENT (vinfo_for_stmt (stmtp)) | 670 REDUC_GROUP_NEXT_ELEMENT (stmtp) |
966 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)); | 671 = STMT_VINFO_RELATED_STMT (stmt_info); |
967 } | 672 } |
968 while (stmt); | 673 while (stmt_info); |
969 STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmtp)) = vect_reduction_def; | 674 STMT_VINFO_DEF_TYPE (stmtp) = vect_reduction_def; |
970 } | 675 } |
971 | 676 |
972 /* Fixup scalar cycles that now have their stmts detected as patterns. */ | 677 /* Fixup scalar cycles that now have their stmts detected as patterns. */ |
973 | 678 |
974 static void | 679 static void |
975 vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo) | 680 vect_fixup_scalar_cycles_with_patterns (loop_vec_info loop_vinfo) |
976 { | 681 { |
977 gimple *first; | 682 stmt_vec_info first; |
978 unsigned i; | 683 unsigned i; |
979 | 684 |
980 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first) | 685 FOR_EACH_VEC_ELT (LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo), i, first) |
981 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (first))) | 686 if (STMT_VINFO_IN_PATTERN_P (first)) |
982 { | 687 { |
983 gimple *next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first)); | 688 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first); |
984 while (next) | 689 while (next) |
985 { | 690 { |
986 if (! STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (next))) | 691 if (! STMT_VINFO_IN_PATTERN_P (next)) |
987 break; | 692 break; |
988 next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next)); | 693 next = REDUC_GROUP_NEXT_ELEMENT (next); |
989 } | 694 } |
990 /* If not all stmt in the chain are patterns try to handle | 695 /* If not all stmt in the chain are patterns try to handle |
991 the chain without patterns. */ | 696 the chain without patterns. */ |
992 if (! next) | 697 if (! next) |
993 { | 698 { |
994 vect_fixup_reduc_chain (first); | 699 vect_fixup_reduc_chain (first); |
995 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i] | 700 LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo)[i] |
996 = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (first)); | 701 = STMT_VINFO_RELATED_STMT (first); |
997 } | 702 } |
998 } | 703 } |
999 } | 704 } |
1000 | 705 |
1001 /* Function vect_get_loop_niters. | 706 /* Function vect_get_loop_niters. |
1018 gcond *cond = get_loop_exit_condition (loop); | 723 gcond *cond = get_loop_exit_condition (loop); |
1019 | 724 |
1020 *assumptions = boolean_true_node; | 725 *assumptions = boolean_true_node; |
1021 *number_of_iterationsm1 = chrec_dont_know; | 726 *number_of_iterationsm1 = chrec_dont_know; |
1022 *number_of_iterations = chrec_dont_know; | 727 *number_of_iterations = chrec_dont_know; |
1023 if (dump_enabled_p ()) | 728 DUMP_VECT_SCOPE ("get_loop_niters"); |
1024 dump_printf_loc (MSG_NOTE, vect_location, | |
1025 "=== get_loop_niters ===\n"); | |
1026 | 729 |
1027 if (!exit) | 730 if (!exit) |
1028 return cond; | 731 return cond; |
1029 | 732 |
1030 niter = chrec_dont_know; | 733 niter = chrec_dont_know; |
1053 fold_build1 (TRUTH_NOT_EXPR, | 756 fold_build1 (TRUTH_NOT_EXPR, |
1054 boolean_type_node, | 757 boolean_type_node, |
1055 may_be_zero)); | 758 may_be_zero)); |
1056 else | 759 else |
1057 niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero, | 760 niter = fold_build3 (COND_EXPR, TREE_TYPE (niter), may_be_zero, |
1058 build_int_cst (TREE_TYPE (niter), 0), niter); | 761 build_int_cst (TREE_TYPE (niter), 0), |
762 rewrite_to_non_trapping_overflow (niter)); | |
1059 | 763 |
1060 may_be_zero = NULL_TREE; | 764 may_be_zero = NULL_TREE; |
1061 } | 765 } |
1062 else if (integer_nonzerop (may_be_zero)) | 766 else if (integer_nonzerop (may_be_zero)) |
1063 { | 767 { |
1099 | 803 |
1100 | 804 |
1101 /* Create and initialize a new loop_vec_info struct for LOOP_IN, as well as | 805 /* Create and initialize a new loop_vec_info struct for LOOP_IN, as well as |
1102 stmt_vec_info structs for all the stmts in LOOP_IN. */ | 806 stmt_vec_info structs for all the stmts in LOOP_IN. */ |
1103 | 807 |
1104 _loop_vec_info::_loop_vec_info (struct loop *loop_in) | 808 _loop_vec_info::_loop_vec_info (struct loop *loop_in, vec_info_shared *shared) |
1105 : vec_info (vec_info::loop, init_cost (loop_in)), | 809 : vec_info (vec_info::loop, init_cost (loop_in), shared), |
1106 loop (loop_in), | 810 loop (loop_in), |
1107 bbs (XCNEWVEC (basic_block, loop->num_nodes)), | 811 bbs (XCNEWVEC (basic_block, loop->num_nodes)), |
1108 num_itersm1 (NULL_TREE), | 812 num_itersm1 (NULL_TREE), |
1109 num_iters (NULL_TREE), | 813 num_iters (NULL_TREE), |
1110 num_iters_unchanged (NULL_TREE), | 814 num_iters_unchanged (NULL_TREE), |
1111 num_iters_assumptions (NULL_TREE), | 815 num_iters_assumptions (NULL_TREE), |
1112 th (0), | 816 th (0), |
817 versioning_threshold (0), | |
1113 vectorization_factor (0), | 818 vectorization_factor (0), |
1114 max_vectorization_factor (0), | 819 max_vectorization_factor (0), |
820 mask_skip_niters (NULL_TREE), | |
821 mask_compare_type (NULL_TREE), | |
1115 unaligned_dr (NULL), | 822 unaligned_dr (NULL), |
1116 peeling_for_alignment (0), | 823 peeling_for_alignment (0), |
1117 ptr_mask (0), | 824 ptr_mask (0), |
825 ivexpr_map (NULL), | |
1118 slp_unrolling_factor (1), | 826 slp_unrolling_factor (1), |
1119 single_scalar_iteration_cost (0), | 827 single_scalar_iteration_cost (0), |
1120 vectorizable (false), | 828 vectorizable (false), |
829 can_fully_mask_p (true), | |
830 fully_masked_p (false), | |
1121 peeling_for_gaps (false), | 831 peeling_for_gaps (false), |
1122 peeling_for_niter (false), | 832 peeling_for_niter (false), |
1123 operands_swapped (false), | 833 operands_swapped (false), |
1124 no_data_dependencies (false), | 834 no_data_dependencies (false), |
1125 has_mask_store (false), | 835 has_mask_store (false), |
1126 scalar_loop (NULL), | 836 scalar_loop (NULL), |
1127 orig_loop_info (NULL) | 837 orig_loop_info (NULL) |
1128 { | 838 { |
1129 /* Create/Update stmt_info for all stmts in the loop. */ | |
1130 basic_block *body = get_loop_body (loop); | |
1131 for (unsigned int i = 0; i < loop->num_nodes; i++) | |
1132 { | |
1133 basic_block bb = body[i]; | |
1134 gimple_stmt_iterator si; | |
1135 | |
1136 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si)) | |
1137 { | |
1138 gimple *phi = gsi_stmt (si); | |
1139 gimple_set_uid (phi, 0); | |
1140 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, this)); | |
1141 } | |
1142 | |
1143 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) | |
1144 { | |
1145 gimple *stmt = gsi_stmt (si); | |
1146 gimple_set_uid (stmt, 0); | |
1147 set_vinfo_for_stmt (stmt, new_stmt_vec_info (stmt, this)); | |
1148 } | |
1149 } | |
1150 free (body); | |
1151 | |
1152 /* CHECKME: We want to visit all BBs before their successors (except for | 839 /* CHECKME: We want to visit all BBs before their successors (except for |
1153 latch blocks, for which this assertion wouldn't hold). In the simple | 840 latch blocks, for which this assertion wouldn't hold). In the simple |
1154 case of the loop forms we allow, a dfs order of the BBs would the same | 841 case of the loop forms we allow, a dfs order of the BBs would the same |
1155 as reversed postorder traversal, so we are safe. */ | 842 as reversed postorder traversal, so we are safe. */ |
1156 | 843 |
1157 unsigned int nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p, | 844 unsigned int nbbs = dfs_enumerate_from (loop->header, 0, bb_in_loop_p, |
1158 bbs, loop->num_nodes, loop); | 845 bbs, loop->num_nodes, loop); |
1159 gcc_assert (nbbs == loop->num_nodes); | 846 gcc_assert (nbbs == loop->num_nodes); |
847 | |
848 for (unsigned int i = 0; i < nbbs; i++) | |
849 { | |
850 basic_block bb = bbs[i]; | |
851 gimple_stmt_iterator si; | |
852 | |
853 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si)) | |
854 { | |
855 gimple *phi = gsi_stmt (si); | |
856 gimple_set_uid (phi, 0); | |
857 add_stmt (phi); | |
858 } | |
859 | |
860 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) | |
861 { | |
862 gimple *stmt = gsi_stmt (si); | |
863 gimple_set_uid (stmt, 0); | |
864 add_stmt (stmt); | |
865 } | |
866 } | |
1160 } | 867 } |
1161 | 868 |
869 /* Free all levels of MASKS. */ | |
870 | |
871 void | |
872 release_vec_loop_masks (vec_loop_masks *masks) | |
873 { | |
874 rgroup_masks *rgm; | |
875 unsigned int i; | |
876 FOR_EACH_VEC_ELT (*masks, i, rgm) | |
877 rgm->masks.release (); | |
878 masks->release (); | |
879 } | |
1162 | 880 |
1163 /* Free all memory used by the _loop_vec_info, as well as all the | 881 /* Free all memory used by the _loop_vec_info, as well as all the |
1164 stmt_vec_info structs of all the stmts in the loop. */ | 882 stmt_vec_info structs of all the stmts in the loop. */ |
1165 | 883 |
1166 _loop_vec_info::~_loop_vec_info () | 884 _loop_vec_info::~_loop_vec_info () |
1171 | 889 |
1172 nbbs = loop->num_nodes; | 890 nbbs = loop->num_nodes; |
1173 for (j = 0; j < nbbs; j++) | 891 for (j = 0; j < nbbs; j++) |
1174 { | 892 { |
1175 basic_block bb = bbs[j]; | 893 basic_block bb = bbs[j]; |
1176 for (si = gsi_start_phis (bb); !gsi_end_p (si); gsi_next (&si)) | |
1177 free_stmt_vec_info (gsi_stmt (si)); | |
1178 | |
1179 for (si = gsi_start_bb (bb); !gsi_end_p (si); ) | 894 for (si = gsi_start_bb (bb); !gsi_end_p (si); ) |
1180 { | 895 { |
1181 gimple *stmt = gsi_stmt (si); | 896 gimple *stmt = gsi_stmt (si); |
1182 | 897 |
1183 /* We may have broken canonical form by moving a constant | 898 /* We may have broken canonical form by moving a constant |
1213 gimple_assign_rhs3_ptr (stmt)); | 928 gimple_assign_rhs3_ptr (stmt)); |
1214 } | 929 } |
1215 } | 930 } |
1216 } | 931 } |
1217 } | 932 } |
1218 | |
1219 /* Free stmt_vec_info. */ | |
1220 free_stmt_vec_info (stmt); | |
1221 gsi_next (&si); | 933 gsi_next (&si); |
1222 } | 934 } |
1223 } | 935 } |
1224 | 936 |
1225 free (bbs); | 937 free (bbs); |
1226 | 938 |
939 release_vec_loop_masks (&masks); | |
940 delete ivexpr_map; | |
941 | |
1227 loop->aux = NULL; | 942 loop->aux = NULL; |
1228 } | 943 } |
1229 | 944 |
945 /* Return an invariant or register for EXPR and emit necessary | |
946 computations in the LOOP_VINFO loop preheader. */ | |
947 | |
948 tree | |
949 cse_and_gimplify_to_preheader (loop_vec_info loop_vinfo, tree expr) | |
950 { | |
951 if (is_gimple_reg (expr) | |
952 || is_gimple_min_invariant (expr)) | |
953 return expr; | |
954 | |
955 if (! loop_vinfo->ivexpr_map) | |
956 loop_vinfo->ivexpr_map = new hash_map<tree_operand_hash, tree>; | |
957 tree &cached = loop_vinfo->ivexpr_map->get_or_insert (expr); | |
958 if (! cached) | |
959 { | |
960 gimple_seq stmts = NULL; | |
961 cached = force_gimple_operand (unshare_expr (expr), | |
962 &stmts, true, NULL_TREE); | |
963 if (stmts) | |
964 { | |
965 edge e = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo)); | |
966 gsi_insert_seq_on_edge_immediate (e, stmts); | |
967 } | |
968 } | |
969 return cached; | |
970 } | |
971 | |
972 /* Return true if we can use CMP_TYPE as the comparison type to produce | |
973 all masks required to mask LOOP_VINFO. */ | |
974 | |
975 static bool | |
976 can_produce_all_loop_masks_p (loop_vec_info loop_vinfo, tree cmp_type) | |
977 { | |
978 rgroup_masks *rgm; | |
979 unsigned int i; | |
980 FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm) | |
981 if (rgm->mask_type != NULL_TREE | |
982 && !direct_internal_fn_supported_p (IFN_WHILE_ULT, | |
983 cmp_type, rgm->mask_type, | |
984 OPTIMIZE_FOR_SPEED)) | |
985 return false; | |
986 return true; | |
987 } | |
988 | |
989 /* Calculate the maximum number of scalars per iteration for every | |
990 rgroup in LOOP_VINFO. */ | |
991 | |
992 static unsigned int | |
993 vect_get_max_nscalars_per_iter (loop_vec_info loop_vinfo) | |
994 { | |
995 unsigned int res = 1; | |
996 unsigned int i; | |
997 rgroup_masks *rgm; | |
998 FOR_EACH_VEC_ELT (LOOP_VINFO_MASKS (loop_vinfo), i, rgm) | |
999 res = MAX (res, rgm->max_nscalars_per_iter); | |
1000 return res; | |
1001 } | |
1002 | |
1003 /* Each statement in LOOP_VINFO can be masked where necessary. Check | |
1004 whether we can actually generate the masks required. Return true if so, | |
1005 storing the type of the scalar IV in LOOP_VINFO_MASK_COMPARE_TYPE. */ | |
1006 | |
1007 static bool | |
1008 vect_verify_full_masking (loop_vec_info loop_vinfo) | |
1009 { | |
1010 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
1011 unsigned int min_ni_width; | |
1012 | |
1013 /* Use a normal loop if there are no statements that need masking. | |
1014 This only happens in rare degenerate cases: it means that the loop | |
1015 has no loads, no stores, and no live-out values. */ | |
1016 if (LOOP_VINFO_MASKS (loop_vinfo).is_empty ()) | |
1017 return false; | |
1018 | |
1019 /* Get the maximum number of iterations that is representable | |
1020 in the counter type. */ | |
1021 tree ni_type = TREE_TYPE (LOOP_VINFO_NITERSM1 (loop_vinfo)); | |
1022 widest_int max_ni = wi::to_widest (TYPE_MAX_VALUE (ni_type)) + 1; | |
1023 | |
1024 /* Get a more refined estimate for the number of iterations. */ | |
1025 widest_int max_back_edges; | |
1026 if (max_loop_iterations (loop, &max_back_edges)) | |
1027 max_ni = wi::smin (max_ni, max_back_edges + 1); | |
1028 | |
1029 /* Account for rgroup masks, in which each bit is replicated N times. */ | |
1030 max_ni *= vect_get_max_nscalars_per_iter (loop_vinfo); | |
1031 | |
1032 /* Work out how many bits we need to represent the limit. */ | |
1033 min_ni_width = wi::min_precision (max_ni, UNSIGNED); | |
1034 | |
1035 /* Find a scalar mode for which WHILE_ULT is supported. */ | |
1036 opt_scalar_int_mode cmp_mode_iter; | |
1037 tree cmp_type = NULL_TREE; | |
1038 FOR_EACH_MODE_IN_CLASS (cmp_mode_iter, MODE_INT) | |
1039 { | |
1040 unsigned int cmp_bits = GET_MODE_BITSIZE (cmp_mode_iter.require ()); | |
1041 if (cmp_bits >= min_ni_width | |
1042 && targetm.scalar_mode_supported_p (cmp_mode_iter.require ())) | |
1043 { | |
1044 tree this_type = build_nonstandard_integer_type (cmp_bits, true); | |
1045 if (this_type | |
1046 && can_produce_all_loop_masks_p (loop_vinfo, this_type)) | |
1047 { | |
1048 /* Although we could stop as soon as we find a valid mode, | |
1049 it's often better to continue until we hit Pmode, since the | |
1050 operands to the WHILE are more likely to be reusable in | |
1051 address calculations. */ | |
1052 cmp_type = this_type; | |
1053 if (cmp_bits >= GET_MODE_BITSIZE (Pmode)) | |
1054 break; | |
1055 } | |
1056 } | |
1057 } | |
1058 | |
1059 if (!cmp_type) | |
1060 return false; | |
1061 | |
1062 LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo) = cmp_type; | |
1063 return true; | |
1064 } | |
1230 | 1065 |
1231 /* Calculate the cost of one scalar iteration of the loop. */ | 1066 /* Calculate the cost of one scalar iteration of the loop. */ |
1232 static void | 1067 static void |
1233 vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo) | 1068 vect_compute_single_scalar_iteration_cost (loop_vec_info loop_vinfo) |
1234 { | 1069 { |
1235 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | 1070 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
1236 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); | 1071 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); |
1237 int nbbs = loop->num_nodes, factor, scalar_single_iter_cost = 0; | 1072 int nbbs = loop->num_nodes, factor; |
1238 int innerloop_iters, i; | 1073 int innerloop_iters, i; |
1239 | 1074 |
1240 /* Count statements in scalar loop. Using this as scalar cost for a single | 1075 DUMP_VECT_SCOPE ("vect_compute_single_scalar_iteration_cost"); |
1241 iteration for now. | 1076 |
1242 | 1077 /* Gather costs for statements in the scalar loop. */ |
1243 TODO: Add outer loop support. | |
1244 | |
1245 TODO: Consider assigning different costs to different scalar | |
1246 statements. */ | |
1247 | 1078 |
1248 /* FORNOW. */ | 1079 /* FORNOW. */ |
1249 innerloop_iters = 1; | 1080 innerloop_iters = 1; |
1250 if (loop->inner) | 1081 if (loop->inner) |
1251 innerloop_iters = 50; /* FIXME */ | 1082 innerloop_iters = 50; /* FIXME */ |
1261 factor = 1; | 1092 factor = 1; |
1262 | 1093 |
1263 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) | 1094 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si)) |
1264 { | 1095 { |
1265 gimple *stmt = gsi_stmt (si); | 1096 gimple *stmt = gsi_stmt (si); |
1266 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | 1097 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (stmt); |
1267 | 1098 |
1268 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt)) | 1099 if (!is_gimple_assign (stmt) && !is_gimple_call (stmt)) |
1269 continue; | 1100 continue; |
1270 | 1101 |
1271 /* Skip stmts that are not vectorized inside the loop. */ | 1102 /* Skip stmts that are not vectorized inside the loop. */ |
1285 kind = scalar_store; | 1116 kind = scalar_store; |
1286 } | 1117 } |
1287 else | 1118 else |
1288 kind = scalar_stmt; | 1119 kind = scalar_stmt; |
1289 | 1120 |
1290 scalar_single_iter_cost | 1121 record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), |
1291 += record_stmt_cost (&LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), | 1122 factor, kind, stmt_info, 0, vect_prologue); |
1292 factor, kind, stmt_info, 0, vect_prologue); | |
1293 } | 1123 } |
1294 } | 1124 } |
1295 LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo) | 1125 |
1296 = scalar_single_iter_cost; | 1126 /* Now accumulate cost. */ |
1127 void *target_cost_data = init_cost (loop); | |
1128 stmt_info_for_cost *si; | |
1129 int j; | |
1130 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), | |
1131 j, si) | |
1132 (void) add_stmt_cost (target_cost_data, si->count, | |
1133 si->kind, si->stmt_info, si->misalign, | |
1134 vect_body); | |
1135 unsigned dummy, body_cost = 0; | |
1136 finish_cost (target_cost_data, &dummy, &body_cost, &dummy); | |
1137 destroy_cost_data (target_cost_data); | |
1138 LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo) = body_cost; | |
1297 } | 1139 } |
1298 | 1140 |
1299 | 1141 |
1300 /* Function vect_analyze_loop_form_1. | 1142 /* Function vect_analyze_loop_form_1. |
1301 | 1143 |
1304 - the loop has a single entry and exit | 1146 - the loop has a single entry and exit |
1305 - the loop exit condition is simple enough | 1147 - the loop exit condition is simple enough |
1306 - the number of iterations can be analyzed, i.e, a countable loop. The | 1148 - the number of iterations can be analyzed, i.e, a countable loop. The |
1307 niter could be analyzed under some assumptions. */ | 1149 niter could be analyzed under some assumptions. */ |
1308 | 1150 |
1309 bool | 1151 opt_result |
1310 vect_analyze_loop_form_1 (struct loop *loop, gcond **loop_cond, | 1152 vect_analyze_loop_form_1 (struct loop *loop, gcond **loop_cond, |
1311 tree *assumptions, tree *number_of_iterationsm1, | 1153 tree *assumptions, tree *number_of_iterationsm1, |
1312 tree *number_of_iterations, gcond **inner_loop_cond) | 1154 tree *number_of_iterations, gcond **inner_loop_cond) |
1313 { | 1155 { |
1314 if (dump_enabled_p ()) | 1156 DUMP_VECT_SCOPE ("vect_analyze_loop_form"); |
1315 dump_printf_loc (MSG_NOTE, vect_location, | |
1316 "=== vect_analyze_loop_form ===\n"); | |
1317 | 1157 |
1318 /* Different restrictions apply when we are considering an inner-most loop, | 1158 /* Different restrictions apply when we are considering an inner-most loop, |
1319 vs. an outer (nested) loop. | 1159 vs. an outer (nested) loop. |
1320 (FORNOW. May want to relax some of these restrictions in the future). */ | 1160 (FORNOW. May want to relax some of these restrictions in the future). */ |
1321 | 1161 |
1332 | +--> latch --+ | 1172 | +--> latch --+ |
1333 | | 1173 | |
1334 (exit-bb) */ | 1174 (exit-bb) */ |
1335 | 1175 |
1336 if (loop->num_nodes != 2) | 1176 if (loop->num_nodes != 2) |
1337 { | 1177 return opt_result::failure_at (vect_location, |
1338 if (dump_enabled_p ()) | 1178 "not vectorized:" |
1339 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1179 " control flow in loop.\n"); |
1340 "not vectorized: control flow in loop.\n"); | |
1341 return false; | |
1342 } | |
1343 | 1180 |
1344 if (empty_block_p (loop->header)) | 1181 if (empty_block_p (loop->header)) |
1345 { | 1182 return opt_result::failure_at (vect_location, |
1346 if (dump_enabled_p ()) | 1183 "not vectorized: empty loop.\n"); |
1347 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
1348 "not vectorized: empty loop.\n"); | |
1349 return false; | |
1350 } | |
1351 } | 1184 } |
1352 else | 1185 else |
1353 { | 1186 { |
1354 struct loop *innerloop = loop->inner; | 1187 struct loop *innerloop = loop->inner; |
1355 edge entryedge; | 1188 edge entryedge; |
1370 | 1203 |
1371 The inner-loop has the properties expected of inner-most loops | 1204 The inner-loop has the properties expected of inner-most loops |
1372 as described above. */ | 1205 as described above. */ |
1373 | 1206 |
1374 if ((loop->inner)->inner || (loop->inner)->next) | 1207 if ((loop->inner)->inner || (loop->inner)->next) |
1375 { | 1208 return opt_result::failure_at (vect_location, |
1376 if (dump_enabled_p ()) | 1209 "not vectorized:" |
1377 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1210 " multiple nested loops.\n"); |
1378 "not vectorized: multiple nested loops.\n"); | |
1379 return false; | |
1380 } | |
1381 | 1211 |
1382 if (loop->num_nodes != 5) | 1212 if (loop->num_nodes != 5) |
1383 { | 1213 return opt_result::failure_at (vect_location, |
1384 if (dump_enabled_p ()) | 1214 "not vectorized:" |
1385 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1215 " control flow in loop.\n"); |
1386 "not vectorized: control flow in loop.\n"); | |
1387 return false; | |
1388 } | |
1389 | 1216 |
1390 entryedge = loop_preheader_edge (innerloop); | 1217 entryedge = loop_preheader_edge (innerloop); |
1391 if (entryedge->src != loop->header | 1218 if (entryedge->src != loop->header |
1392 || !single_exit (innerloop) | 1219 || !single_exit (innerloop) |
1393 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src) | 1220 || single_exit (innerloop)->dest != EDGE_PRED (loop->latch, 0)->src) |
1221 return opt_result::failure_at (vect_location, | |
1222 "not vectorized:" | |
1223 " unsupported outerloop form.\n"); | |
1224 | |
1225 /* Analyze the inner-loop. */ | |
1226 tree inner_niterm1, inner_niter, inner_assumptions; | |
1227 opt_result res | |
1228 = vect_analyze_loop_form_1 (loop->inner, inner_loop_cond, | |
1229 &inner_assumptions, &inner_niterm1, | |
1230 &inner_niter, NULL); | |
1231 if (!res) | |
1394 { | 1232 { |
1395 if (dump_enabled_p ()) | 1233 if (dump_enabled_p ()) |
1396 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1234 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1397 "not vectorized: unsupported outerloop form.\n"); | |
1398 return false; | |
1399 } | |
1400 | |
1401 /* Analyze the inner-loop. */ | |
1402 tree inner_niterm1, inner_niter, inner_assumptions; | |
1403 if (! vect_analyze_loop_form_1 (loop->inner, inner_loop_cond, | |
1404 &inner_assumptions, &inner_niterm1, | |
1405 &inner_niter, NULL) | |
1406 /* Don't support analyzing niter under assumptions for inner | |
1407 loop. */ | |
1408 || !integer_onep (inner_assumptions)) | |
1409 { | |
1410 if (dump_enabled_p ()) | |
1411 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
1412 "not vectorized: Bad inner loop.\n"); | 1235 "not vectorized: Bad inner loop.\n"); |
1413 return false; | 1236 return res; |
1414 } | 1237 } |
1238 | |
1239 /* Don't support analyzing niter under assumptions for inner | |
1240 loop. */ | |
1241 if (!integer_onep (inner_assumptions)) | |
1242 return opt_result::failure_at (vect_location, | |
1243 "not vectorized: Bad inner loop.\n"); | |
1415 | 1244 |
1416 if (!expr_invariant_in_loop_p (loop, inner_niter)) | 1245 if (!expr_invariant_in_loop_p (loop, inner_niter)) |
1417 { | 1246 return opt_result::failure_at (vect_location, |
1418 if (dump_enabled_p ()) | 1247 "not vectorized: inner-loop count not" |
1419 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1248 " invariant.\n"); |
1420 "not vectorized: inner-loop count not" | |
1421 " invariant.\n"); | |
1422 return false; | |
1423 } | |
1424 | 1249 |
1425 if (dump_enabled_p ()) | 1250 if (dump_enabled_p ()) |
1426 dump_printf_loc (MSG_NOTE, vect_location, | 1251 dump_printf_loc (MSG_NOTE, vect_location, |
1427 "Considering outer-loop vectorization.\n"); | 1252 "Considering outer-loop vectorization.\n"); |
1428 } | 1253 } |
1429 | 1254 |
1430 if (!single_exit (loop) | 1255 if (!single_exit (loop)) |
1431 || EDGE_COUNT (loop->header->preds) != 2) | 1256 return opt_result::failure_at (vect_location, |
1432 { | 1257 "not vectorized: multiple exits.\n"); |
1433 if (dump_enabled_p ()) | 1258 if (EDGE_COUNT (loop->header->preds) != 2) |
1434 { | 1259 return opt_result::failure_at (vect_location, |
1435 if (!single_exit (loop)) | 1260 "not vectorized:" |
1436 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1261 " too many incoming edges.\n"); |
1437 "not vectorized: multiple exits.\n"); | |
1438 else if (EDGE_COUNT (loop->header->preds) != 2) | |
1439 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
1440 "not vectorized: too many incoming edges.\n"); | |
1441 } | |
1442 return false; | |
1443 } | |
1444 | 1262 |
1445 /* We assume that the loop exit condition is at the end of the loop. i.e, | 1263 /* We assume that the loop exit condition is at the end of the loop. i.e, |
1446 that the loop is represented as a do-while (with a proper if-guard | 1264 that the loop is represented as a do-while (with a proper if-guard |
1447 before the loop if needed), where the loop header contains all the | 1265 before the loop if needed), where the loop header contains all the |
1448 executable statements, and the latch is empty. */ | 1266 executable statements, and the latch is empty. */ |
1449 if (!empty_block_p (loop->latch) | 1267 if (!empty_block_p (loop->latch) |
1450 || !gimple_seq_empty_p (phi_nodes (loop->latch))) | 1268 || !gimple_seq_empty_p (phi_nodes (loop->latch))) |
1451 { | 1269 return opt_result::failure_at (vect_location, |
1452 if (dump_enabled_p ()) | 1270 "not vectorized: latch block not empty.\n"); |
1453 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
1454 "not vectorized: latch block not empty.\n"); | |
1455 return false; | |
1456 } | |
1457 | 1271 |
1458 /* Make sure the exit is not abnormal. */ | 1272 /* Make sure the exit is not abnormal. */ |
1459 edge e = single_exit (loop); | 1273 edge e = single_exit (loop); |
1460 if (e->flags & EDGE_ABNORMAL) | 1274 if (e->flags & EDGE_ABNORMAL) |
1461 { | 1275 return opt_result::failure_at (vect_location, |
1462 if (dump_enabled_p ()) | 1276 "not vectorized:" |
1463 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1277 " abnormal loop exit edge.\n"); |
1464 "not vectorized: abnormal loop exit edge.\n"); | |
1465 return false; | |
1466 } | |
1467 | 1278 |
1468 *loop_cond = vect_get_loop_niters (loop, assumptions, number_of_iterations, | 1279 *loop_cond = vect_get_loop_niters (loop, assumptions, number_of_iterations, |
1469 number_of_iterationsm1); | 1280 number_of_iterationsm1); |
1470 if (!*loop_cond) | 1281 if (!*loop_cond) |
1471 { | 1282 return opt_result::failure_at |
1472 if (dump_enabled_p ()) | 1283 (vect_location, |
1473 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1284 "not vectorized: complicated exit condition.\n"); |
1474 "not vectorized: complicated exit condition.\n"); | |
1475 return false; | |
1476 } | |
1477 | 1285 |
1478 if (integer_zerop (*assumptions) | 1286 if (integer_zerop (*assumptions) |
1479 || !*number_of_iterations | 1287 || !*number_of_iterations |
1480 || chrec_contains_undetermined (*number_of_iterations)) | 1288 || chrec_contains_undetermined (*number_of_iterations)) |
1481 { | 1289 return opt_result::failure_at |
1482 if (dump_enabled_p ()) | 1290 (*loop_cond, |
1483 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1291 "not vectorized: number of iterations cannot be computed.\n"); |
1484 "not vectorized: number of iterations cannot be " | |
1485 "computed.\n"); | |
1486 return false; | |
1487 } | |
1488 | 1292 |
1489 if (integer_zerop (*number_of_iterations)) | 1293 if (integer_zerop (*number_of_iterations)) |
1490 { | 1294 return opt_result::failure_at |
1491 if (dump_enabled_p ()) | 1295 (*loop_cond, |
1492 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1296 "not vectorized: number of iterations = 0.\n"); |
1493 "not vectorized: number of iterations = 0.\n"); | 1297 |
1494 return false; | 1298 return opt_result::success (); |
1495 } | |
1496 | |
1497 return true; | |
1498 } | 1299 } |
1499 | 1300 |
1500 /* Analyze LOOP form and return a loop_vec_info if it is of suitable form. */ | 1301 /* Analyze LOOP form and return a loop_vec_info if it is of suitable form. */ |
1501 | 1302 |
1502 loop_vec_info | 1303 opt_loop_vec_info |
1503 vect_analyze_loop_form (struct loop *loop) | 1304 vect_analyze_loop_form (struct loop *loop, vec_info_shared *shared) |
1504 { | 1305 { |
1505 tree assumptions, number_of_iterations, number_of_iterationsm1; | 1306 tree assumptions, number_of_iterations, number_of_iterationsm1; |
1506 gcond *loop_cond, *inner_loop_cond = NULL; | 1307 gcond *loop_cond, *inner_loop_cond = NULL; |
1507 | 1308 |
1508 if (! vect_analyze_loop_form_1 (loop, &loop_cond, | 1309 opt_result res |
1509 &assumptions, &number_of_iterationsm1, | 1310 = vect_analyze_loop_form_1 (loop, &loop_cond, |
1510 &number_of_iterations, &inner_loop_cond)) | 1311 &assumptions, &number_of_iterationsm1, |
1511 return NULL; | 1312 &number_of_iterations, &inner_loop_cond); |
1512 | 1313 if (!res) |
1513 loop_vec_info loop_vinfo = new _loop_vec_info (loop); | 1314 return opt_loop_vec_info::propagate_failure (res); |
1315 | |
1316 loop_vec_info loop_vinfo = new _loop_vec_info (loop, shared); | |
1514 LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1; | 1317 LOOP_VINFO_NITERSM1 (loop_vinfo) = number_of_iterationsm1; |
1515 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; | 1318 LOOP_VINFO_NITERS (loop_vinfo) = number_of_iterations; |
1516 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations; | 1319 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = number_of_iterations; |
1517 if (!integer_onep (assumptions)) | 1320 if (!integer_onep (assumptions)) |
1518 { | 1321 { |
1537 dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations); | 1340 dump_generic_expr (MSG_NOTE, TDF_DETAILS, number_of_iterations); |
1538 dump_printf (MSG_NOTE, "\n"); | 1341 dump_printf (MSG_NOTE, "\n"); |
1539 } | 1342 } |
1540 } | 1343 } |
1541 | 1344 |
1542 STMT_VINFO_TYPE (vinfo_for_stmt (loop_cond)) = loop_exit_ctrl_vec_info_type; | 1345 stmt_vec_info loop_cond_info = loop_vinfo->lookup_stmt (loop_cond); |
1346 STMT_VINFO_TYPE (loop_cond_info) = loop_exit_ctrl_vec_info_type; | |
1543 if (inner_loop_cond) | 1347 if (inner_loop_cond) |
1544 STMT_VINFO_TYPE (vinfo_for_stmt (inner_loop_cond)) | 1348 { |
1545 = loop_exit_ctrl_vec_info_type; | 1349 stmt_vec_info inner_loop_cond_info |
1350 = loop_vinfo->lookup_stmt (inner_loop_cond); | |
1351 STMT_VINFO_TYPE (inner_loop_cond_info) = loop_exit_ctrl_vec_info_type; | |
1352 } | |
1546 | 1353 |
1547 gcc_assert (!loop->aux); | 1354 gcc_assert (!loop->aux); |
1548 loop->aux = loop_vinfo; | 1355 loop->aux = loop_vinfo; |
1549 return loop_vinfo; | 1356 return opt_loop_vec_info::success (loop_vinfo); |
1550 } | 1357 } |
1551 | 1358 |
1552 | 1359 |
1553 | 1360 |
1554 /* Scan the loop stmts and dependent on whether there are any (non-)SLP | 1361 /* Scan the loop stmts and dependent on whether there are any (non-)SLP |
1558 vect_update_vf_for_slp (loop_vec_info loop_vinfo) | 1365 vect_update_vf_for_slp (loop_vec_info loop_vinfo) |
1559 { | 1366 { |
1560 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | 1367 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
1561 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); | 1368 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); |
1562 int nbbs = loop->num_nodes; | 1369 int nbbs = loop->num_nodes; |
1563 unsigned int vectorization_factor; | 1370 poly_uint64 vectorization_factor; |
1564 int i; | 1371 int i; |
1565 | 1372 |
1566 if (dump_enabled_p ()) | 1373 DUMP_VECT_SCOPE ("vect_update_vf_for_slp"); |
1567 dump_printf_loc (MSG_NOTE, vect_location, | |
1568 "=== vect_update_vf_for_slp ===\n"); | |
1569 | 1374 |
1570 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); | 1375 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
1571 gcc_assert (vectorization_factor != 0); | 1376 gcc_assert (known_ne (vectorization_factor, 0U)); |
1572 | 1377 |
1573 /* If all the stmts in the loop can be SLPed, we perform only SLP, and | 1378 /* If all the stmts in the loop can be SLPed, we perform only SLP, and |
1574 vectorization factor of the loop is the unrolling factor required by | 1379 vectorization factor of the loop is the unrolling factor required by |
1575 the SLP instances. If that unrolling factor is 1, we say, that we | 1380 the SLP instances. If that unrolling factor is 1, we say, that we |
1576 perform pure SLP on loop - cross iteration parallelism is not | 1381 perform pure SLP on loop - cross iteration parallelism is not |
1580 { | 1385 { |
1581 basic_block bb = bbs[i]; | 1386 basic_block bb = bbs[i]; |
1582 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); | 1387 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); |
1583 gsi_next (&si)) | 1388 gsi_next (&si)) |
1584 { | 1389 { |
1585 gimple *stmt = gsi_stmt (si); | 1390 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); |
1586 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | 1391 stmt_info = vect_stmt_to_vectorize (stmt_info); |
1587 if (STMT_VINFO_IN_PATTERN_P (stmt_info) | |
1588 && STMT_VINFO_RELATED_STMT (stmt_info)) | |
1589 { | |
1590 stmt = STMT_VINFO_RELATED_STMT (stmt_info); | |
1591 stmt_info = vinfo_for_stmt (stmt); | |
1592 } | |
1593 if ((STMT_VINFO_RELEVANT_P (stmt_info) | 1392 if ((STMT_VINFO_RELEVANT_P (stmt_info) |
1594 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) | 1393 || VECTORIZABLE_CYCLE_DEF (STMT_VINFO_DEF_TYPE (stmt_info))) |
1595 && !PURE_SLP_STMT (stmt_info)) | 1394 && !PURE_SLP_STMT (stmt_info)) |
1596 /* STMT needs both SLP and loop-based vectorization. */ | 1395 /* STMT needs both SLP and loop-based vectorization. */ |
1597 only_slp_in_loop = false; | 1396 only_slp_in_loop = false; |
1606 } | 1405 } |
1607 else | 1406 else |
1608 { | 1407 { |
1609 dump_printf_loc (MSG_NOTE, vect_location, | 1408 dump_printf_loc (MSG_NOTE, vect_location, |
1610 "Loop contains SLP and non-SLP stmts\n"); | 1409 "Loop contains SLP and non-SLP stmts\n"); |
1410 /* Both the vectorization factor and unroll factor have the form | |
1411 current_vector_size * X for some rational X, so they must have | |
1412 a common multiple. */ | |
1611 vectorization_factor | 1413 vectorization_factor |
1612 = least_common_multiple (vectorization_factor, | 1414 = force_common_multiple (vectorization_factor, |
1613 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)); | 1415 LOOP_VINFO_SLP_UNROLLING_FACTOR (loop_vinfo)); |
1614 } | 1416 } |
1615 | 1417 |
1616 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; | 1418 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = vectorization_factor; |
1617 if (dump_enabled_p ()) | 1419 if (dump_enabled_p ()) |
1618 dump_printf_loc (MSG_NOTE, vect_location, | 1420 { |
1619 "Updating vectorization factor to %d\n", | 1421 dump_printf_loc (MSG_NOTE, vect_location, |
1620 vectorization_factor); | 1422 "Updating vectorization factor to "); |
1423 dump_dec (MSG_NOTE, vectorization_factor); | |
1424 dump_printf (MSG_NOTE, ".\n"); | |
1425 } | |
1621 } | 1426 } |
1622 | 1427 |
1428 /* Return true if STMT_INFO describes a double reduction phi and if | |
1429 the other phi in the reduction is also relevant for vectorization. | |
1430 This rejects cases such as: | |
1431 | |
1432 outer1: | |
1433 x_1 = PHI <x_3(outer2), ...>; | |
1434 ... | |
1435 | |
1436 inner: | |
1437 x_2 = ...; | |
1438 ... | |
1439 | |
1440 outer2: | |
1441 x_3 = PHI <x_2(inner)>; | |
1442 | |
1443 if nothing in x_2 or elsewhere makes x_1 relevant. */ | |
1444 | |
1445 static bool | |
1446 vect_active_double_reduction_p (stmt_vec_info stmt_info) | |
1447 { | |
1448 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_double_reduction_def) | |
1449 return false; | |
1450 | |
1451 return STMT_VINFO_RELEVANT_P (STMT_VINFO_REDUC_DEF (stmt_info)); | |
1452 } | |
1453 | |
1623 /* Function vect_analyze_loop_operations. | 1454 /* Function vect_analyze_loop_operations. |
1624 | 1455 |
1625 Scan the loop stmts and make sure they are all vectorizable. */ | 1456 Scan the loop stmts and make sure they are all vectorizable. */ |
1626 | 1457 |
1627 static bool | 1458 static opt_result |
1628 vect_analyze_loop_operations (loop_vec_info loop_vinfo) | 1459 vect_analyze_loop_operations (loop_vec_info loop_vinfo) |
1629 { | 1460 { |
1630 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | 1461 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
1631 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); | 1462 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); |
1632 int nbbs = loop->num_nodes; | 1463 int nbbs = loop->num_nodes; |
1633 int i; | 1464 int i; |
1634 stmt_vec_info stmt_info; | 1465 stmt_vec_info stmt_info; |
1635 bool need_to_vectorize = false; | 1466 bool need_to_vectorize = false; |
1636 bool ok; | 1467 bool ok; |
1637 | 1468 |
1638 if (dump_enabled_p ()) | 1469 DUMP_VECT_SCOPE ("vect_analyze_loop_operations"); |
1639 dump_printf_loc (MSG_NOTE, vect_location, | 1470 |
1640 "=== vect_analyze_loop_operations ===\n"); | 1471 stmt_vector_for_cost cost_vec; |
1472 cost_vec.create (2); | |
1641 | 1473 |
1642 for (i = 0; i < nbbs; i++) | 1474 for (i = 0; i < nbbs; i++) |
1643 { | 1475 { |
1644 basic_block bb = bbs[i]; | 1476 basic_block bb = bbs[i]; |
1645 | 1477 |
1647 gsi_next (&si)) | 1479 gsi_next (&si)) |
1648 { | 1480 { |
1649 gphi *phi = si.phi (); | 1481 gphi *phi = si.phi (); |
1650 ok = true; | 1482 ok = true; |
1651 | 1483 |
1652 stmt_info = vinfo_for_stmt (phi); | 1484 stmt_info = loop_vinfo->lookup_stmt (phi); |
1653 if (dump_enabled_p ()) | 1485 if (dump_enabled_p ()) |
1654 { | 1486 dump_printf_loc (MSG_NOTE, vect_location, "examining phi: %G", phi); |
1655 dump_printf_loc (MSG_NOTE, vect_location, "examining phi: "); | |
1656 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0); | |
1657 } | |
1658 if (virtual_operand_p (gimple_phi_result (phi))) | 1487 if (virtual_operand_p (gimple_phi_result (phi))) |
1659 continue; | 1488 continue; |
1660 | 1489 |
1661 /* Inner-loop loop-closed exit phi in outer-loop vectorization | 1490 /* Inner-loop loop-closed exit phi in outer-loop vectorization |
1662 (i.e., a phi in the tail of the outer-loop). */ | 1491 (i.e., a phi in the tail of the outer-loop). */ |
1665 /* FORNOW: we currently don't support the case that these phis | 1494 /* FORNOW: we currently don't support the case that these phis |
1666 are not used in the outerloop (unless it is double reduction, | 1495 are not used in the outerloop (unless it is double reduction, |
1667 i.e., this phi is vect_reduction_def), cause this case | 1496 i.e., this phi is vect_reduction_def), cause this case |
1668 requires to actually do something here. */ | 1497 requires to actually do something here. */ |
1669 if (STMT_VINFO_LIVE_P (stmt_info) | 1498 if (STMT_VINFO_LIVE_P (stmt_info) |
1670 && STMT_VINFO_DEF_TYPE (stmt_info) | 1499 && !vect_active_double_reduction_p (stmt_info)) |
1671 != vect_double_reduction_def) | 1500 return opt_result::failure_at (phi, |
1672 { | 1501 "Unsupported loop-closed phi" |
1673 if (dump_enabled_p ()) | 1502 " in outer-loop.\n"); |
1674 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
1675 "Unsupported loop-closed phi in " | |
1676 "outer-loop.\n"); | |
1677 return false; | |
1678 } | |
1679 | 1503 |
1680 /* If PHI is used in the outer loop, we check that its operand | 1504 /* If PHI is used in the outer loop, we check that its operand |
1681 is defined in the inner loop. */ | 1505 is defined in the inner loop. */ |
1682 if (STMT_VINFO_RELEVANT_P (stmt_info)) | 1506 if (STMT_VINFO_RELEVANT_P (stmt_info)) |
1683 { | 1507 { |
1684 tree phi_op; | 1508 tree phi_op; |
1685 gimple *op_def_stmt; | |
1686 | 1509 |
1687 if (gimple_phi_num_args (phi) != 1) | 1510 if (gimple_phi_num_args (phi) != 1) |
1688 return false; | 1511 return opt_result::failure_at (phi, "unsupported phi"); |
1689 | 1512 |
1690 phi_op = PHI_ARG_DEF (phi, 0); | 1513 phi_op = PHI_ARG_DEF (phi, 0); |
1691 if (TREE_CODE (phi_op) != SSA_NAME) | 1514 stmt_vec_info op_def_info = loop_vinfo->lookup_def (phi_op); |
1692 return false; | 1515 if (!op_def_info) |
1693 | 1516 return opt_result::failure_at (phi, "unsupported phi"); |
1694 op_def_stmt = SSA_NAME_DEF_STMT (phi_op); | 1517 |
1695 if (gimple_nop_p (op_def_stmt) | 1518 if (STMT_VINFO_RELEVANT (op_def_info) != vect_used_in_outer |
1696 || !flow_bb_inside_loop_p (loop, gimple_bb (op_def_stmt)) | 1519 && (STMT_VINFO_RELEVANT (op_def_info) |
1697 || !vinfo_for_stmt (op_def_stmt)) | 1520 != vect_used_in_outer_by_reduction)) |
1698 return false; | 1521 return opt_result::failure_at (phi, "unsupported phi"); |
1699 | |
1700 if (STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt)) | |
1701 != vect_used_in_outer | |
1702 && STMT_VINFO_RELEVANT (vinfo_for_stmt (op_def_stmt)) | |
1703 != vect_used_in_outer_by_reduction) | |
1704 return false; | |
1705 } | 1522 } |
1706 | 1523 |
1707 continue; | 1524 continue; |
1708 } | 1525 } |
1709 | 1526 |
1710 gcc_assert (stmt_info); | 1527 gcc_assert (stmt_info); |
1711 | 1528 |
1712 if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope | 1529 if ((STMT_VINFO_RELEVANT (stmt_info) == vect_used_in_scope |
1713 || STMT_VINFO_LIVE_P (stmt_info)) | 1530 || STMT_VINFO_LIVE_P (stmt_info)) |
1714 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def) | 1531 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def) |
1715 { | 1532 /* A scalar-dependence cycle that we don't support. */ |
1716 /* A scalar-dependence cycle that we don't support. */ | 1533 return opt_result::failure_at (phi, |
1717 if (dump_enabled_p ()) | 1534 "not vectorized:" |
1718 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1535 " scalar dependence cycle.\n"); |
1719 "not vectorized: scalar dependence cycle.\n"); | |
1720 return false; | |
1721 } | |
1722 | 1536 |
1723 if (STMT_VINFO_RELEVANT_P (stmt_info)) | 1537 if (STMT_VINFO_RELEVANT_P (stmt_info)) |
1724 { | 1538 { |
1725 need_to_vectorize = true; | 1539 need_to_vectorize = true; |
1726 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def | 1540 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def |
1727 && ! PURE_SLP_STMT (stmt_info)) | 1541 && ! PURE_SLP_STMT (stmt_info)) |
1728 ok = vectorizable_induction (phi, NULL, NULL, NULL); | 1542 ok = vectorizable_induction (stmt_info, NULL, NULL, NULL, |
1543 &cost_vec); | |
1729 else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def | 1544 else if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def |
1730 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) | 1545 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) |
1731 && ! PURE_SLP_STMT (stmt_info)) | 1546 && ! PURE_SLP_STMT (stmt_info)) |
1732 ok = vectorizable_reduction (phi, NULL, NULL, NULL, NULL); | 1547 ok = vectorizable_reduction (stmt_info, NULL, NULL, NULL, NULL, |
1548 &cost_vec); | |
1733 } | 1549 } |
1734 | 1550 |
1735 if (ok && STMT_VINFO_LIVE_P (stmt_info)) | 1551 /* SLP PHIs are tested by vect_slp_analyze_node_operations. */ |
1736 ok = vectorizable_live_operation (phi, NULL, NULL, -1, NULL); | 1552 if (ok |
1553 && STMT_VINFO_LIVE_P (stmt_info) | |
1554 && !PURE_SLP_STMT (stmt_info)) | |
1555 ok = vectorizable_live_operation (stmt_info, NULL, NULL, -1, NULL, | |
1556 &cost_vec); | |
1737 | 1557 |
1738 if (!ok) | 1558 if (!ok) |
1739 { | 1559 return opt_result::failure_at (phi, |
1740 if (dump_enabled_p ()) | 1560 "not vectorized: relevant phi not " |
1741 { | 1561 "supported: %G", |
1742 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1562 static_cast <gimple *> (phi)); |
1743 "not vectorized: relevant phi not " | |
1744 "supported: "); | |
1745 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, phi, 0); | |
1746 } | |
1747 return false; | |
1748 } | |
1749 } | 1563 } |
1750 | 1564 |
1751 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); | 1565 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); |
1752 gsi_next (&si)) | 1566 gsi_next (&si)) |
1753 { | 1567 { |
1754 gimple *stmt = gsi_stmt (si); | 1568 gimple *stmt = gsi_stmt (si); |
1755 if (!gimple_clobber_p (stmt) | 1569 if (!gimple_clobber_p (stmt)) |
1756 && !vect_analyze_stmt (stmt, &need_to_vectorize, NULL, NULL)) | 1570 { |
1757 return false; | 1571 opt_result res |
1572 = vect_analyze_stmt (loop_vinfo->lookup_stmt (stmt), | |
1573 &need_to_vectorize, | |
1574 NULL, NULL, &cost_vec); | |
1575 if (!res) | |
1576 return res; | |
1577 } | |
1758 } | 1578 } |
1759 } /* bbs */ | 1579 } /* bbs */ |
1580 | |
1581 add_stmt_costs (loop_vinfo->target_cost_data, &cost_vec); | |
1582 cost_vec.release (); | |
1760 | 1583 |
1761 /* All operations in the loop are either irrelevant (deal with loop | 1584 /* All operations in the loop are either irrelevant (deal with loop |
1762 control, or dead), or only used outside the loop and can be moved | 1585 control, or dead), or only used outside the loop and can be moved |
1763 out of the loop (e.g. invariants, inductions). The loop can be | 1586 out of the loop (e.g. invariants, inductions). The loop can be |
1764 optimized away by scalar optimizations. We're better off not | 1587 optimized away by scalar optimizations. We're better off not |
1766 if (!need_to_vectorize) | 1589 if (!need_to_vectorize) |
1767 { | 1590 { |
1768 if (dump_enabled_p ()) | 1591 if (dump_enabled_p ()) |
1769 dump_printf_loc (MSG_NOTE, vect_location, | 1592 dump_printf_loc (MSG_NOTE, vect_location, |
1770 "All the computation can be taken out of the loop.\n"); | 1593 "All the computation can be taken out of the loop.\n"); |
1594 return opt_result::failure_at | |
1595 (vect_location, | |
1596 "not vectorized: redundant loop. no profit to vectorize.\n"); | |
1597 } | |
1598 | |
1599 return opt_result::success (); | |
1600 } | |
1601 | |
1602 /* Analyze the cost of the loop described by LOOP_VINFO. Decide if it | |
1603 is worthwhile to vectorize. Return 1 if definitely yes, 0 if | |
1604 definitely no, or -1 if it's worth retrying. */ | |
1605 | |
1606 static int | |
1607 vect_analyze_loop_costing (loop_vec_info loop_vinfo) | |
1608 { | |
1609 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
1610 unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo); | |
1611 | |
1612 /* Only fully-masked loops can have iteration counts less than the | |
1613 vectorization factor. */ | |
1614 if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) | |
1615 { | |
1616 HOST_WIDE_INT max_niter; | |
1617 | |
1618 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) | |
1619 max_niter = LOOP_VINFO_INT_NITERS (loop_vinfo); | |
1620 else | |
1621 max_niter = max_stmt_executions_int (loop); | |
1622 | |
1623 if (max_niter != -1 | |
1624 && (unsigned HOST_WIDE_INT) max_niter < assumed_vf) | |
1625 { | |
1626 if (dump_enabled_p ()) | |
1627 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
1628 "not vectorized: iteration count smaller than " | |
1629 "vectorization factor.\n"); | |
1630 return 0; | |
1631 } | |
1632 } | |
1633 | |
1634 int min_profitable_iters, min_profitable_estimate; | |
1635 vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters, | |
1636 &min_profitable_estimate); | |
1637 | |
1638 if (min_profitable_iters < 0) | |
1639 { | |
1771 if (dump_enabled_p ()) | 1640 if (dump_enabled_p ()) |
1772 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1641 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1773 "not vectorized: redundant loop. no profit to " | 1642 "not vectorized: vectorization not profitable.\n"); |
1774 "vectorize.\n"); | |
1775 return false; | |
1776 } | |
1777 | |
1778 return true; | |
1779 } | |
1780 | |
1781 | |
1782 /* Function vect_analyze_loop_2. | |
1783 | |
1784 Apply a set of analyses on LOOP, and create a loop_vec_info struct | |
1785 for it. The different analyses will record information in the | |
1786 loop_vec_info struct. */ | |
1787 static bool | |
1788 vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal) | |
1789 { | |
1790 bool ok; | |
1791 int max_vf = MAX_VECTORIZATION_FACTOR; | |
1792 int min_vf = 2; | |
1793 unsigned int n_stmts = 0; | |
1794 | |
1795 /* The first group of checks is independent of the vector size. */ | |
1796 fatal = true; | |
1797 | |
1798 /* Find all data references in the loop (which correspond to vdefs/vuses) | |
1799 and analyze their evolution in the loop. */ | |
1800 | |
1801 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); | |
1802 | |
1803 loop_p loop = LOOP_VINFO_LOOP (loop_vinfo); | |
1804 if (!find_loop_nest (loop, &LOOP_VINFO_LOOP_NEST (loop_vinfo))) | |
1805 { | |
1806 if (dump_enabled_p ()) | 1643 if (dump_enabled_p ()) |
1807 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1644 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1808 "not vectorized: loop nest containing two " | 1645 "not vectorized: vector version will never be " |
1809 "or more consecutive inner loops cannot be " | 1646 "profitable.\n"); |
1810 "vectorized\n"); | 1647 return -1; |
1811 return false; | 1648 } |
1812 } | 1649 |
1813 | 1650 int min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND) |
1651 * assumed_vf); | |
1652 | |
1653 /* Use the cost model only if it is more conservative than user specified | |
1654 threshold. */ | |
1655 unsigned int th = (unsigned) MAX (min_scalar_loop_bound, | |
1656 min_profitable_iters); | |
1657 | |
1658 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th; | |
1659 | |
1660 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) | |
1661 && LOOP_VINFO_INT_NITERS (loop_vinfo) < th) | |
1662 { | |
1663 if (dump_enabled_p ()) | |
1664 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
1665 "not vectorized: vectorization not profitable.\n"); | |
1666 if (dump_enabled_p ()) | |
1667 dump_printf_loc (MSG_NOTE, vect_location, | |
1668 "not vectorized: iteration count smaller than user " | |
1669 "specified loop bound parameter or minimum profitable " | |
1670 "iterations (whichever is more conservative).\n"); | |
1671 return 0; | |
1672 } | |
1673 | |
1674 HOST_WIDE_INT estimated_niter = estimated_stmt_executions_int (loop); | |
1675 if (estimated_niter == -1) | |
1676 estimated_niter = likely_max_stmt_executions_int (loop); | |
1677 if (estimated_niter != -1 | |
1678 && ((unsigned HOST_WIDE_INT) estimated_niter | |
1679 < MAX (th, (unsigned) min_profitable_estimate))) | |
1680 { | |
1681 if (dump_enabled_p ()) | |
1682 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
1683 "not vectorized: estimated iteration count too " | |
1684 "small.\n"); | |
1685 if (dump_enabled_p ()) | |
1686 dump_printf_loc (MSG_NOTE, vect_location, | |
1687 "not vectorized: estimated iteration count smaller " | |
1688 "than specified loop bound parameter or minimum " | |
1689 "profitable iterations (whichever is more " | |
1690 "conservative).\n"); | |
1691 return -1; | |
1692 } | |
1693 | |
1694 return 1; | |
1695 } | |
1696 | |
1697 static opt_result | |
1698 vect_get_datarefs_in_loop (loop_p loop, basic_block *bbs, | |
1699 vec<data_reference_p> *datarefs, | |
1700 unsigned int *n_stmts) | |
1701 { | |
1702 *n_stmts = 0; | |
1814 for (unsigned i = 0; i < loop->num_nodes; i++) | 1703 for (unsigned i = 0; i < loop->num_nodes; i++) |
1815 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); | 1704 for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]); |
1816 !gsi_end_p (gsi); gsi_next (&gsi)) | 1705 !gsi_end_p (gsi); gsi_next (&gsi)) |
1817 { | 1706 { |
1818 gimple *stmt = gsi_stmt (gsi); | 1707 gimple *stmt = gsi_stmt (gsi); |
1819 if (is_gimple_debug (stmt)) | 1708 if (is_gimple_debug (stmt)) |
1820 continue; | 1709 continue; |
1821 ++n_stmts; | 1710 ++(*n_stmts); |
1822 if (!find_data_references_in_stmt (loop, stmt, | 1711 opt_result res = vect_find_stmt_data_reference (loop, stmt, datarefs); |
1823 &LOOP_VINFO_DATAREFS (loop_vinfo))) | 1712 if (!res) |
1824 { | 1713 { |
1825 if (is_gimple_call (stmt) && loop->safelen) | 1714 if (is_gimple_call (stmt) && loop->safelen) |
1826 { | 1715 { |
1827 tree fndecl = gimple_call_fndecl (stmt), op; | 1716 tree fndecl = gimple_call_fndecl (stmt), op; |
1828 if (fndecl != NULL_TREE) | 1717 if (fndecl != NULL_TREE) |
1850 && get_base_address (op))))) | 1739 && get_base_address (op))))) |
1851 continue; | 1740 continue; |
1852 } | 1741 } |
1853 } | 1742 } |
1854 } | 1743 } |
1855 if (dump_enabled_p ()) | 1744 return res; |
1856 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
1857 "not vectorized: loop contains function " | |
1858 "calls or data references that cannot " | |
1859 "be analyzed\n"); | |
1860 return false; | |
1861 } | 1745 } |
1746 /* If dependence analysis will give up due to the limit on the | |
1747 number of datarefs stop here and fail fatally. */ | |
1748 if (datarefs->length () | |
1749 > (unsigned)PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS)) | |
1750 return opt_result::failure_at (stmt, "exceeded param " | |
1751 "loop-max-datarefs-for-datadeps\n"); | |
1862 } | 1752 } |
1753 return opt_result::success (); | |
1754 } | |
1755 | |
1756 /* Function vect_analyze_loop_2. | |
1757 | |
1758 Apply a set of analyses on LOOP, and create a loop_vec_info struct | |
1759 for it. The different analyses will record information in the | |
1760 loop_vec_info struct. */ | |
1761 static opt_result | |
1762 vect_analyze_loop_2 (loop_vec_info loop_vinfo, bool &fatal, unsigned *n_stmts) | |
1763 { | |
1764 opt_result ok = opt_result::success (); | |
1765 int res; | |
1766 unsigned int max_vf = MAX_VECTORIZATION_FACTOR; | |
1767 poly_uint64 min_vf = 2; | |
1768 | |
1769 /* The first group of checks is independent of the vector size. */ | |
1770 fatal = true; | |
1771 | |
1772 /* Find all data references in the loop (which correspond to vdefs/vuses) | |
1773 and analyze their evolution in the loop. */ | |
1774 | |
1775 loop_p loop = LOOP_VINFO_LOOP (loop_vinfo); | |
1776 | |
1777 /* Gather the data references and count stmts in the loop. */ | |
1778 if (!LOOP_VINFO_DATAREFS (loop_vinfo).exists ()) | |
1779 { | |
1780 opt_result res | |
1781 = vect_get_datarefs_in_loop (loop, LOOP_VINFO_BBS (loop_vinfo), | |
1782 &LOOP_VINFO_DATAREFS (loop_vinfo), | |
1783 n_stmts); | |
1784 if (!res) | |
1785 { | |
1786 if (dump_enabled_p ()) | |
1787 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
1788 "not vectorized: loop contains function " | |
1789 "calls or data references that cannot " | |
1790 "be analyzed\n"); | |
1791 return res; | |
1792 } | |
1793 loop_vinfo->shared->save_datarefs (); | |
1794 } | |
1795 else | |
1796 loop_vinfo->shared->check_datarefs (); | |
1863 | 1797 |
1864 /* Analyze the data references and also adjust the minimal | 1798 /* Analyze the data references and also adjust the minimal |
1865 vectorization factor according to the loads and stores. */ | 1799 vectorization factor according to the loads and stores. */ |
1866 | 1800 |
1867 ok = vect_analyze_data_refs (loop_vinfo, &min_vf); | 1801 ok = vect_analyze_data_refs (loop_vinfo, &min_vf); |
1868 if (!ok) | 1802 if (!ok) |
1869 { | 1803 { |
1870 if (dump_enabled_p ()) | 1804 if (dump_enabled_p ()) |
1871 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1805 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1872 "bad data references.\n"); | 1806 "bad data references.\n"); |
1873 return false; | 1807 return ok; |
1874 } | 1808 } |
1875 | 1809 |
1876 /* Classify all cross-iteration scalar data-flow cycles. | 1810 /* Classify all cross-iteration scalar data-flow cycles. |
1877 Cross-iteration cycles caused by virtual phis are analyzed separately. */ | 1811 Cross-iteration cycles caused by virtual phis are analyzed separately. */ |
1878 vect_analyze_scalar_cycles (loop_vinfo); | 1812 vect_analyze_scalar_cycles (loop_vinfo); |
1888 if (!ok) | 1822 if (!ok) |
1889 { | 1823 { |
1890 if (dump_enabled_p ()) | 1824 if (dump_enabled_p ()) |
1891 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1825 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1892 "bad data access.\n"); | 1826 "bad data access.\n"); |
1893 return false; | 1827 return ok; |
1894 } | 1828 } |
1895 | 1829 |
1896 /* Data-flow analysis to detect stmts that do not need to be vectorized. */ | 1830 /* Data-flow analysis to detect stmts that do not need to be vectorized. */ |
1897 | 1831 |
1898 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo); | 1832 ok = vect_mark_stmts_to_be_vectorized (loop_vinfo); |
1899 if (!ok) | 1833 if (!ok) |
1900 { | 1834 { |
1901 if (dump_enabled_p ()) | 1835 if (dump_enabled_p ()) |
1902 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1836 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1903 "unexpected pattern.\n"); | 1837 "unexpected pattern.\n"); |
1904 return false; | 1838 return ok; |
1905 } | 1839 } |
1906 | 1840 |
1907 /* While the rest of the analysis below depends on it in some way. */ | 1841 /* While the rest of the analysis below depends on it in some way. */ |
1908 fatal = false; | 1842 fatal = false; |
1909 | 1843 |
1911 and adjust the maximum vectorization factor according to | 1845 and adjust the maximum vectorization factor according to |
1912 the dependences. | 1846 the dependences. |
1913 FORNOW: fail at the first data dependence that we encounter. */ | 1847 FORNOW: fail at the first data dependence that we encounter. */ |
1914 | 1848 |
1915 ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf); | 1849 ok = vect_analyze_data_ref_dependences (loop_vinfo, &max_vf); |
1916 if (!ok | 1850 if (!ok) |
1917 || max_vf < min_vf) | |
1918 { | 1851 { |
1919 if (dump_enabled_p ()) | 1852 if (dump_enabled_p ()) |
1920 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1853 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1921 "bad data dependence.\n"); | 1854 "bad data dependence.\n"); |
1922 return false; | 1855 return ok; |
1923 } | 1856 } |
1857 if (max_vf != MAX_VECTORIZATION_FACTOR | |
1858 && maybe_lt (max_vf, min_vf)) | |
1859 return opt_result::failure_at (vect_location, "bad data dependence.\n"); | |
1924 LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo) = max_vf; | 1860 LOOP_VINFO_MAX_VECT_FACTOR (loop_vinfo) = max_vf; |
1925 | 1861 |
1926 ok = vect_determine_vectorization_factor (loop_vinfo); | 1862 ok = vect_determine_vectorization_factor (loop_vinfo); |
1927 if (!ok) | 1863 if (!ok) |
1928 { | 1864 { |
1929 if (dump_enabled_p ()) | 1865 if (dump_enabled_p ()) |
1930 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1866 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
1931 "can't determine vectorization factor.\n"); | 1867 "can't determine vectorization factor.\n"); |
1932 return false; | 1868 return ok; |
1933 } | 1869 } |
1934 if (max_vf < LOOP_VINFO_VECT_FACTOR (loop_vinfo)) | 1870 if (max_vf != MAX_VECTORIZATION_FACTOR |
1935 { | 1871 && maybe_lt (max_vf, LOOP_VINFO_VECT_FACTOR (loop_vinfo))) |
1936 if (dump_enabled_p ()) | 1872 return opt_result::failure_at (vect_location, "bad data dependence.\n"); |
1937 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
1938 "bad data dependence.\n"); | |
1939 return false; | |
1940 } | |
1941 | 1873 |
1942 /* Compute the scalar iteration cost. */ | 1874 /* Compute the scalar iteration cost. */ |
1943 vect_compute_single_scalar_iteration_cost (loop_vinfo); | 1875 vect_compute_single_scalar_iteration_cost (loop_vinfo); |
1944 | 1876 |
1945 int saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); | 1877 poly_uint64 saved_vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
1946 HOST_WIDE_INT estimated_niter; | |
1947 unsigned th; | 1878 unsigned th; |
1948 int min_scalar_loop_bound; | |
1949 | 1879 |
1950 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */ | 1880 /* Check the SLP opportunities in the loop, analyze and build SLP trees. */ |
1951 ok = vect_analyze_slp (loop_vinfo, n_stmts); | 1881 ok = vect_analyze_slp (loop_vinfo, *n_stmts); |
1952 if (!ok) | 1882 if (!ok) |
1953 return false; | 1883 return ok; |
1954 | 1884 |
1955 /* If there are any SLP instances mark them as pure_slp. */ | 1885 /* If there are any SLP instances mark them as pure_slp. */ |
1956 bool slp = vect_make_slp_decision (loop_vinfo); | 1886 bool slp = vect_make_slp_decision (loop_vinfo); |
1957 if (slp) | 1887 if (slp) |
1958 { | 1888 { |
1961 | 1891 |
1962 /* Update the vectorization factor based on the SLP decision. */ | 1892 /* Update the vectorization factor based on the SLP decision. */ |
1963 vect_update_vf_for_slp (loop_vinfo); | 1893 vect_update_vf_for_slp (loop_vinfo); |
1964 } | 1894 } |
1965 | 1895 |
1896 bool saved_can_fully_mask_p = LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo); | |
1897 | |
1898 /* We don't expect to have to roll back to anything other than an empty | |
1899 set of rgroups. */ | |
1900 gcc_assert (LOOP_VINFO_MASKS (loop_vinfo).is_empty ()); | |
1901 | |
1966 /* This is the point where we can re-start analysis with SLP forced off. */ | 1902 /* This is the point where we can re-start analysis with SLP forced off. */ |
1967 start_over: | 1903 start_over: |
1968 | 1904 |
1969 /* Now the vectorization factor is final. */ | 1905 /* Now the vectorization factor is final. */ |
1970 unsigned vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); | 1906 poly_uint64 vectorization_factor = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
1971 gcc_assert (vectorization_factor != 0); | 1907 gcc_assert (known_ne (vectorization_factor, 0U)); |
1972 | 1908 |
1973 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ()) | 1909 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) && dump_enabled_p ()) |
1974 dump_printf_loc (MSG_NOTE, vect_location, | 1910 { |
1975 "vectorization_factor = %d, niters = " | 1911 dump_printf_loc (MSG_NOTE, vect_location, |
1976 HOST_WIDE_INT_PRINT_DEC "\n", vectorization_factor, | 1912 "vectorization_factor = "); |
1977 LOOP_VINFO_INT_NITERS (loop_vinfo)); | 1913 dump_dec (MSG_NOTE, vectorization_factor); |
1914 dump_printf (MSG_NOTE, ", niters = %wd\n", | |
1915 LOOP_VINFO_INT_NITERS (loop_vinfo)); | |
1916 } | |
1978 | 1917 |
1979 HOST_WIDE_INT max_niter | 1918 HOST_WIDE_INT max_niter |
1980 = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo)); | 1919 = likely_max_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo)); |
1981 if ((LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) | |
1982 && (LOOP_VINFO_INT_NITERS (loop_vinfo) < vectorization_factor)) | |
1983 || (max_niter != -1 | |
1984 && (unsigned HOST_WIDE_INT) max_niter < vectorization_factor)) | |
1985 { | |
1986 if (dump_enabled_p ()) | |
1987 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
1988 "not vectorized: iteration count smaller than " | |
1989 "vectorization factor.\n"); | |
1990 return false; | |
1991 } | |
1992 | 1920 |
1993 /* Analyze the alignment of the data-refs in the loop. | 1921 /* Analyze the alignment of the data-refs in the loop. |
1994 Fail if a data reference is found that cannot be vectorized. */ | 1922 Fail if a data reference is found that cannot be vectorized. */ |
1995 | 1923 |
1996 ok = vect_analyze_data_refs_alignment (loop_vinfo); | 1924 ok = vect_analyze_data_refs_alignment (loop_vinfo); |
1997 if (!ok) | 1925 if (!ok) |
1998 { | 1926 { |
1999 if (dump_enabled_p ()) | 1927 if (dump_enabled_p ()) |
2000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1928 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2001 "bad data alignment.\n"); | 1929 "bad data alignment.\n"); |
2002 return false; | 1930 return ok; |
2003 } | 1931 } |
2004 | 1932 |
2005 /* Prune the list of ddrs to be tested at run-time by versioning for alias. | 1933 /* Prune the list of ddrs to be tested at run-time by versioning for alias. |
2006 It is important to call pruning after vect_analyze_data_ref_accesses, | 1934 It is important to call pruning after vect_analyze_data_ref_accesses, |
2007 since we use grouping information gathered by interleaving analysis. */ | 1935 since we use grouping information gathered by interleaving analysis. */ |
2008 ok = vect_prune_runtime_alias_test_list (loop_vinfo); | 1936 ok = vect_prune_runtime_alias_test_list (loop_vinfo); |
2009 if (!ok) | 1937 if (!ok) |
2010 return false; | 1938 return ok; |
2011 | 1939 |
2012 /* Do not invoke vect_enhance_data_refs_alignment for eplilogue | 1940 /* Do not invoke vect_enhance_data_refs_alignment for epilogue |
2013 vectorization. */ | 1941 vectorization, since we do not want to add extra peeling or |
1942 add versioning for alignment. */ | |
2014 if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)) | 1943 if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)) |
2015 { | |
2016 /* This pass will decide on using loop versioning and/or loop peeling in | 1944 /* This pass will decide on using loop versioning and/or loop peeling in |
2017 order to enhance the alignment of data references in the loop. */ | 1945 order to enhance the alignment of data references in the loop. */ |
2018 ok = vect_enhance_data_refs_alignment (loop_vinfo); | 1946 ok = vect_enhance_data_refs_alignment (loop_vinfo); |
2019 if (!ok) | 1947 else |
2020 { | 1948 ok = vect_verify_datarefs_alignment (loop_vinfo); |
2021 if (dump_enabled_p ()) | 1949 if (!ok) |
2022 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1950 return ok; |
2023 "bad data alignment.\n"); | |
2024 return false; | |
2025 } | |
2026 } | |
2027 | 1951 |
2028 if (slp) | 1952 if (slp) |
2029 { | 1953 { |
2030 /* Analyze operations in the SLP instances. Note this may | 1954 /* Analyze operations in the SLP instances. Note this may |
2031 remove unsupported SLP instances which makes the above | 1955 remove unsupported SLP instances which makes the above |
2032 SLP kind detection invalid. */ | 1956 SLP kind detection invalid. */ |
2033 unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length (); | 1957 unsigned old_size = LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length (); |
2034 vect_slp_analyze_operations (loop_vinfo); | 1958 vect_slp_analyze_operations (loop_vinfo); |
2035 if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size) | 1959 if (LOOP_VINFO_SLP_INSTANCES (loop_vinfo).length () != old_size) |
2036 goto again; | 1960 { |
1961 ok = opt_result::failure_at (vect_location, | |
1962 "unsupported SLP instances\n"); | |
1963 goto again; | |
1964 } | |
2037 } | 1965 } |
2038 | 1966 |
2039 /* Scan all the remaining operations in the loop that are not subject | 1967 /* Scan all the remaining operations in the loop that are not subject |
2040 to SLP and make sure they are vectorizable. */ | 1968 to SLP and make sure they are vectorizable. */ |
2041 ok = vect_analyze_loop_operations (loop_vinfo); | 1969 ok = vect_analyze_loop_operations (loop_vinfo); |
2042 if (!ok) | 1970 if (!ok) |
2043 { | 1971 { |
2044 if (dump_enabled_p ()) | 1972 if (dump_enabled_p ()) |
2045 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 1973 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2046 "bad operation or unsupported loop bound.\n"); | 1974 "bad operation or unsupported loop bound.\n"); |
2047 return false; | 1975 return ok; |
1976 } | |
1977 | |
1978 /* Decide whether to use a fully-masked loop for this vectorization | |
1979 factor. */ | |
1980 LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) | |
1981 = (LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) | |
1982 && vect_verify_full_masking (loop_vinfo)); | |
1983 if (dump_enabled_p ()) | |
1984 { | |
1985 if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) | |
1986 dump_printf_loc (MSG_NOTE, vect_location, | |
1987 "using a fully-masked loop.\n"); | |
1988 else | |
1989 dump_printf_loc (MSG_NOTE, vect_location, | |
1990 "not using a fully-masked loop.\n"); | |
2048 } | 1991 } |
2049 | 1992 |
2050 /* If epilog loop is required because of data accesses with gaps, | 1993 /* If epilog loop is required because of data accesses with gaps, |
2051 one additional iteration needs to be peeled. Check if there is | 1994 one additional iteration needs to be peeled. Check if there is |
2052 enough iterations for vectorization. */ | 1995 enough iterations for vectorization. */ |
2053 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) | 1996 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) |
2054 && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) | 1997 && LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
2055 { | 1998 && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) |
2056 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); | 1999 { |
2000 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); | |
2057 tree scalar_niters = LOOP_VINFO_NITERSM1 (loop_vinfo); | 2001 tree scalar_niters = LOOP_VINFO_NITERSM1 (loop_vinfo); |
2058 | 2002 |
2059 if (wi::to_widest (scalar_niters) < vf) | 2003 if (known_lt (wi::to_widest (scalar_niters), vf)) |
2060 { | 2004 return opt_result::failure_at (vect_location, |
2061 if (dump_enabled_p ()) | 2005 "loop has no enough iterations to" |
2062 dump_printf_loc (MSG_NOTE, vect_location, | 2006 " support peeling for gaps.\n"); |
2063 "loop has no enough iterations to support" | 2007 } |
2064 " peeling for gaps.\n"); | 2008 |
2065 return false; | 2009 /* Check the costings of the loop make vectorizing worthwhile. */ |
2066 } | 2010 res = vect_analyze_loop_costing (loop_vinfo); |
2067 } | 2011 if (res < 0) |
2068 | 2012 { |
2069 /* Analyze cost. Decide if worth while to vectorize. */ | 2013 ok = opt_result::failure_at (vect_location, |
2070 int min_profitable_estimate, min_profitable_iters; | 2014 "Loop costings may not be worthwhile.\n"); |
2071 vect_estimate_min_profitable_iters (loop_vinfo, &min_profitable_iters, | |
2072 &min_profitable_estimate); | |
2073 | |
2074 if (min_profitable_iters < 0) | |
2075 { | |
2076 if (dump_enabled_p ()) | |
2077 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
2078 "not vectorized: vectorization not profitable.\n"); | |
2079 if (dump_enabled_p ()) | |
2080 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
2081 "not vectorized: vector version will never be " | |
2082 "profitable.\n"); | |
2083 goto again; | 2015 goto again; |
2084 } | 2016 } |
2085 | 2017 if (!res) |
2086 min_scalar_loop_bound = (PARAM_VALUE (PARAM_MIN_VECT_LOOP_BOUND) | 2018 return opt_result::failure_at (vect_location, |
2087 * vectorization_factor); | 2019 "Loop costings not worthwhile.\n"); |
2088 | |
2089 /* Use the cost model only if it is more conservative than user specified | |
2090 threshold. */ | |
2091 th = (unsigned) MAX (min_scalar_loop_bound, min_profitable_iters); | |
2092 | |
2093 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = th; | |
2094 | |
2095 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) | |
2096 && LOOP_VINFO_INT_NITERS (loop_vinfo) < th) | |
2097 { | |
2098 if (dump_enabled_p ()) | |
2099 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
2100 "not vectorized: vectorization not profitable.\n"); | |
2101 if (dump_enabled_p ()) | |
2102 dump_printf_loc (MSG_NOTE, vect_location, | |
2103 "not vectorized: iteration count smaller than user " | |
2104 "specified loop bound parameter or minimum profitable " | |
2105 "iterations (whichever is more conservative).\n"); | |
2106 goto again; | |
2107 } | |
2108 | |
2109 estimated_niter | |
2110 = estimated_stmt_executions_int (LOOP_VINFO_LOOP (loop_vinfo)); | |
2111 if (estimated_niter == -1) | |
2112 estimated_niter = max_niter; | |
2113 if (estimated_niter != -1 | |
2114 && ((unsigned HOST_WIDE_INT) estimated_niter | |
2115 < MAX (th, (unsigned) min_profitable_estimate))) | |
2116 { | |
2117 if (dump_enabled_p ()) | |
2118 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
2119 "not vectorized: estimated iteration count too " | |
2120 "small.\n"); | |
2121 if (dump_enabled_p ()) | |
2122 dump_printf_loc (MSG_NOTE, vect_location, | |
2123 "not vectorized: estimated iteration count smaller " | |
2124 "than specified loop bound parameter or minimum " | |
2125 "profitable iterations (whichever is more " | |
2126 "conservative).\n"); | |
2127 goto again; | |
2128 } | |
2129 | 2020 |
2130 /* Decide whether we need to create an epilogue loop to handle | 2021 /* Decide whether we need to create an epilogue loop to handle |
2131 remaining scalar iterations. */ | 2022 remaining scalar iterations. */ |
2132 th = ((LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) | 2023 th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); |
2133 / LOOP_VINFO_VECT_FACTOR (loop_vinfo)) | 2024 |
2134 * LOOP_VINFO_VECT_FACTOR (loop_vinfo)); | 2025 unsigned HOST_WIDE_INT const_vf; |
2135 | 2026 if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) |
2136 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) | 2027 /* The main loop handles all iterations. */ |
2137 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0) | 2028 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false; |
2138 { | 2029 else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
2139 if (ctz_hwi (LOOP_VINFO_INT_NITERS (loop_vinfo) | 2030 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0) |
2140 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)) | 2031 { |
2141 < exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo))) | 2032 /* Work out the (constant) number of iterations that need to be |
2033 peeled for reasons other than niters. */ | |
2034 unsigned int peel_niter = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); | |
2035 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) | |
2036 peel_niter += 1; | |
2037 if (!multiple_p (LOOP_VINFO_INT_NITERS (loop_vinfo) - peel_niter, | |
2038 LOOP_VINFO_VECT_FACTOR (loop_vinfo))) | |
2142 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true; | 2039 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true; |
2143 } | 2040 } |
2144 else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) | 2041 else if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) |
2145 || (tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)) | 2042 /* ??? When peeling for gaps but not alignment, we could |
2146 < (unsigned)exact_log2 (LOOP_VINFO_VECT_FACTOR (loop_vinfo)) | 2043 try to check whether the (variable) niters is known to be |
2147 /* In case of versioning, check if the maximum number of | 2044 VF * N + 1. That's something of a niche case though. */ |
2148 iterations is greater than th. If they are identical, | 2045 || LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) |
2149 the epilogue is unnecessary. */ | 2046 || !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&const_vf) |
2047 || ((tree_ctz (LOOP_VINFO_NITERS (loop_vinfo)) | |
2048 < (unsigned) exact_log2 (const_vf)) | |
2049 /* In case of versioning, check if the maximum number of | |
2050 iterations is greater than th. If they are identical, | |
2051 the epilogue is unnecessary. */ | |
2150 && (!LOOP_REQUIRES_VERSIONING (loop_vinfo) | 2052 && (!LOOP_REQUIRES_VERSIONING (loop_vinfo) |
2151 || (unsigned HOST_WIDE_INT) max_niter > th))) | 2053 || ((unsigned HOST_WIDE_INT) max_niter |
2054 > (th / const_vf) * const_vf)))) | |
2152 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true; | 2055 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = true; |
2153 | 2056 |
2154 /* If an epilogue loop is required make sure we can create one. */ | 2057 /* If an epilogue loop is required make sure we can create one. */ |
2155 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) | 2058 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) |
2156 || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)) | 2059 || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo)) |
2160 if (!vect_can_advance_ivs_p (loop_vinfo) | 2063 if (!vect_can_advance_ivs_p (loop_vinfo) |
2161 || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo), | 2064 || !slpeel_can_duplicate_loop_p (LOOP_VINFO_LOOP (loop_vinfo), |
2162 single_exit (LOOP_VINFO_LOOP | 2065 single_exit (LOOP_VINFO_LOOP |
2163 (loop_vinfo)))) | 2066 (loop_vinfo)))) |
2164 { | 2067 { |
2165 if (dump_enabled_p ()) | 2068 ok = opt_result::failure_at (vect_location, |
2166 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 2069 "not vectorized: can't create required " |
2167 "not vectorized: can't create required " | 2070 "epilog loop\n"); |
2168 "epilog loop\n"); | |
2169 goto again; | 2071 goto again; |
2170 } | 2072 } |
2171 } | 2073 } |
2172 | 2074 |
2173 /* During peeling, we need to check if number of loop iterations is | 2075 /* During peeling, we need to check if number of loop iterations is |
2174 enough for both peeled prolog loop and vector loop. This check | 2076 enough for both peeled prolog loop and vector loop. This check |
2175 can be merged along with threshold check of loop versioning, so | 2077 can be merged along with threshold check of loop versioning, so |
2176 increase threshold for this case if necessary. */ | 2078 increase threshold for this case if necessary. */ |
2177 if (LOOP_REQUIRES_VERSIONING (loop_vinfo) | 2079 if (LOOP_REQUIRES_VERSIONING (loop_vinfo)) |
2178 && (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) | 2080 { |
2179 || LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))) | 2081 poly_uint64 niters_th = 0; |
2180 { | 2082 |
2181 unsigned niters_th; | 2083 if (!vect_use_loop_mask_for_alignment_p (loop_vinfo)) |
2182 | 2084 { |
2183 /* Niters for peeled prolog loop. */ | 2085 /* Niters for peeled prolog loop. */ |
2184 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0) | 2086 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) < 0) |
2185 { | 2087 { |
2186 struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); | 2088 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo); |
2187 tree vectype = STMT_VINFO_VECTYPE (vinfo_for_stmt (DR_STMT (dr))); | 2089 tree vectype = STMT_VINFO_VECTYPE (dr_info->stmt); |
2188 | 2090 niters_th += TYPE_VECTOR_SUBPARTS (vectype) - 1; |
2189 niters_th = TYPE_VECTOR_SUBPARTS (vectype) - 1; | 2091 } |
2190 } | 2092 else |
2191 else | 2093 niters_th += LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); |
2192 niters_th = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); | 2094 } |
2193 | 2095 |
2194 /* Niters for at least one iteration of vectorized loop. */ | 2096 /* Niters for at least one iteration of vectorized loop. */ |
2195 niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo); | 2097 if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) |
2098 niters_th += LOOP_VINFO_VECT_FACTOR (loop_vinfo); | |
2196 /* One additional iteration because of peeling for gap. */ | 2099 /* One additional iteration because of peeling for gap. */ |
2197 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) | 2100 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) |
2198 niters_th++; | 2101 niters_th += 1; |
2199 if (LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) < niters_th) | 2102 LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = niters_th; |
2200 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = niters_th; | 2103 } |
2201 } | 2104 |
2202 | 2105 gcc_assert (known_eq (vectorization_factor, |
2203 gcc_assert (vectorization_factor | 2106 LOOP_VINFO_VECT_FACTOR (loop_vinfo))); |
2204 == (unsigned)LOOP_VINFO_VECT_FACTOR (loop_vinfo)); | |
2205 | 2107 |
2206 /* Ok to vectorize! */ | 2108 /* Ok to vectorize! */ |
2207 return true; | 2109 return opt_result::success (); |
2208 | 2110 |
2209 again: | 2111 again: |
2112 /* Ensure that "ok" is false (with an opt_problem if dumping is enabled). */ | |
2113 gcc_assert (!ok); | |
2114 | |
2210 /* Try again with SLP forced off but if we didn't do any SLP there is | 2115 /* Try again with SLP forced off but if we didn't do any SLP there is |
2211 no point in re-trying. */ | 2116 no point in re-trying. */ |
2212 if (!slp) | 2117 if (!slp) |
2213 return false; | 2118 return ok; |
2214 | 2119 |
2215 /* If there are reduction chains re-trying will fail anyway. */ | 2120 /* If there are reduction chains re-trying will fail anyway. */ |
2216 if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ()) | 2121 if (! LOOP_VINFO_REDUCTION_CHAINS (loop_vinfo).is_empty ()) |
2217 return false; | 2122 return ok; |
2218 | 2123 |
2219 /* Likewise if the grouped loads or stores in the SLP cannot be handled | 2124 /* Likewise if the grouped loads or stores in the SLP cannot be handled |
2220 via interleaving or lane instructions. */ | 2125 via interleaving or lane instructions. */ |
2221 slp_instance instance; | 2126 slp_instance instance; |
2222 slp_tree node; | 2127 slp_tree node; |
2223 unsigned i, j; | 2128 unsigned i, j; |
2224 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance) | 2129 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance) |
2225 { | 2130 { |
2226 stmt_vec_info vinfo; | 2131 stmt_vec_info vinfo; |
2227 vinfo = vinfo_for_stmt | 2132 vinfo = SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0]; |
2228 (SLP_TREE_SCALAR_STMTS (SLP_INSTANCE_TREE (instance))[0]); | |
2229 if (! STMT_VINFO_GROUPED_ACCESS (vinfo)) | 2133 if (! STMT_VINFO_GROUPED_ACCESS (vinfo)) |
2230 continue; | 2134 continue; |
2231 vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo)); | 2135 vinfo = DR_GROUP_FIRST_ELEMENT (vinfo); |
2232 unsigned int size = STMT_VINFO_GROUP_SIZE (vinfo); | 2136 unsigned int size = DR_GROUP_SIZE (vinfo); |
2233 tree vectype = STMT_VINFO_VECTYPE (vinfo); | 2137 tree vectype = STMT_VINFO_VECTYPE (vinfo); |
2234 if (! vect_store_lanes_supported (vectype, size) | 2138 if (! vect_store_lanes_supported (vectype, size, false) |
2235 && ! vect_grouped_store_supported (vectype, size)) | 2139 && ! known_eq (TYPE_VECTOR_SUBPARTS (vectype), 1U) |
2236 return false; | 2140 && ! vect_grouped_store_supported (vectype, size)) |
2141 return opt_result::failure_at (vinfo->stmt, | |
2142 "unsupported grouped store\n"); | |
2237 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node) | 2143 FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (instance), j, node) |
2238 { | 2144 { |
2239 vinfo = vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]); | 2145 vinfo = SLP_TREE_SCALAR_STMTS (node)[0]; |
2240 vinfo = vinfo_for_stmt (STMT_VINFO_GROUP_FIRST_ELEMENT (vinfo)); | 2146 vinfo = DR_GROUP_FIRST_ELEMENT (vinfo); |
2241 bool single_element_p = !STMT_VINFO_GROUP_NEXT_ELEMENT (vinfo); | 2147 bool single_element_p = !DR_GROUP_NEXT_ELEMENT (vinfo); |
2242 size = STMT_VINFO_GROUP_SIZE (vinfo); | 2148 size = DR_GROUP_SIZE (vinfo); |
2243 vectype = STMT_VINFO_VECTYPE (vinfo); | 2149 vectype = STMT_VINFO_VECTYPE (vinfo); |
2244 if (! vect_load_lanes_supported (vectype, size) | 2150 if (! vect_load_lanes_supported (vectype, size, false) |
2245 && ! vect_grouped_load_supported (vectype, single_element_p, | 2151 && ! vect_grouped_load_supported (vectype, single_element_p, |
2246 size)) | 2152 size)) |
2247 return false; | 2153 return opt_result::failure_at (vinfo->stmt, |
2154 "unsupported grouped load\n"); | |
2248 } | 2155 } |
2249 } | 2156 } |
2250 | 2157 |
2251 if (dump_enabled_p ()) | 2158 if (dump_enabled_p ()) |
2252 dump_printf_loc (MSG_NOTE, vect_location, | 2159 dump_printf_loc (MSG_NOTE, vect_location, |
2256 slp = false; | 2163 slp = false; |
2257 /* Restore vectorization factor as it were without SLP. */ | 2164 /* Restore vectorization factor as it were without SLP. */ |
2258 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor; | 2165 LOOP_VINFO_VECT_FACTOR (loop_vinfo) = saved_vectorization_factor; |
2259 /* Free the SLP instances. */ | 2166 /* Free the SLP instances. */ |
2260 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance) | 2167 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), j, instance) |
2261 vect_free_slp_instance (instance); | 2168 vect_free_slp_instance (instance, false); |
2262 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release (); | 2169 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release (); |
2263 /* Reset SLP type to loop_vect on all stmts. */ | 2170 /* Reset SLP type to loop_vect on all stmts. */ |
2264 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i) | 2171 for (i = 0; i < LOOP_VINFO_LOOP (loop_vinfo)->num_nodes; ++i) |
2265 { | 2172 { |
2266 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i]; | 2173 basic_block bb = LOOP_VINFO_BBS (loop_vinfo)[i]; |
2267 for (gimple_stmt_iterator si = gsi_start_phis (bb); | 2174 for (gimple_stmt_iterator si = gsi_start_phis (bb); |
2268 !gsi_end_p (si); gsi_next (&si)) | 2175 !gsi_end_p (si); gsi_next (&si)) |
2269 { | 2176 { |
2270 stmt_vec_info stmt_info = vinfo_for_stmt (gsi_stmt (si)); | 2177 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); |
2271 STMT_SLP_TYPE (stmt_info) = loop_vect; | 2178 STMT_SLP_TYPE (stmt_info) = loop_vect; |
2272 } | 2179 } |
2273 for (gimple_stmt_iterator si = gsi_start_bb (bb); | 2180 for (gimple_stmt_iterator si = gsi_start_bb (bb); |
2274 !gsi_end_p (si); gsi_next (&si)) | 2181 !gsi_end_p (si); gsi_next (&si)) |
2275 { | 2182 { |
2276 stmt_vec_info stmt_info = vinfo_for_stmt (gsi_stmt (si)); | 2183 stmt_vec_info stmt_info = loop_vinfo->lookup_stmt (gsi_stmt (si)); |
2277 STMT_SLP_TYPE (stmt_info) = loop_vect; | 2184 STMT_SLP_TYPE (stmt_info) = loop_vect; |
2278 if (STMT_VINFO_IN_PATTERN_P (stmt_info)) | 2185 if (STMT_VINFO_IN_PATTERN_P (stmt_info)) |
2279 { | 2186 { |
2280 stmt_info = vinfo_for_stmt (STMT_VINFO_RELATED_STMT (stmt_info)); | 2187 gimple *pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); |
2188 stmt_info = STMT_VINFO_RELATED_STMT (stmt_info); | |
2281 STMT_SLP_TYPE (stmt_info) = loop_vect; | 2189 STMT_SLP_TYPE (stmt_info) = loop_vect; |
2282 for (gimple_stmt_iterator pi | 2190 for (gimple_stmt_iterator pi = gsi_start (pattern_def_seq); |
2283 = gsi_start (STMT_VINFO_PATTERN_DEF_SEQ (stmt_info)); | |
2284 !gsi_end_p (pi); gsi_next (&pi)) | 2191 !gsi_end_p (pi); gsi_next (&pi)) |
2285 { | 2192 STMT_SLP_TYPE (loop_vinfo->lookup_stmt (gsi_stmt (pi))) |
2286 gimple *pstmt = gsi_stmt (pi); | 2193 = loop_vect; |
2287 STMT_SLP_TYPE (vinfo_for_stmt (pstmt)) = loop_vect; | |
2288 } | |
2289 } | 2194 } |
2290 } | 2195 } |
2291 } | 2196 } |
2292 /* Free optimized alias test DDRS. */ | 2197 /* Free optimized alias test DDRS. */ |
2198 LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).truncate (0); | |
2293 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release (); | 2199 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo).release (); |
2294 LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release (); | 2200 LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).release (); |
2295 /* Reset target cost data. */ | 2201 /* Reset target cost data. */ |
2296 destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)); | 2202 destroy_cost_data (LOOP_VINFO_TARGET_COST_DATA (loop_vinfo)); |
2297 LOOP_VINFO_TARGET_COST_DATA (loop_vinfo) | 2203 LOOP_VINFO_TARGET_COST_DATA (loop_vinfo) |
2298 = init_cost (LOOP_VINFO_LOOP (loop_vinfo)); | 2204 = init_cost (LOOP_VINFO_LOOP (loop_vinfo)); |
2205 /* Reset accumulated rgroup information. */ | |
2206 release_vec_loop_masks (&LOOP_VINFO_MASKS (loop_vinfo)); | |
2299 /* Reset assorted flags. */ | 2207 /* Reset assorted flags. */ |
2300 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false; | 2208 LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo) = false; |
2301 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false; | 2209 LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) = false; |
2302 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0; | 2210 LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo) = 0; |
2211 LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo) = 0; | |
2212 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = saved_can_fully_mask_p; | |
2303 | 2213 |
2304 goto start_over; | 2214 goto start_over; |
2305 } | 2215 } |
2306 | 2216 |
2307 /* Function vect_analyze_loop. | 2217 /* Function vect_analyze_loop. |
2308 | 2218 |
2309 Apply a set of analyses on LOOP, and create a loop_vec_info struct | 2219 Apply a set of analyses on LOOP, and create a loop_vec_info struct |
2310 for it. The different analyses will record information in the | 2220 for it. The different analyses will record information in the |
2311 loop_vec_info struct. If ORIG_LOOP_VINFO is not NULL epilogue must | 2221 loop_vec_info struct. If ORIG_LOOP_VINFO is not NULL epilogue must |
2312 be vectorized. */ | 2222 be vectorized. */ |
2313 loop_vec_info | 2223 opt_loop_vec_info |
2314 vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo) | 2224 vect_analyze_loop (struct loop *loop, loop_vec_info orig_loop_vinfo, |
2225 vec_info_shared *shared) | |
2315 { | 2226 { |
2316 loop_vec_info loop_vinfo; | 2227 auto_vector_sizes vector_sizes; |
2317 unsigned int vector_sizes; | |
2318 | 2228 |
2319 /* Autodetect first vector size we try. */ | 2229 /* Autodetect first vector size we try. */ |
2320 current_vector_size = 0; | 2230 current_vector_size = 0; |
2321 vector_sizes = targetm.vectorize.autovectorize_vector_sizes (); | 2231 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes); |
2322 | 2232 unsigned int next_size = 0; |
2323 if (dump_enabled_p ()) | 2233 |
2324 dump_printf_loc (MSG_NOTE, vect_location, | 2234 DUMP_VECT_SCOPE ("analyze_loop_nest"); |
2325 "===== analyze_loop_nest =====\n"); | |
2326 | 2235 |
2327 if (loop_outer (loop) | 2236 if (loop_outer (loop) |
2328 && loop_vec_info_for_loop (loop_outer (loop)) | 2237 && loop_vec_info_for_loop (loop_outer (loop)) |
2329 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop)))) | 2238 && LOOP_VINFO_VECTORIZABLE_P (loop_vec_info_for_loop (loop_outer (loop)))) |
2330 { | 2239 return opt_loop_vec_info::failure_at (vect_location, |
2331 if (dump_enabled_p ()) | 2240 "outer-loop already vectorized.\n"); |
2332 dump_printf_loc (MSG_NOTE, vect_location, | 2241 |
2333 "outer-loop already vectorized.\n"); | 2242 if (!find_loop_nest (loop, &shared->loop_nest)) |
2334 return NULL; | 2243 return opt_loop_vec_info::failure_at |
2335 } | 2244 (vect_location, |
2336 | 2245 "not vectorized: loop nest containing two or more consecutive inner" |
2246 " loops cannot be vectorized\n"); | |
2247 | |
2248 unsigned n_stmts = 0; | |
2249 poly_uint64 autodetected_vector_size = 0; | |
2337 while (1) | 2250 while (1) |
2338 { | 2251 { |
2339 /* Check the CFG characteristics of the loop (nesting, entry/exit). */ | 2252 /* Check the CFG characteristics of the loop (nesting, entry/exit). */ |
2340 loop_vinfo = vect_analyze_loop_form (loop); | 2253 opt_loop_vec_info loop_vinfo |
2254 = vect_analyze_loop_form (loop, shared); | |
2341 if (!loop_vinfo) | 2255 if (!loop_vinfo) |
2342 { | 2256 { |
2343 if (dump_enabled_p ()) | 2257 if (dump_enabled_p ()) |
2344 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 2258 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2345 "bad loop form.\n"); | 2259 "bad loop form.\n"); |
2346 return NULL; | 2260 return loop_vinfo; |
2347 } | 2261 } |
2348 | 2262 |
2349 bool fatal = false; | 2263 bool fatal = false; |
2350 | 2264 |
2351 if (orig_loop_vinfo) | 2265 if (orig_loop_vinfo) |
2352 LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = orig_loop_vinfo; | 2266 LOOP_VINFO_ORIG_LOOP_INFO (loop_vinfo) = orig_loop_vinfo; |
2353 | 2267 |
2354 if (vect_analyze_loop_2 (loop_vinfo, fatal)) | 2268 opt_result res = vect_analyze_loop_2 (loop_vinfo, fatal, &n_stmts); |
2269 if (res) | |
2355 { | 2270 { |
2356 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; | 2271 LOOP_VINFO_VECTORIZABLE_P (loop_vinfo) = 1; |
2357 | 2272 |
2358 return loop_vinfo; | 2273 return loop_vinfo; |
2359 } | 2274 } |
2360 | 2275 |
2361 delete loop_vinfo; | 2276 delete loop_vinfo; |
2362 | 2277 |
2363 vector_sizes &= ~current_vector_size; | 2278 if (next_size == 0) |
2279 autodetected_vector_size = current_vector_size; | |
2280 | |
2281 if (next_size < vector_sizes.length () | |
2282 && known_eq (vector_sizes[next_size], autodetected_vector_size)) | |
2283 next_size += 1; | |
2284 | |
2364 if (fatal | 2285 if (fatal |
2365 || vector_sizes == 0 | 2286 || next_size == vector_sizes.length () |
2366 || current_vector_size == 0) | 2287 || known_eq (current_vector_size, 0U)) |
2367 return NULL; | 2288 return opt_loop_vec_info::propagate_failure (res); |
2368 | 2289 |
2369 /* Try the next biggest vector size. */ | 2290 /* Try the next biggest vector size. */ |
2370 current_vector_size = 1 << floor_log2 (vector_sizes); | 2291 current_vector_size = vector_sizes[next_size++]; |
2371 if (dump_enabled_p ()) | 2292 if (dump_enabled_p ()) |
2372 dump_printf_loc (MSG_NOTE, vect_location, | 2293 { |
2373 "***** Re-trying analysis with " | 2294 dump_printf_loc (MSG_NOTE, vect_location, |
2374 "vector size %d\n", current_vector_size); | 2295 "***** Re-trying analysis with " |
2296 "vector size "); | |
2297 dump_dec (MSG_NOTE, current_vector_size); | |
2298 dump_printf (MSG_NOTE, "\n"); | |
2299 } | |
2375 } | 2300 } |
2376 } | 2301 } |
2377 | 2302 |
2378 | 2303 /* Return true if there is an in-order reduction function for CODE, storing |
2379 /* Function reduction_code_for_scalar_code | 2304 it in *REDUC_FN if so. */ |
2305 | |
2306 static bool | |
2307 fold_left_reduction_fn (tree_code code, internal_fn *reduc_fn) | |
2308 { | |
2309 switch (code) | |
2310 { | |
2311 case PLUS_EXPR: | |
2312 *reduc_fn = IFN_FOLD_LEFT_PLUS; | |
2313 return true; | |
2314 | |
2315 default: | |
2316 return false; | |
2317 } | |
2318 } | |
2319 | |
2320 /* Function reduction_fn_for_scalar_code | |
2380 | 2321 |
2381 Input: | 2322 Input: |
2382 CODE - tree_code of a reduction operations. | 2323 CODE - tree_code of a reduction operations. |
2383 | 2324 |
2384 Output: | 2325 Output: |
2385 REDUC_CODE - the corresponding tree-code to be used to reduce the | 2326 REDUC_FN - the corresponding internal function to be used to reduce the |
2386 vector of partial results into a single scalar result, or ERROR_MARK | 2327 vector of partial results into a single scalar result, or IFN_LAST |
2387 if the operation is a supported reduction operation, but does not have | 2328 if the operation is a supported reduction operation, but does not have |
2388 such a tree-code. | 2329 such an internal function. |
2389 | 2330 |
2390 Return FALSE if CODE currently cannot be vectorized as reduction. */ | 2331 Return FALSE if CODE currently cannot be vectorized as reduction. */ |
2391 | 2332 |
2392 static bool | 2333 static bool |
2393 reduction_code_for_scalar_code (enum tree_code code, | 2334 reduction_fn_for_scalar_code (enum tree_code code, internal_fn *reduc_fn) |
2394 enum tree_code *reduc_code) | |
2395 { | 2335 { |
2396 switch (code) | 2336 switch (code) |
2397 { | 2337 { |
2398 case MAX_EXPR: | 2338 case MAX_EXPR: |
2399 *reduc_code = REDUC_MAX_EXPR; | 2339 *reduc_fn = IFN_REDUC_MAX; |
2400 return true; | 2340 return true; |
2401 | 2341 |
2402 case MIN_EXPR: | 2342 case MIN_EXPR: |
2403 *reduc_code = REDUC_MIN_EXPR; | 2343 *reduc_fn = IFN_REDUC_MIN; |
2404 return true; | 2344 return true; |
2405 | 2345 |
2406 case PLUS_EXPR: | 2346 case PLUS_EXPR: |
2407 *reduc_code = REDUC_PLUS_EXPR; | 2347 *reduc_fn = IFN_REDUC_PLUS; |
2408 return true; | 2348 return true; |
2349 | |
2350 case BIT_AND_EXPR: | |
2351 *reduc_fn = IFN_REDUC_AND; | |
2352 return true; | |
2353 | |
2354 case BIT_IOR_EXPR: | |
2355 *reduc_fn = IFN_REDUC_IOR; | |
2356 return true; | |
2357 | |
2358 case BIT_XOR_EXPR: | |
2359 *reduc_fn = IFN_REDUC_XOR; | |
2360 return true; | |
2409 | 2361 |
2410 case MULT_EXPR: | 2362 case MULT_EXPR: |
2411 case MINUS_EXPR: | 2363 case MINUS_EXPR: |
2412 case BIT_IOR_EXPR: | 2364 *reduc_fn = IFN_LAST; |
2413 case BIT_XOR_EXPR: | |
2414 case BIT_AND_EXPR: | |
2415 *reduc_code = ERROR_MARK; | |
2416 return true; | 2365 return true; |
2417 | 2366 |
2418 default: | 2367 default: |
2419 return false; | 2368 return false; |
2420 } | 2369 } |
2421 } | 2370 } |
2422 | 2371 |
2372 /* If there is a neutral value X such that SLP reduction NODE would not | |
2373 be affected by the introduction of additional X elements, return that X, | |
2374 otherwise return null. CODE is the code of the reduction. REDUC_CHAIN | |
2375 is true if the SLP statements perform a single reduction, false if each | |
2376 statement performs an independent reduction. */ | |
2377 | |
2378 static tree | |
2379 neutral_op_for_slp_reduction (slp_tree slp_node, tree_code code, | |
2380 bool reduc_chain) | |
2381 { | |
2382 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node); | |
2383 stmt_vec_info stmt_vinfo = stmts[0]; | |
2384 tree vector_type = STMT_VINFO_VECTYPE (stmt_vinfo); | |
2385 tree scalar_type = TREE_TYPE (vector_type); | |
2386 struct loop *loop = gimple_bb (stmt_vinfo->stmt)->loop_father; | |
2387 gcc_assert (loop); | |
2388 | |
2389 switch (code) | |
2390 { | |
2391 case WIDEN_SUM_EXPR: | |
2392 case DOT_PROD_EXPR: | |
2393 case SAD_EXPR: | |
2394 case PLUS_EXPR: | |
2395 case MINUS_EXPR: | |
2396 case BIT_IOR_EXPR: | |
2397 case BIT_XOR_EXPR: | |
2398 return build_zero_cst (scalar_type); | |
2399 | |
2400 case MULT_EXPR: | |
2401 return build_one_cst (scalar_type); | |
2402 | |
2403 case BIT_AND_EXPR: | |
2404 return build_all_ones_cst (scalar_type); | |
2405 | |
2406 case MAX_EXPR: | |
2407 case MIN_EXPR: | |
2408 /* For MIN/MAX the initial values are neutral. A reduction chain | |
2409 has only a single initial value, so that value is neutral for | |
2410 all statements. */ | |
2411 if (reduc_chain) | |
2412 return PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, | |
2413 loop_preheader_edge (loop)); | |
2414 return NULL_TREE; | |
2415 | |
2416 default: | |
2417 return NULL_TREE; | |
2418 } | |
2419 } | |
2423 | 2420 |
2424 /* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement | 2421 /* Error reporting helper for vect_is_simple_reduction below. GIMPLE statement |
2425 STMT is printed with a message MSG. */ | 2422 STMT is printed with a message MSG. */ |
2426 | 2423 |
2427 static void | 2424 static void |
2428 report_vect_op (dump_flags_t msg_type, gimple *stmt, const char *msg) | 2425 report_vect_op (dump_flags_t msg_type, gimple *stmt, const char *msg) |
2429 { | 2426 { |
2430 dump_printf_loc (msg_type, vect_location, "%s", msg); | 2427 dump_printf_loc (msg_type, vect_location, "%s%G", msg, stmt); |
2431 dump_gimple_stmt (msg_type, TDF_SLIM, stmt, 0); | |
2432 } | 2428 } |
2433 | 2429 |
2430 /* DEF_STMT_INFO occurs in a loop that contains a potential reduction | |
2431 operation. Return true if the results of DEF_STMT_INFO are something | |
2432 that can be accumulated by such a reduction. */ | |
2433 | |
2434 static bool | |
2435 vect_valid_reduction_input_p (stmt_vec_info def_stmt_info) | |
2436 { | |
2437 return (is_gimple_assign (def_stmt_info->stmt) | |
2438 || is_gimple_call (def_stmt_info->stmt) | |
2439 || STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_induction_def | |
2440 || (gimple_code (def_stmt_info->stmt) == GIMPLE_PHI | |
2441 && STMT_VINFO_DEF_TYPE (def_stmt_info) == vect_internal_def | |
2442 && !is_loop_header_bb_p (gimple_bb (def_stmt_info->stmt)))); | |
2443 } | |
2434 | 2444 |
2435 /* Detect SLP reduction of the form: | 2445 /* Detect SLP reduction of the form: |
2436 | 2446 |
2437 #a1 = phi <a5, a0> | 2447 #a1 = phi <a5, a0> |
2438 a2 = operation (a1) | 2448 a2 = operation (a1) |
2453 gimple *first_stmt) | 2463 gimple *first_stmt) |
2454 { | 2464 { |
2455 struct loop *loop = (gimple_bb (phi))->loop_father; | 2465 struct loop *loop = (gimple_bb (phi))->loop_father; |
2456 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); | 2466 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); |
2457 enum tree_code code; | 2467 enum tree_code code; |
2458 gimple *current_stmt = NULL, *loop_use_stmt = NULL, *first, *next_stmt; | 2468 gimple *loop_use_stmt = NULL; |
2459 stmt_vec_info use_stmt_info, current_stmt_info; | 2469 stmt_vec_info use_stmt_info, current_stmt_info = NULL; |
2460 tree lhs; | 2470 tree lhs; |
2461 imm_use_iterator imm_iter; | 2471 imm_use_iterator imm_iter; |
2462 use_operand_p use_p; | 2472 use_operand_p use_p; |
2463 int nloop_uses, size = 0, n_out_of_loop_uses; | 2473 int nloop_uses, size = 0, n_out_of_loop_uses; |
2464 bool found = false; | 2474 bool found = false; |
2515 || code != gimple_assign_rhs_code (loop_use_stmt) | 2525 || code != gimple_assign_rhs_code (loop_use_stmt) |
2516 || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt))) | 2526 || !flow_bb_inside_loop_p (loop, gimple_bb (loop_use_stmt))) |
2517 return false; | 2527 return false; |
2518 | 2528 |
2519 /* Insert USE_STMT into reduction chain. */ | 2529 /* Insert USE_STMT into reduction chain. */ |
2520 use_stmt_info = vinfo_for_stmt (loop_use_stmt); | 2530 use_stmt_info = loop_info->lookup_stmt (loop_use_stmt); |
2521 if (current_stmt) | 2531 if (current_stmt_info) |
2522 { | 2532 { |
2523 current_stmt_info = vinfo_for_stmt (current_stmt); | 2533 REDUC_GROUP_NEXT_ELEMENT (current_stmt_info) = use_stmt_info; |
2524 GROUP_NEXT_ELEMENT (current_stmt_info) = loop_use_stmt; | 2534 REDUC_GROUP_FIRST_ELEMENT (use_stmt_info) |
2525 GROUP_FIRST_ELEMENT (use_stmt_info) | 2535 = REDUC_GROUP_FIRST_ELEMENT (current_stmt_info); |
2526 = GROUP_FIRST_ELEMENT (current_stmt_info); | |
2527 } | 2536 } |
2528 else | 2537 else |
2529 GROUP_FIRST_ELEMENT (use_stmt_info) = loop_use_stmt; | 2538 REDUC_GROUP_FIRST_ELEMENT (use_stmt_info) = use_stmt_info; |
2530 | 2539 |
2531 lhs = gimple_assign_lhs (loop_use_stmt); | 2540 lhs = gimple_assign_lhs (loop_use_stmt); |
2532 current_stmt = loop_use_stmt; | 2541 current_stmt_info = use_stmt_info; |
2533 size++; | 2542 size++; |
2534 } | 2543 } |
2535 | 2544 |
2536 if (!found || loop_use_stmt != phi || size < 2) | 2545 if (!found || loop_use_stmt != phi || size < 2) |
2537 return false; | 2546 return false; |
2538 | 2547 |
2539 /* Swap the operands, if needed, to make the reduction operand be the second | 2548 /* Swap the operands, if needed, to make the reduction operand be the second |
2540 operand. */ | 2549 operand. */ |
2541 lhs = PHI_RESULT (phi); | 2550 lhs = PHI_RESULT (phi); |
2542 next_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt)); | 2551 stmt_vec_info next_stmt_info = REDUC_GROUP_FIRST_ELEMENT (current_stmt_info); |
2543 while (next_stmt) | 2552 while (next_stmt_info) |
2544 { | 2553 { |
2554 gassign *next_stmt = as_a <gassign *> (next_stmt_info->stmt); | |
2545 if (gimple_assign_rhs2 (next_stmt) == lhs) | 2555 if (gimple_assign_rhs2 (next_stmt) == lhs) |
2546 { | 2556 { |
2547 tree op = gimple_assign_rhs1 (next_stmt); | 2557 tree op = gimple_assign_rhs1 (next_stmt); |
2548 gimple *def_stmt = NULL; | 2558 stmt_vec_info def_stmt_info = loop_info->lookup_def (op); |
2549 | |
2550 if (TREE_CODE (op) == SSA_NAME) | |
2551 def_stmt = SSA_NAME_DEF_STMT (op); | |
2552 | 2559 |
2553 /* Check that the other def is either defined in the loop | 2560 /* Check that the other def is either defined in the loop |
2554 ("vect_internal_def"), or it's an induction (defined by a | 2561 ("vect_internal_def"), or it's an induction (defined by a |
2555 loop-header phi-node). */ | 2562 loop-header phi-node). */ |
2556 if (def_stmt | 2563 if (def_stmt_info |
2557 && gimple_bb (def_stmt) | 2564 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)) |
2558 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) | 2565 && vect_valid_reduction_input_p (def_stmt_info)) |
2559 && (is_gimple_assign (def_stmt) | |
2560 || is_gimple_call (def_stmt) | |
2561 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)) | |
2562 == vect_induction_def | |
2563 || (gimple_code (def_stmt) == GIMPLE_PHI | |
2564 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)) | |
2565 == vect_internal_def | |
2566 && !is_loop_header_bb_p (gimple_bb (def_stmt))))) | |
2567 { | 2566 { |
2568 lhs = gimple_assign_lhs (next_stmt); | 2567 lhs = gimple_assign_lhs (next_stmt); |
2569 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); | 2568 next_stmt_info = REDUC_GROUP_NEXT_ELEMENT (next_stmt_info); |
2570 continue; | 2569 continue; |
2571 } | 2570 } |
2572 | 2571 |
2573 return false; | 2572 return false; |
2574 } | 2573 } |
2575 else | 2574 else |
2576 { | 2575 { |
2577 tree op = gimple_assign_rhs2 (next_stmt); | 2576 tree op = gimple_assign_rhs2 (next_stmt); |
2578 gimple *def_stmt = NULL; | 2577 stmt_vec_info def_stmt_info = loop_info->lookup_def (op); |
2579 | |
2580 if (TREE_CODE (op) == SSA_NAME) | |
2581 def_stmt = SSA_NAME_DEF_STMT (op); | |
2582 | 2578 |
2583 /* Check that the other def is either defined in the loop | 2579 /* Check that the other def is either defined in the loop |
2584 ("vect_internal_def"), or it's an induction (defined by a | 2580 ("vect_internal_def"), or it's an induction (defined by a |
2585 loop-header phi-node). */ | 2581 loop-header phi-node). */ |
2586 if (def_stmt | 2582 if (def_stmt_info |
2587 && gimple_bb (def_stmt) | 2583 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt)) |
2588 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) | 2584 && vect_valid_reduction_input_p (def_stmt_info)) |
2589 && (is_gimple_assign (def_stmt) | |
2590 || is_gimple_call (def_stmt) | |
2591 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)) | |
2592 == vect_induction_def | |
2593 || (gimple_code (def_stmt) == GIMPLE_PHI | |
2594 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)) | |
2595 == vect_internal_def | |
2596 && !is_loop_header_bb_p (gimple_bb (def_stmt))))) | |
2597 { | 2585 { |
2598 if (dump_enabled_p ()) | 2586 if (dump_enabled_p ()) |
2599 { | 2587 dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: %G", |
2600 dump_printf_loc (MSG_NOTE, vect_location, "swapping oprnds: "); | 2588 next_stmt); |
2601 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, next_stmt, 0); | |
2602 } | |
2603 | 2589 |
2604 swap_ssa_operands (next_stmt, | 2590 swap_ssa_operands (next_stmt, |
2605 gimple_assign_rhs1_ptr (next_stmt), | 2591 gimple_assign_rhs1_ptr (next_stmt), |
2606 gimple_assign_rhs2_ptr (next_stmt)); | 2592 gimple_assign_rhs2_ptr (next_stmt)); |
2607 update_stmt (next_stmt); | 2593 update_stmt (next_stmt); |
2612 else | 2598 else |
2613 return false; | 2599 return false; |
2614 } | 2600 } |
2615 | 2601 |
2616 lhs = gimple_assign_lhs (next_stmt); | 2602 lhs = gimple_assign_lhs (next_stmt); |
2617 next_stmt = GROUP_NEXT_ELEMENT (vinfo_for_stmt (next_stmt)); | 2603 next_stmt_info = REDUC_GROUP_NEXT_ELEMENT (next_stmt_info); |
2618 } | 2604 } |
2619 | 2605 |
2620 /* Save the chain for further analysis in SLP detection. */ | 2606 /* Save the chain for further analysis in SLP detection. */ |
2621 first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (current_stmt)); | 2607 stmt_vec_info first_stmt_info |
2622 LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first); | 2608 = REDUC_GROUP_FIRST_ELEMENT (current_stmt_info); |
2623 GROUP_SIZE (vinfo_for_stmt (first)) = size; | 2609 LOOP_VINFO_REDUCTION_CHAINS (loop_info).safe_push (first_stmt_info); |
2610 REDUC_GROUP_SIZE (first_stmt_info) = size; | |
2624 | 2611 |
2625 return true; | 2612 return true; |
2613 } | |
2614 | |
2615 /* Return true if we need an in-order reduction for operation CODE | |
2616 on type TYPE. NEED_WRAPPING_INTEGRAL_OVERFLOW is true if integer | |
2617 overflow must wrap. */ | |
2618 | |
2619 static bool | |
2620 needs_fold_left_reduction_p (tree type, tree_code code, | |
2621 bool need_wrapping_integral_overflow) | |
2622 { | |
2623 /* CHECKME: check for !flag_finite_math_only too? */ | |
2624 if (SCALAR_FLOAT_TYPE_P (type)) | |
2625 switch (code) | |
2626 { | |
2627 case MIN_EXPR: | |
2628 case MAX_EXPR: | |
2629 return false; | |
2630 | |
2631 default: | |
2632 return !flag_associative_math; | |
2633 } | |
2634 | |
2635 if (INTEGRAL_TYPE_P (type)) | |
2636 { | |
2637 if (!operation_no_trapping_overflow (type, code)) | |
2638 return true; | |
2639 if (need_wrapping_integral_overflow | |
2640 && !TYPE_OVERFLOW_WRAPS (type) | |
2641 && operation_can_overflow (code)) | |
2642 return true; | |
2643 return false; | |
2644 } | |
2645 | |
2646 if (SAT_FIXED_POINT_TYPE_P (type)) | |
2647 return true; | |
2648 | |
2649 return false; | |
2650 } | |
2651 | |
2652 /* Return true if the reduction PHI in LOOP with latch arg LOOP_ARG and | |
2653 reduction operation CODE has a handled computation expression. */ | |
2654 | |
2655 bool | |
2656 check_reduction_path (dump_user_location_t loc, loop_p loop, gphi *phi, | |
2657 tree loop_arg, enum tree_code code) | |
2658 { | |
2659 auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; | |
2660 auto_bitmap visited; | |
2661 tree lookfor = PHI_RESULT (phi); | |
2662 ssa_op_iter curri; | |
2663 use_operand_p curr = op_iter_init_phiuse (&curri, phi, SSA_OP_USE); | |
2664 while (USE_FROM_PTR (curr) != loop_arg) | |
2665 curr = op_iter_next_use (&curri); | |
2666 curri.i = curri.numops; | |
2667 do | |
2668 { | |
2669 path.safe_push (std::make_pair (curri, curr)); | |
2670 tree use = USE_FROM_PTR (curr); | |
2671 if (use == lookfor) | |
2672 break; | |
2673 gimple *def = SSA_NAME_DEF_STMT (use); | |
2674 if (gimple_nop_p (def) | |
2675 || ! flow_bb_inside_loop_p (loop, gimple_bb (def))) | |
2676 { | |
2677 pop: | |
2678 do | |
2679 { | |
2680 std::pair<ssa_op_iter, use_operand_p> x = path.pop (); | |
2681 curri = x.first; | |
2682 curr = x.second; | |
2683 do | |
2684 curr = op_iter_next_use (&curri); | |
2685 /* Skip already visited or non-SSA operands (from iterating | |
2686 over PHI args). */ | |
2687 while (curr != NULL_USE_OPERAND_P | |
2688 && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME | |
2689 || ! bitmap_set_bit (visited, | |
2690 SSA_NAME_VERSION | |
2691 (USE_FROM_PTR (curr))))); | |
2692 } | |
2693 while (curr == NULL_USE_OPERAND_P && ! path.is_empty ()); | |
2694 if (curr == NULL_USE_OPERAND_P) | |
2695 break; | |
2696 } | |
2697 else | |
2698 { | |
2699 if (gimple_code (def) == GIMPLE_PHI) | |
2700 curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE); | |
2701 else | |
2702 curr = op_iter_init_use (&curri, def, SSA_OP_USE); | |
2703 while (curr != NULL_USE_OPERAND_P | |
2704 && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME | |
2705 || ! bitmap_set_bit (visited, | |
2706 SSA_NAME_VERSION | |
2707 (USE_FROM_PTR (curr))))) | |
2708 curr = op_iter_next_use (&curri); | |
2709 if (curr == NULL_USE_OPERAND_P) | |
2710 goto pop; | |
2711 } | |
2712 } | |
2713 while (1); | |
2714 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2715 { | |
2716 dump_printf_loc (MSG_NOTE, loc, "reduction path: "); | |
2717 unsigned i; | |
2718 std::pair<ssa_op_iter, use_operand_p> *x; | |
2719 FOR_EACH_VEC_ELT (path, i, x) | |
2720 dump_printf (MSG_NOTE, "%T ", USE_FROM_PTR (x->second)); | |
2721 dump_printf (MSG_NOTE, "\n"); | |
2722 } | |
2723 | |
2724 /* Check whether the reduction path detected is valid. */ | |
2725 bool fail = path.length () == 0; | |
2726 bool neg = false; | |
2727 for (unsigned i = 1; i < path.length (); ++i) | |
2728 { | |
2729 gimple *use_stmt = USE_STMT (path[i].second); | |
2730 tree op = USE_FROM_PTR (path[i].second); | |
2731 if (! has_single_use (op) | |
2732 || ! is_gimple_assign (use_stmt)) | |
2733 { | |
2734 fail = true; | |
2735 break; | |
2736 } | |
2737 if (gimple_assign_rhs_code (use_stmt) != code) | |
2738 { | |
2739 if (code == PLUS_EXPR | |
2740 && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) | |
2741 { | |
2742 /* Track whether we negate the reduction value each iteration. */ | |
2743 if (gimple_assign_rhs2 (use_stmt) == op) | |
2744 neg = ! neg; | |
2745 } | |
2746 else | |
2747 { | |
2748 fail = true; | |
2749 break; | |
2750 } | |
2751 } | |
2752 } | |
2753 return ! fail && ! neg; | |
2626 } | 2754 } |
2627 | 2755 |
2628 | 2756 |
2629 /* Function vect_is_simple_reduction | 2757 /* Function vect_is_simple_reduction |
2630 | 2758 |
2668 if (a[i] < val) | 2796 if (a[i] < val) |
2669 ret_val = a[i]; | 2797 ret_val = a[i]; |
2670 | 2798 |
2671 */ | 2799 */ |
2672 | 2800 |
2673 static gimple * | 2801 static stmt_vec_info |
2674 vect_is_simple_reduction (loop_vec_info loop_info, gimple *phi, | 2802 vect_is_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, |
2675 bool *double_reduc, | 2803 bool *double_reduc, |
2676 bool need_wrapping_integral_overflow, | 2804 bool need_wrapping_integral_overflow, |
2677 enum vect_reduction_type *v_reduc_type) | 2805 enum vect_reduction_type *v_reduc_type) |
2678 { | 2806 { |
2807 gphi *phi = as_a <gphi *> (phi_info->stmt); | |
2679 struct loop *loop = (gimple_bb (phi))->loop_father; | 2808 struct loop *loop = (gimple_bb (phi))->loop_father; |
2680 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); | 2809 struct loop *vect_loop = LOOP_VINFO_LOOP (loop_info); |
2681 gimple *def_stmt, *def1 = NULL, *def2 = NULL, *phi_use_stmt = NULL; | 2810 gimple *phi_use_stmt = NULL; |
2682 enum tree_code orig_code, code; | 2811 enum tree_code orig_code, code; |
2683 tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE; | 2812 tree op1, op2, op3 = NULL_TREE, op4 = NULL_TREE; |
2684 tree type; | 2813 tree type; |
2685 int nloop_uses; | 2814 int nloop_uses; |
2686 tree name; | 2815 tree name; |
2729 edge latch_e = loop_latch_edge (loop); | 2858 edge latch_e = loop_latch_edge (loop); |
2730 tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); | 2859 tree loop_arg = PHI_ARG_DEF_FROM_EDGE (phi, latch_e); |
2731 if (TREE_CODE (loop_arg) != SSA_NAME) | 2860 if (TREE_CODE (loop_arg) != SSA_NAME) |
2732 { | 2861 { |
2733 if (dump_enabled_p ()) | 2862 if (dump_enabled_p ()) |
2734 { | 2863 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2735 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 2864 "reduction: not ssa_name: %T\n", loop_arg); |
2736 "reduction: not ssa_name: "); | |
2737 dump_generic_expr (MSG_MISSED_OPTIMIZATION, TDF_SLIM, loop_arg); | |
2738 dump_printf (MSG_MISSED_OPTIMIZATION, "\n"); | |
2739 } | |
2740 return NULL; | 2865 return NULL; |
2741 } | 2866 } |
2742 | 2867 |
2743 def_stmt = SSA_NAME_DEF_STMT (loop_arg); | 2868 stmt_vec_info def_stmt_info = loop_info->lookup_def (loop_arg); |
2744 if (is_gimple_assign (def_stmt)) | 2869 if (!def_stmt_info |
2870 || !flow_bb_inside_loop_p (loop, gimple_bb (def_stmt_info->stmt))) | |
2871 return NULL; | |
2872 | |
2873 if (gassign *def_stmt = dyn_cast <gassign *> (def_stmt_info->stmt)) | |
2745 { | 2874 { |
2746 name = gimple_assign_lhs (def_stmt); | 2875 name = gimple_assign_lhs (def_stmt); |
2747 phi_def = false; | 2876 phi_def = false; |
2748 } | 2877 } |
2749 else if (gimple_code (def_stmt) == GIMPLE_PHI) | 2878 else if (gphi *def_stmt = dyn_cast <gphi *> (def_stmt_info->stmt)) |
2750 { | 2879 { |
2751 name = PHI_RESULT (def_stmt); | 2880 name = PHI_RESULT (def_stmt); |
2752 phi_def = true; | 2881 phi_def = true; |
2753 } | 2882 } |
2754 else | 2883 else |
2755 { | 2884 { |
2756 if (dump_enabled_p ()) | 2885 if (dump_enabled_p ()) |
2757 { | 2886 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
2758 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 2887 "reduction: unhandled reduction operation: %G", |
2759 "reduction: unhandled reduction operation: "); | 2888 def_stmt_info->stmt); |
2760 dump_gimple_stmt (MSG_MISSED_OPTIMIZATION, TDF_SLIM, def_stmt, 0); | |
2761 } | |
2762 return NULL; | 2889 return NULL; |
2763 } | 2890 } |
2764 | |
2765 if (! flow_bb_inside_loop_p (loop, gimple_bb (def_stmt))) | |
2766 return NULL; | |
2767 | 2891 |
2768 nloop_uses = 0; | 2892 nloop_uses = 0; |
2769 auto_vec<gphi *, 3> lcphis; | 2893 auto_vec<gphi *, 3> lcphis; |
2770 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name) | 2894 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, name) |
2771 { | 2895 { |
2788 | 2912 |
2789 /* If DEF_STMT is a phi node itself, we expect it to have a single argument | 2913 /* If DEF_STMT is a phi node itself, we expect it to have a single argument |
2790 defined in the inner loop. */ | 2914 defined in the inner loop. */ |
2791 if (phi_def) | 2915 if (phi_def) |
2792 { | 2916 { |
2917 gphi *def_stmt = as_a <gphi *> (def_stmt_info->stmt); | |
2793 op1 = PHI_ARG_DEF (def_stmt, 0); | 2918 op1 = PHI_ARG_DEF (def_stmt, 0); |
2794 | 2919 |
2795 if (gimple_phi_num_args (def_stmt) != 1 | 2920 if (gimple_phi_num_args (def_stmt) != 1 |
2796 || TREE_CODE (op1) != SSA_NAME) | 2921 || TREE_CODE (op1) != SSA_NAME) |
2797 { | 2922 { |
2800 "unsupported phi node definition.\n"); | 2925 "unsupported phi node definition.\n"); |
2801 | 2926 |
2802 return NULL; | 2927 return NULL; |
2803 } | 2928 } |
2804 | 2929 |
2805 def1 = SSA_NAME_DEF_STMT (op1); | 2930 gimple *def1 = SSA_NAME_DEF_STMT (op1); |
2806 if (gimple_bb (def1) | 2931 if (gimple_bb (def1) |
2807 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) | 2932 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) |
2808 && loop->inner | 2933 && loop->inner |
2809 && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1)) | 2934 && flow_bb_inside_loop_p (loop->inner, gimple_bb (def1)) |
2810 && is_gimple_assign (def1) | 2935 && is_gimple_assign (def1) |
2936 && is_a <gphi *> (phi_use_stmt) | |
2811 && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt))) | 2937 && flow_bb_inside_loop_p (loop->inner, gimple_bb (phi_use_stmt))) |
2812 { | 2938 { |
2813 if (dump_enabled_p ()) | 2939 if (dump_enabled_p ()) |
2814 report_vect_op (MSG_NOTE, def_stmt, | 2940 report_vect_op (MSG_NOTE, def_stmt, |
2815 "detected double reduction: "); | 2941 "detected double reduction: "); |
2816 | 2942 |
2817 *double_reduc = true; | 2943 *double_reduc = true; |
2818 return def_stmt; | 2944 return def_stmt_info; |
2819 } | 2945 } |
2820 | 2946 |
2821 return NULL; | 2947 return NULL; |
2822 } | 2948 } |
2823 | 2949 |
2839 if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt))) | 2965 if (! flow_bb_inside_loop_p (vect_loop, gimple_bb (use_stmt))) |
2840 check_reduction = true; | 2966 check_reduction = true; |
2841 } | 2967 } |
2842 } | 2968 } |
2843 | 2969 |
2970 gassign *def_stmt = as_a <gassign *> (def_stmt_info->stmt); | |
2844 bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop); | 2971 bool nested_in_vect_loop = flow_loop_nested_p (vect_loop, loop); |
2845 code = orig_code = gimple_assign_rhs_code (def_stmt); | 2972 code = orig_code = gimple_assign_rhs_code (def_stmt); |
2846 | 2973 |
2847 /* We can handle "res -= x[i]", which is non-associative by | 2974 /* We can handle "res -= x[i]", which is non-associative by |
2848 simply rewriting this into "res += -x[i]". Avoid changing | 2975 simply rewriting this into "res += -x[i]". Avoid changing |
2914 && !types_compatible_p (type, TREE_TYPE (op4)))) | 3041 && !types_compatible_p (type, TREE_TYPE (op4)))) |
2915 { | 3042 { |
2916 if (dump_enabled_p ()) | 3043 if (dump_enabled_p ()) |
2917 { | 3044 { |
2918 dump_printf_loc (MSG_NOTE, vect_location, | 3045 dump_printf_loc (MSG_NOTE, vect_location, |
2919 "reduction: multiple types: operation type: "); | 3046 "reduction: multiple types: operation type: " |
2920 dump_generic_expr (MSG_NOTE, TDF_SLIM, type); | 3047 "%T, operands types: %T,%T", |
2921 dump_printf (MSG_NOTE, ", operands types: "); | 3048 type, TREE_TYPE (op1), TREE_TYPE (op2)); |
2922 dump_generic_expr (MSG_NOTE, TDF_SLIM, | |
2923 TREE_TYPE (op1)); | |
2924 dump_printf (MSG_NOTE, ","); | |
2925 dump_generic_expr (MSG_NOTE, TDF_SLIM, | |
2926 TREE_TYPE (op2)); | |
2927 if (op3) | 3049 if (op3) |
2928 { | 3050 dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op3)); |
2929 dump_printf (MSG_NOTE, ","); | |
2930 dump_generic_expr (MSG_NOTE, TDF_SLIM, | |
2931 TREE_TYPE (op3)); | |
2932 } | |
2933 | 3051 |
2934 if (op4) | 3052 if (op4) |
2935 { | 3053 dump_printf (MSG_NOTE, ",%T", TREE_TYPE (op4)); |
2936 dump_printf (MSG_NOTE, ","); | |
2937 dump_generic_expr (MSG_NOTE, TDF_SLIM, | |
2938 TREE_TYPE (op4)); | |
2939 } | |
2940 dump_printf (MSG_NOTE, "\n"); | 3054 dump_printf (MSG_NOTE, "\n"); |
2941 } | 3055 } |
2942 | 3056 |
2943 return NULL; | 3057 return NULL; |
2944 } | 3058 } |
2945 | 3059 |
2946 /* Check that it's ok to change the order of the computation. | 3060 /* Check whether it's ok to change the order of the computation. |
2947 Generally, when vectorizing a reduction we change the order of the | 3061 Generally, when vectorizing a reduction we change the order of the |
2948 computation. This may change the behavior of the program in some | 3062 computation. This may change the behavior of the program in some |
2949 cases, so we need to check that this is ok. One exception is when | 3063 cases, so we need to check that this is ok. One exception is when |
2950 vectorizing an outer-loop: the inner-loop is executed sequentially, | 3064 vectorizing an outer-loop: the inner-loop is executed sequentially, |
2951 and therefore vectorizing reductions in the inner-loop during | 3065 and therefore vectorizing reductions in the inner-loop during |
2952 outer-loop vectorization is safe. */ | 3066 outer-loop vectorization is safe. */ |
2953 | 3067 if (check_reduction |
2954 if (*v_reduc_type != COND_REDUCTION | 3068 && *v_reduc_type == TREE_CODE_REDUCTION |
2955 && check_reduction) | 3069 && needs_fold_left_reduction_p (type, code, |
2956 { | 3070 need_wrapping_integral_overflow)) |
2957 /* CHECKME: check for !flag_finite_math_only too? */ | 3071 *v_reduc_type = FOLD_LEFT_REDUCTION; |
2958 if (SCALAR_FLOAT_TYPE_P (type) && !flag_associative_math) | |
2959 { | |
2960 /* Changing the order of operations changes the semantics. */ | |
2961 if (dump_enabled_p ()) | |
2962 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, | |
2963 "reduction: unsafe fp math optimization: "); | |
2964 return NULL; | |
2965 } | |
2966 else if (INTEGRAL_TYPE_P (type)) | |
2967 { | |
2968 if (!operation_no_trapping_overflow (type, code)) | |
2969 { | |
2970 /* Changing the order of operations changes the semantics. */ | |
2971 if (dump_enabled_p ()) | |
2972 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, | |
2973 "reduction: unsafe int math optimization" | |
2974 " (overflow traps): "); | |
2975 return NULL; | |
2976 } | |
2977 if (need_wrapping_integral_overflow | |
2978 && !TYPE_OVERFLOW_WRAPS (type) | |
2979 && operation_can_overflow (code)) | |
2980 { | |
2981 /* Changing the order of operations changes the semantics. */ | |
2982 if (dump_enabled_p ()) | |
2983 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, | |
2984 "reduction: unsafe int math optimization" | |
2985 " (overflow doesn't wrap): "); | |
2986 return NULL; | |
2987 } | |
2988 } | |
2989 else if (SAT_FIXED_POINT_TYPE_P (type)) | |
2990 { | |
2991 /* Changing the order of operations changes the semantics. */ | |
2992 if (dump_enabled_p ()) | |
2993 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, | |
2994 "reduction: unsafe fixed-point math optimization: "); | |
2995 return NULL; | |
2996 } | |
2997 } | |
2998 | 3072 |
2999 /* Reduction is safe. We're dealing with one of the following: | 3073 /* Reduction is safe. We're dealing with one of the following: |
3000 1) integer arithmetic and no trapv | 3074 1) integer arithmetic and no trapv |
3001 2) floating point arithmetic, and special flags permit this optimization | 3075 2) floating point arithmetic, and special flags permit this optimization |
3002 3) nested cycle (i.e., outer loop vectorization). */ | 3076 3) nested cycle (i.e., outer loop vectorization). */ |
3003 if (TREE_CODE (op1) == SSA_NAME) | 3077 stmt_vec_info def1_info = loop_info->lookup_def (op1); |
3004 def1 = SSA_NAME_DEF_STMT (op1); | 3078 stmt_vec_info def2_info = loop_info->lookup_def (op2); |
3005 | 3079 if (code != COND_EXPR && !def1_info && !def2_info) |
3006 if (TREE_CODE (op2) == SSA_NAME) | |
3007 def2 = SSA_NAME_DEF_STMT (op2); | |
3008 | |
3009 if (code != COND_EXPR | |
3010 && ((!def1 || gimple_nop_p (def1)) && (!def2 || gimple_nop_p (def2)))) | |
3011 { | 3080 { |
3012 if (dump_enabled_p ()) | 3081 if (dump_enabled_p ()) |
3013 report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: "); | 3082 report_vect_op (MSG_NOTE, def_stmt, "reduction: no defs for operands: "); |
3014 return NULL; | 3083 return NULL; |
3015 } | 3084 } |
3016 | 3085 |
3017 /* Check that one def is the reduction def, defined by PHI, | 3086 /* Check that one def is the reduction def, defined by PHI, |
3018 the other def is either defined in the loop ("vect_internal_def"), | 3087 the other def is either defined in the loop ("vect_internal_def"), |
3019 or it's an induction (defined by a loop-header phi-node). */ | 3088 or it's an induction (defined by a loop-header phi-node). */ |
3020 | 3089 |
3021 if (def2 && def2 == phi | 3090 if (def2_info |
3091 && def2_info->stmt == phi | |
3022 && (code == COND_EXPR | 3092 && (code == COND_EXPR |
3023 || !def1 || gimple_nop_p (def1) | 3093 || !def1_info |
3024 || !flow_bb_inside_loop_p (loop, gimple_bb (def1)) | 3094 || !flow_bb_inside_loop_p (loop, gimple_bb (def1_info->stmt)) |
3025 || (def1 && flow_bb_inside_loop_p (loop, gimple_bb (def1)) | 3095 || vect_valid_reduction_input_p (def1_info))) |
3026 && (is_gimple_assign (def1) | |
3027 || is_gimple_call (def1) | |
3028 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) | |
3029 == vect_induction_def | |
3030 || (gimple_code (def1) == GIMPLE_PHI | |
3031 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def1)) | |
3032 == vect_internal_def | |
3033 && !is_loop_header_bb_p (gimple_bb (def1))))))) | |
3034 { | 3096 { |
3035 if (dump_enabled_p ()) | 3097 if (dump_enabled_p ()) |
3036 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: "); | 3098 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: "); |
3037 return def_stmt; | 3099 return def_stmt_info; |
3038 } | 3100 } |
3039 | 3101 |
3040 if (def1 && def1 == phi | 3102 if (def1_info |
3103 && def1_info->stmt == phi | |
3041 && (code == COND_EXPR | 3104 && (code == COND_EXPR |
3042 || !def2 || gimple_nop_p (def2) | 3105 || !def2_info |
3043 || !flow_bb_inside_loop_p (loop, gimple_bb (def2)) | 3106 || !flow_bb_inside_loop_p (loop, gimple_bb (def2_info->stmt)) |
3044 || (def2 && flow_bb_inside_loop_p (loop, gimple_bb (def2)) | 3107 || vect_valid_reduction_input_p (def2_info))) |
3045 && (is_gimple_assign (def2) | |
3046 || is_gimple_call (def2) | |
3047 || STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) | |
3048 == vect_induction_def | |
3049 || (gimple_code (def2) == GIMPLE_PHI | |
3050 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def2)) | |
3051 == vect_internal_def | |
3052 && !is_loop_header_bb_p (gimple_bb (def2))))))) | |
3053 { | 3108 { |
3054 if (! nested_in_vect_loop && orig_code != MINUS_EXPR) | 3109 if (! nested_in_vect_loop && orig_code != MINUS_EXPR) |
3055 { | 3110 { |
3056 /* Check if we can swap operands (just for simplicity - so that | 3111 /* Check if we can swap operands (just for simplicity - so that |
3057 the rest of the code can assume that the reduction variable | 3112 the rest of the code can assume that the reduction variable |
3099 { | 3154 { |
3100 if (dump_enabled_p ()) | 3155 if (dump_enabled_p ()) |
3101 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: "); | 3156 report_vect_op (MSG_NOTE, def_stmt, "detected reduction: "); |
3102 } | 3157 } |
3103 | 3158 |
3104 return def_stmt; | 3159 return def_stmt_info; |
3105 } | 3160 } |
3106 | 3161 |
3107 /* Try to find SLP reduction chain. */ | 3162 /* Try to find SLP reduction chain. */ |
3108 if (! nested_in_vect_loop | 3163 if (! nested_in_vect_loop |
3109 && code != COND_EXPR | 3164 && code != COND_EXPR |
3112 { | 3167 { |
3113 if (dump_enabled_p ()) | 3168 if (dump_enabled_p ()) |
3114 report_vect_op (MSG_NOTE, def_stmt, | 3169 report_vect_op (MSG_NOTE, def_stmt, |
3115 "reduction: detected reduction chain: "); | 3170 "reduction: detected reduction chain: "); |
3116 | 3171 |
3117 return def_stmt; | 3172 return def_stmt_info; |
3118 } | 3173 } |
3119 | 3174 |
3120 /* Dissolve group eventually half-built by vect_is_slp_reduction. */ | 3175 /* Dissolve group eventually half-built by vect_is_slp_reduction. */ |
3121 gimple *first = GROUP_FIRST_ELEMENT (vinfo_for_stmt (def_stmt)); | 3176 stmt_vec_info first = REDUC_GROUP_FIRST_ELEMENT (def_stmt_info); |
3122 while (first) | 3177 while (first) |
3123 { | 3178 { |
3124 gimple *next = GROUP_NEXT_ELEMENT (vinfo_for_stmt (first)); | 3179 stmt_vec_info next = REDUC_GROUP_NEXT_ELEMENT (first); |
3125 GROUP_FIRST_ELEMENT (vinfo_for_stmt (first)) = NULL; | 3180 REDUC_GROUP_FIRST_ELEMENT (first) = NULL; |
3126 GROUP_NEXT_ELEMENT (vinfo_for_stmt (first)) = NULL; | 3181 REDUC_GROUP_NEXT_ELEMENT (first) = NULL; |
3127 first = next; | 3182 first = next; |
3128 } | 3183 } |
3129 | 3184 |
3130 /* Look for the expression computing loop_arg from loop PHI result. */ | 3185 /* Look for the expression computing loop_arg from loop PHI result. */ |
3131 auto_vec<std::pair<ssa_op_iter, use_operand_p> > path; | 3186 if (check_reduction_path (vect_location, loop, phi, loop_arg, code)) |
3132 auto_bitmap visited; | 3187 return def_stmt_info; |
3133 tree lookfor = PHI_RESULT (phi); | |
3134 ssa_op_iter curri; | |
3135 use_operand_p curr = op_iter_init_phiuse (&curri, as_a <gphi *>(phi), | |
3136 SSA_OP_USE); | |
3137 while (USE_FROM_PTR (curr) != loop_arg) | |
3138 curr = op_iter_next_use (&curri); | |
3139 curri.i = curri.numops; | |
3140 do | |
3141 { | |
3142 path.safe_push (std::make_pair (curri, curr)); | |
3143 tree use = USE_FROM_PTR (curr); | |
3144 if (use == lookfor) | |
3145 break; | |
3146 gimple *def = SSA_NAME_DEF_STMT (use); | |
3147 if (gimple_nop_p (def) | |
3148 || ! flow_bb_inside_loop_p (loop, gimple_bb (def))) | |
3149 { | |
3150 pop: | |
3151 do | |
3152 { | |
3153 std::pair<ssa_op_iter, use_operand_p> x = path.pop (); | |
3154 curri = x.first; | |
3155 curr = x.second; | |
3156 do | |
3157 curr = op_iter_next_use (&curri); | |
3158 /* Skip already visited or non-SSA operands (from iterating | |
3159 over PHI args). */ | |
3160 while (curr != NULL_USE_OPERAND_P | |
3161 && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME | |
3162 || ! bitmap_set_bit (visited, | |
3163 SSA_NAME_VERSION | |
3164 (USE_FROM_PTR (curr))))); | |
3165 } | |
3166 while (curr == NULL_USE_OPERAND_P && ! path.is_empty ()); | |
3167 if (curr == NULL_USE_OPERAND_P) | |
3168 break; | |
3169 } | |
3170 else | |
3171 { | |
3172 if (gimple_code (def) == GIMPLE_PHI) | |
3173 curr = op_iter_init_phiuse (&curri, as_a <gphi *>(def), SSA_OP_USE); | |
3174 else | |
3175 curr = op_iter_init_use (&curri, def, SSA_OP_USE); | |
3176 while (curr != NULL_USE_OPERAND_P | |
3177 && (TREE_CODE (USE_FROM_PTR (curr)) != SSA_NAME | |
3178 || ! bitmap_set_bit (visited, | |
3179 SSA_NAME_VERSION | |
3180 (USE_FROM_PTR (curr))))) | |
3181 curr = op_iter_next_use (&curri); | |
3182 if (curr == NULL_USE_OPERAND_P) | |
3183 goto pop; | |
3184 } | |
3185 } | |
3186 while (1); | |
3187 if (dump_file && (dump_flags & TDF_DETAILS)) | |
3188 { | |
3189 dump_printf_loc (MSG_NOTE, vect_location, | |
3190 "reduction path: "); | |
3191 unsigned i; | |
3192 std::pair<ssa_op_iter, use_operand_p> *x; | |
3193 FOR_EACH_VEC_ELT (path, i, x) | |
3194 { | |
3195 dump_generic_expr (MSG_NOTE, TDF_SLIM, USE_FROM_PTR (x->second)); | |
3196 dump_printf (MSG_NOTE, " "); | |
3197 } | |
3198 dump_printf (MSG_NOTE, "\n"); | |
3199 } | |
3200 | |
3201 /* Check whether the reduction path detected is valid. */ | |
3202 bool fail = path.length () == 0; | |
3203 bool neg = false; | |
3204 for (unsigned i = 1; i < path.length (); ++i) | |
3205 { | |
3206 gimple *use_stmt = USE_STMT (path[i].second); | |
3207 tree op = USE_FROM_PTR (path[i].second); | |
3208 if (! has_single_use (op) | |
3209 || ! is_gimple_assign (use_stmt)) | |
3210 { | |
3211 fail = true; | |
3212 break; | |
3213 } | |
3214 if (gimple_assign_rhs_code (use_stmt) != code) | |
3215 { | |
3216 if (code == PLUS_EXPR | |
3217 && gimple_assign_rhs_code (use_stmt) == MINUS_EXPR) | |
3218 { | |
3219 /* Track whether we negate the reduction value each iteration. */ | |
3220 if (gimple_assign_rhs2 (use_stmt) == op) | |
3221 neg = ! neg; | |
3222 } | |
3223 else | |
3224 { | |
3225 fail = true; | |
3226 break; | |
3227 } | |
3228 } | |
3229 } | |
3230 if (! fail && ! neg) | |
3231 return def_stmt; | |
3232 | 3188 |
3233 if (dump_enabled_p ()) | 3189 if (dump_enabled_p ()) |
3234 { | 3190 { |
3235 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, | 3191 report_vect_op (MSG_MISSED_OPTIMIZATION, def_stmt, |
3236 "reduction: unknown pattern: "); | 3192 "reduction: unknown pattern: "); |
3241 | 3197 |
3242 /* Wrapper around vect_is_simple_reduction, which will modify code | 3198 /* Wrapper around vect_is_simple_reduction, which will modify code |
3243 in-place if it enables detection of more reductions. Arguments | 3199 in-place if it enables detection of more reductions. Arguments |
3244 as there. */ | 3200 as there. */ |
3245 | 3201 |
3246 gimple * | 3202 stmt_vec_info |
3247 vect_force_simple_reduction (loop_vec_info loop_info, gimple *phi, | 3203 vect_force_simple_reduction (loop_vec_info loop_info, stmt_vec_info phi_info, |
3248 bool *double_reduc, | 3204 bool *double_reduc, |
3249 bool need_wrapping_integral_overflow) | 3205 bool need_wrapping_integral_overflow) |
3250 { | 3206 { |
3251 enum vect_reduction_type v_reduc_type; | 3207 enum vect_reduction_type v_reduc_type; |
3252 gimple *def = vect_is_simple_reduction (loop_info, phi, double_reduc, | 3208 stmt_vec_info def_info |
3253 need_wrapping_integral_overflow, | 3209 = vect_is_simple_reduction (loop_info, phi_info, double_reduc, |
3254 &v_reduc_type); | 3210 need_wrapping_integral_overflow, |
3255 if (def) | 3211 &v_reduc_type); |
3256 { | 3212 if (def_info) |
3257 stmt_vec_info reduc_def_info = vinfo_for_stmt (phi); | 3213 { |
3258 STMT_VINFO_REDUC_TYPE (reduc_def_info) = v_reduc_type; | 3214 STMT_VINFO_REDUC_TYPE (phi_info) = v_reduc_type; |
3259 STMT_VINFO_REDUC_DEF (reduc_def_info) = def; | 3215 STMT_VINFO_REDUC_DEF (phi_info) = def_info; |
3260 reduc_def_info = vinfo_for_stmt (def); | 3216 STMT_VINFO_REDUC_TYPE (def_info) = v_reduc_type; |
3261 STMT_VINFO_REDUC_DEF (reduc_def_info) = phi; | 3217 STMT_VINFO_REDUC_DEF (def_info) = phi_info; |
3262 } | 3218 } |
3263 return def; | 3219 return def_info; |
3264 } | 3220 } |
3265 | 3221 |
3266 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */ | 3222 /* Calculate cost of peeling the loop PEEL_ITERS_PROLOGUE times. */ |
3267 int | 3223 int |
3268 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, | 3224 vect_get_known_peeling_cost (loop_vec_info loop_vinfo, int peel_iters_prologue, |
3270 stmt_vector_for_cost *scalar_cost_vec, | 3226 stmt_vector_for_cost *scalar_cost_vec, |
3271 stmt_vector_for_cost *prologue_cost_vec, | 3227 stmt_vector_for_cost *prologue_cost_vec, |
3272 stmt_vector_for_cost *epilogue_cost_vec) | 3228 stmt_vector_for_cost *epilogue_cost_vec) |
3273 { | 3229 { |
3274 int retval = 0; | 3230 int retval = 0; |
3275 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); | 3231 int assumed_vf = vect_vf_for_cost (loop_vinfo); |
3276 | 3232 |
3277 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) | 3233 if (!LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) |
3278 { | 3234 { |
3279 *peel_iters_epilogue = vf/2; | 3235 *peel_iters_epilogue = assumed_vf / 2; |
3280 if (dump_enabled_p ()) | 3236 if (dump_enabled_p ()) |
3281 dump_printf_loc (MSG_NOTE, vect_location, | 3237 dump_printf_loc (MSG_NOTE, vect_location, |
3282 "cost model: epilogue peel iters set to vf/2 " | 3238 "cost model: epilogue peel iters set to vf/2 " |
3283 "because loop iterations are unknown .\n"); | 3239 "because loop iterations are unknown .\n"); |
3284 | 3240 |
3292 else | 3248 else |
3293 { | 3249 { |
3294 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo); | 3250 int niters = LOOP_VINFO_INT_NITERS (loop_vinfo); |
3295 peel_iters_prologue = niters < peel_iters_prologue ? | 3251 peel_iters_prologue = niters < peel_iters_prologue ? |
3296 niters : peel_iters_prologue; | 3252 niters : peel_iters_prologue; |
3297 *peel_iters_epilogue = (niters - peel_iters_prologue) % vf; | 3253 *peel_iters_epilogue = (niters - peel_iters_prologue) % assumed_vf; |
3298 /* If we need to peel for gaps, but no peeling is required, we have to | 3254 /* If we need to peel for gaps, but no peeling is required, we have to |
3299 peel VF iterations. */ | 3255 peel VF iterations. */ |
3300 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue) | 3256 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) && !*peel_iters_epilogue) |
3301 *peel_iters_epilogue = vf; | 3257 *peel_iters_epilogue = assumed_vf; |
3302 } | 3258 } |
3303 | 3259 |
3304 stmt_info_for_cost *si; | 3260 stmt_info_for_cost *si; |
3305 int j; | 3261 int j; |
3306 if (peel_iters_prologue) | 3262 if (peel_iters_prologue) |
3307 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si) | 3263 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si) |
3308 { | 3264 retval += record_stmt_cost (prologue_cost_vec, |
3309 stmt_vec_info stmt_info | 3265 si->count * peel_iters_prologue, |
3310 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL; | 3266 si->kind, si->stmt_info, si->misalign, |
3311 retval += record_stmt_cost (prologue_cost_vec, | 3267 vect_prologue); |
3312 si->count * peel_iters_prologue, | |
3313 si->kind, stmt_info, si->misalign, | |
3314 vect_prologue); | |
3315 } | |
3316 if (*peel_iters_epilogue) | 3268 if (*peel_iters_epilogue) |
3317 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si) | 3269 FOR_EACH_VEC_ELT (*scalar_cost_vec, j, si) |
3318 { | 3270 retval += record_stmt_cost (epilogue_cost_vec, |
3319 stmt_vec_info stmt_info | 3271 si->count * *peel_iters_epilogue, |
3320 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL; | 3272 si->kind, si->stmt_info, si->misalign, |
3321 retval += record_stmt_cost (epilogue_cost_vec, | 3273 vect_epilogue); |
3322 si->count * *peel_iters_epilogue, | |
3323 si->kind, stmt_info, si->misalign, | |
3324 vect_epilogue); | |
3325 } | |
3326 | 3274 |
3327 return retval; | 3275 return retval; |
3328 } | 3276 } |
3329 | 3277 |
3330 /* Function vect_estimate_min_profitable_iters | 3278 /* Function vect_estimate_min_profitable_iters |
3354 int vec_outside_cost = 0; | 3302 int vec_outside_cost = 0; |
3355 unsigned vec_prologue_cost = 0; | 3303 unsigned vec_prologue_cost = 0; |
3356 unsigned vec_epilogue_cost = 0; | 3304 unsigned vec_epilogue_cost = 0; |
3357 int scalar_single_iter_cost = 0; | 3305 int scalar_single_iter_cost = 0; |
3358 int scalar_outside_cost = 0; | 3306 int scalar_outside_cost = 0; |
3359 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); | 3307 int assumed_vf = vect_vf_for_cost (loop_vinfo); |
3360 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); | 3308 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); |
3361 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); | 3309 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); |
3362 | 3310 |
3363 /* Cost model disabled. */ | 3311 /* Cost model disabled. */ |
3364 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) | 3312 if (unlimited_cost_model (LOOP_VINFO_LOOP (loop_vinfo))) |
3391 len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length (); | 3339 len = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo).length (); |
3392 if (len) | 3340 if (len) |
3393 /* Count LEN - 1 ANDs and LEN comparisons. */ | 3341 /* Count LEN - 1 ANDs and LEN comparisons. */ |
3394 (void) add_stmt_cost (target_cost_data, len * 2 - 1, scalar_stmt, | 3342 (void) add_stmt_cost (target_cost_data, len * 2 - 1, scalar_stmt, |
3395 NULL, 0, vect_prologue); | 3343 NULL, 0, vect_prologue); |
3344 len = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo).length (); | |
3345 if (len) | |
3346 { | |
3347 /* Count LEN - 1 ANDs and LEN comparisons. */ | |
3348 unsigned int nstmts = len * 2 - 1; | |
3349 /* +1 for each bias that needs adding. */ | |
3350 for (unsigned int i = 0; i < len; ++i) | |
3351 if (!LOOP_VINFO_LOWER_BOUNDS (loop_vinfo)[i].unsigned_p) | |
3352 nstmts += 1; | |
3353 (void) add_stmt_cost (target_cost_data, nstmts, scalar_stmt, | |
3354 NULL, 0, vect_prologue); | |
3355 } | |
3396 dump_printf (MSG_NOTE, | 3356 dump_printf (MSG_NOTE, |
3397 "cost model: Adding cost of checks for loop " | 3357 "cost model: Adding cost of checks for loop " |
3398 "versioning aliasing.\n"); | 3358 "versioning aliasing.\n"); |
3399 } | 3359 } |
3400 | 3360 |
3423 | 3383 |
3424 scalar_single_iter_cost | 3384 scalar_single_iter_cost |
3425 = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo); | 3385 = LOOP_VINFO_SINGLE_SCALAR_ITERATION_COST (loop_vinfo); |
3426 | 3386 |
3427 /* Add additional cost for the peeled instructions in prologue and epilogue | 3387 /* Add additional cost for the peeled instructions in prologue and epilogue |
3428 loop. | 3388 loop. (For fully-masked loops there will be no peeling.) |
3429 | 3389 |
3430 FORNOW: If we don't know the value of peel_iters for prologue or epilogue | 3390 FORNOW: If we don't know the value of peel_iters for prologue or epilogue |
3431 at compile-time - we assume it's vf/2 (the worst would be vf-1). | 3391 at compile-time - we assume it's vf/2 (the worst would be vf-1). |
3432 | 3392 |
3433 TODO: Build an expression that represents peel_iters for prologue and | 3393 TODO: Build an expression that represents peel_iters for prologue and |
3434 epilogue to be used in a run-time test. */ | 3394 epilogue to be used in a run-time test. */ |
3435 | 3395 |
3436 if (npeel < 0) | 3396 if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) |
3437 { | 3397 { |
3438 peel_iters_prologue = vf/2; | 3398 peel_iters_prologue = 0; |
3399 peel_iters_epilogue = 0; | |
3400 | |
3401 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo)) | |
3402 { | |
3403 /* We need to peel exactly one iteration. */ | |
3404 peel_iters_epilogue += 1; | |
3405 stmt_info_for_cost *si; | |
3406 int j; | |
3407 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), | |
3408 j, si) | |
3409 (void) add_stmt_cost (target_cost_data, si->count, | |
3410 si->kind, si->stmt_info, si->misalign, | |
3411 vect_epilogue); | |
3412 } | |
3413 } | |
3414 else if (npeel < 0) | |
3415 { | |
3416 peel_iters_prologue = assumed_vf / 2; | |
3439 dump_printf (MSG_NOTE, "cost model: " | 3417 dump_printf (MSG_NOTE, "cost model: " |
3440 "prologue peel iters set to vf/2.\n"); | 3418 "prologue peel iters set to vf/2.\n"); |
3441 | 3419 |
3442 /* If peeling for alignment is unknown, loop bound of main loop becomes | 3420 /* If peeling for alignment is unknown, loop bound of main loop becomes |
3443 unknown. */ | 3421 unknown. */ |
3444 peel_iters_epilogue = vf/2; | 3422 peel_iters_epilogue = assumed_vf / 2; |
3445 dump_printf (MSG_NOTE, "cost model: " | 3423 dump_printf (MSG_NOTE, "cost model: " |
3446 "epilogue peel iters set to vf/2 because " | 3424 "epilogue peel iters set to vf/2 because " |
3447 "peeling for alignment is unknown.\n"); | 3425 "peeling for alignment is unknown.\n"); |
3448 | 3426 |
3449 /* If peeled iterations are unknown, count a taken branch and a not taken | 3427 /* If peeled iterations are unknown, count a taken branch and a not taken |
3460 NULL, 0, vect_epilogue); | 3438 NULL, 0, vect_epilogue); |
3461 stmt_info_for_cost *si; | 3439 stmt_info_for_cost *si; |
3462 int j; | 3440 int j; |
3463 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si) | 3441 FOR_EACH_VEC_ELT (LOOP_VINFO_SCALAR_ITERATION_COST (loop_vinfo), j, si) |
3464 { | 3442 { |
3465 struct _stmt_vec_info *stmt_info | |
3466 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL; | |
3467 (void) add_stmt_cost (target_cost_data, | 3443 (void) add_stmt_cost (target_cost_data, |
3468 si->count * peel_iters_prologue, | 3444 si->count * peel_iters_prologue, |
3469 si->kind, stmt_info, si->misalign, | 3445 si->kind, si->stmt_info, si->misalign, |
3470 vect_prologue); | 3446 vect_prologue); |
3471 (void) add_stmt_cost (target_cost_data, | 3447 (void) add_stmt_cost (target_cost_data, |
3472 si->count * peel_iters_epilogue, | 3448 si->count * peel_iters_epilogue, |
3473 si->kind, stmt_info, si->misalign, | 3449 si->kind, si->stmt_info, si->misalign, |
3474 vect_epilogue); | 3450 vect_epilogue); |
3475 } | 3451 } |
3476 } | 3452 } |
3477 else | 3453 else |
3478 { | 3454 { |
3491 (loop_vinfo), | 3467 (loop_vinfo), |
3492 &prologue_cost_vec, | 3468 &prologue_cost_vec, |
3493 &epilogue_cost_vec); | 3469 &epilogue_cost_vec); |
3494 | 3470 |
3495 FOR_EACH_VEC_ELT (prologue_cost_vec, j, si) | 3471 FOR_EACH_VEC_ELT (prologue_cost_vec, j, si) |
3496 { | 3472 (void) add_stmt_cost (data, si->count, si->kind, si->stmt_info, |
3497 struct _stmt_vec_info *stmt_info | 3473 si->misalign, vect_prologue); |
3498 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL; | |
3499 (void) add_stmt_cost (data, si->count, si->kind, stmt_info, | |
3500 si->misalign, vect_prologue); | |
3501 } | |
3502 | 3474 |
3503 FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si) | 3475 FOR_EACH_VEC_ELT (epilogue_cost_vec, j, si) |
3504 { | 3476 (void) add_stmt_cost (data, si->count, si->kind, si->stmt_info, |
3505 struct _stmt_vec_info *stmt_info | 3477 si->misalign, vect_epilogue); |
3506 = si->stmt ? vinfo_for_stmt (si->stmt) : NULL; | |
3507 (void) add_stmt_cost (data, si->count, si->kind, stmt_info, | |
3508 si->misalign, vect_epilogue); | |
3509 } | |
3510 | 3478 |
3511 prologue_cost_vec.release (); | 3479 prologue_cost_vec.release (); |
3512 epilogue_cost_vec.release (); | 3480 epilogue_cost_vec.release (); |
3513 } | 3481 } |
3514 | 3482 |
3618 SIC = scalar iteration cost, VIC = vector iteration cost, | 3586 SIC = scalar iteration cost, VIC = vector iteration cost, |
3619 VOC = vector outside cost, VF = vectorization factor, | 3587 VOC = vector outside cost, VF = vectorization factor, |
3620 PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations | 3588 PL_ITERS = prologue iterations, EP_ITERS= epilogue iterations |
3621 SOC = scalar outside cost for run time cost model check. */ | 3589 SOC = scalar outside cost for run time cost model check. */ |
3622 | 3590 |
3623 if ((scalar_single_iter_cost * vf) > (int) vec_inside_cost) | 3591 if ((scalar_single_iter_cost * assumed_vf) > (int) vec_inside_cost) |
3624 { | 3592 { |
3625 if (vec_outside_cost <= 0) | 3593 min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) |
3594 * assumed_vf | |
3595 - vec_inside_cost * peel_iters_prologue | |
3596 - vec_inside_cost * peel_iters_epilogue); | |
3597 if (min_profitable_iters <= 0) | |
3626 min_profitable_iters = 0; | 3598 min_profitable_iters = 0; |
3627 else | 3599 else |
3628 { | 3600 { |
3629 min_profitable_iters = ((vec_outside_cost - scalar_outside_cost) * vf | 3601 min_profitable_iters /= ((scalar_single_iter_cost * assumed_vf) |
3630 - vec_inside_cost * peel_iters_prologue | 3602 - vec_inside_cost); |
3631 - vec_inside_cost * peel_iters_epilogue) | 3603 |
3632 / ((scalar_single_iter_cost * vf) | 3604 if ((scalar_single_iter_cost * assumed_vf * min_profitable_iters) |
3633 - vec_inside_cost); | 3605 <= (((int) vec_inside_cost * min_profitable_iters) |
3634 | 3606 + (((int) vec_outside_cost - scalar_outside_cost) |
3635 if ((scalar_single_iter_cost * vf * min_profitable_iters) | 3607 * assumed_vf))) |
3636 <= (((int) vec_inside_cost * min_profitable_iters) | 3608 min_profitable_iters++; |
3637 + (((int) vec_outside_cost - scalar_outside_cost) * vf))) | 3609 } |
3638 min_profitable_iters++; | |
3639 } | |
3640 } | 3610 } |
3641 /* vector version will never be profitable. */ | 3611 /* vector version will never be profitable. */ |
3642 else | 3612 else |
3643 { | 3613 { |
3644 if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize) | 3614 if (LOOP_VINFO_LOOP (loop_vinfo)->force_vectorize) |
3645 warning_at (vect_location, OPT_Wopenmp_simd, "vectorization " | 3615 warning_at (vect_location.get_location_t (), OPT_Wopenmp_simd, |
3646 "did not happen for a simd loop"); | 3616 "vectorization did not happen for a simd loop"); |
3647 | 3617 |
3648 if (dump_enabled_p ()) | 3618 if (dump_enabled_p ()) |
3649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 3619 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
3650 "cost model: the vector iteration cost = %d " | 3620 "cost model: the vector iteration cost = %d " |
3651 "divided by the scalar iteration cost = %d " | 3621 "divided by the scalar iteration cost = %d " |
3652 "is greater or equal to the vectorization factor = %d" | 3622 "is greater or equal to the vectorization factor = %d" |
3653 ".\n", | 3623 ".\n", |
3654 vec_inside_cost, scalar_single_iter_cost, vf); | 3624 vec_inside_cost, scalar_single_iter_cost, assumed_vf); |
3655 *ret_min_profitable_niters = -1; | 3625 *ret_min_profitable_niters = -1; |
3656 *ret_min_profitable_estimate = -1; | 3626 *ret_min_profitable_estimate = -1; |
3657 return; | 3627 return; |
3658 } | 3628 } |
3659 | 3629 |
3660 dump_printf (MSG_NOTE, | 3630 dump_printf (MSG_NOTE, |
3661 " Calculated minimum iters for profitability: %d\n", | 3631 " Calculated minimum iters for profitability: %d\n", |
3662 min_profitable_iters); | 3632 min_profitable_iters); |
3663 | 3633 |
3664 /* We want the vectorized loop to execute at least once. */ | 3634 if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) |
3665 if (min_profitable_iters < (vf + peel_iters_prologue)) | 3635 && min_profitable_iters < (assumed_vf + peel_iters_prologue)) |
3666 min_profitable_iters = vf + peel_iters_prologue; | 3636 /* We want the vectorized loop to execute at least once. */ |
3637 min_profitable_iters = assumed_vf + peel_iters_prologue; | |
3667 | 3638 |
3668 if (dump_enabled_p ()) | 3639 if (dump_enabled_p ()) |
3669 dump_printf_loc (MSG_NOTE, vect_location, | 3640 dump_printf_loc (MSG_NOTE, vect_location, |
3670 " Runtime profitability threshold = %d\n", | 3641 " Runtime profitability threshold = %d\n", |
3671 min_profitable_iters); | 3642 min_profitable_iters); |
3681 | 3652 |
3682 if (vec_outside_cost <= 0) | 3653 if (vec_outside_cost <= 0) |
3683 min_profitable_estimate = 0; | 3654 min_profitable_estimate = 0; |
3684 else | 3655 else |
3685 { | 3656 { |
3686 min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) * vf | 3657 min_profitable_estimate = ((vec_outside_cost + scalar_outside_cost) |
3658 * assumed_vf | |
3687 - vec_inside_cost * peel_iters_prologue | 3659 - vec_inside_cost * peel_iters_prologue |
3688 - vec_inside_cost * peel_iters_epilogue) | 3660 - vec_inside_cost * peel_iters_epilogue) |
3689 / ((scalar_single_iter_cost * vf) | 3661 / ((scalar_single_iter_cost * assumed_vf) |
3690 - vec_inside_cost); | 3662 - vec_inside_cost); |
3691 } | 3663 } |
3692 min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters); | 3664 min_profitable_estimate = MAX (min_profitable_estimate, min_profitable_iters); |
3693 if (dump_enabled_p ()) | 3665 if (dump_enabled_p ()) |
3694 dump_printf_loc (MSG_NOTE, vect_location, | 3666 dump_printf_loc (MSG_NOTE, vect_location, |
3700 | 3672 |
3701 /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET | 3673 /* Writes into SEL a mask for a vec_perm, equivalent to a vec_shr by OFFSET |
3702 vector elements (not bits) for a vector with NELT elements. */ | 3674 vector elements (not bits) for a vector with NELT elements. */ |
3703 static void | 3675 static void |
3704 calc_vec_perm_mask_for_shift (unsigned int offset, unsigned int nelt, | 3676 calc_vec_perm_mask_for_shift (unsigned int offset, unsigned int nelt, |
3705 vec_perm_indices *sel) | 3677 vec_perm_builder *sel) |
3706 { | 3678 { |
3707 unsigned int i; | 3679 /* The encoding is a single stepped pattern. Any wrap-around is handled |
3708 | 3680 by vec_perm_indices. */ |
3709 for (i = 0; i < nelt; i++) | 3681 sel->new_vector (nelt, 1, 3); |
3710 sel->quick_push ((i + offset) & (2 * nelt - 1)); | 3682 for (unsigned int i = 0; i < 3; i++) |
3683 sel->quick_push (i + offset); | |
3711 } | 3684 } |
3712 | 3685 |
3713 /* Checks whether the target supports whole-vector shifts for vectors of mode | 3686 /* Checks whether the target supports whole-vector shifts for vectors of mode |
3714 MODE. This is the case if _either_ the platform handles vec_shr_optab, _or_ | 3687 MODE. This is the case if _either_ the platform handles vec_shr_optab, _or_ |
3715 it supports vec_perm_const with masks for all necessary shift amounts. */ | 3688 it supports vec_perm_const with masks for all necessary shift amounts. */ |
3717 have_whole_vector_shift (machine_mode mode) | 3690 have_whole_vector_shift (machine_mode mode) |
3718 { | 3691 { |
3719 if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing) | 3692 if (optab_handler (vec_shr_optab, mode) != CODE_FOR_nothing) |
3720 return true; | 3693 return true; |
3721 | 3694 |
3722 if (direct_optab_handler (vec_perm_const_optab, mode) == CODE_FOR_nothing) | 3695 /* Variable-length vectors should be handled via the optab. */ |
3696 unsigned int nelt; | |
3697 if (!GET_MODE_NUNITS (mode).is_constant (&nelt)) | |
3723 return false; | 3698 return false; |
3724 | 3699 |
3725 unsigned int i, nelt = GET_MODE_NUNITS (mode); | 3700 vec_perm_builder sel; |
3726 auto_vec_perm_indices sel (nelt); | 3701 vec_perm_indices indices; |
3727 | 3702 for (unsigned int i = nelt / 2; i >= 1; i /= 2) |
3728 for (i = nelt/2; i >= 1; i/=2) | 3703 { |
3729 { | |
3730 sel.truncate (0); | |
3731 calc_vec_perm_mask_for_shift (i, nelt, &sel); | 3704 calc_vec_perm_mask_for_shift (i, nelt, &sel); |
3732 if (!can_vec_perm_p (mode, false, &sel)) | 3705 indices.new_vector (sel, 2, nelt); |
3706 if (!can_vec_perm_const_p (mode, indices, false)) | |
3733 return false; | 3707 return false; |
3734 } | 3708 } |
3735 return true; | 3709 return true; |
3736 } | 3710 } |
3737 | 3711 |
3743 Models cost for a reduction operation, including the vector ops | 3717 Models cost for a reduction operation, including the vector ops |
3744 generated within the strip-mine loop, the initial definition before | 3718 generated within the strip-mine loop, the initial definition before |
3745 the loop, and the epilogue code that must be generated. */ | 3719 the loop, and the epilogue code that must be generated. */ |
3746 | 3720 |
3747 static void | 3721 static void |
3748 vect_model_reduction_cost (stmt_vec_info stmt_info, enum tree_code reduc_code, | 3722 vect_model_reduction_cost (stmt_vec_info stmt_info, internal_fn reduc_fn, |
3749 int ncopies) | 3723 int ncopies, stmt_vector_for_cost *cost_vec) |
3750 { | 3724 { |
3751 int prologue_cost = 0, epilogue_cost = 0; | 3725 int prologue_cost = 0, epilogue_cost = 0, inside_cost; |
3752 enum tree_code code; | 3726 enum tree_code code; |
3753 optab optab; | 3727 optab optab; |
3754 tree vectype; | 3728 tree vectype; |
3755 gimple *orig_stmt; | |
3756 machine_mode mode; | 3729 machine_mode mode; |
3757 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | 3730 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); |
3758 struct loop *loop = NULL; | 3731 struct loop *loop = NULL; |
3759 void *target_cost_data; | |
3760 | 3732 |
3761 if (loop_vinfo) | 3733 if (loop_vinfo) |
3762 { | 3734 loop = LOOP_VINFO_LOOP (loop_vinfo); |
3763 loop = LOOP_VINFO_LOOP (loop_vinfo); | |
3764 target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); | |
3765 } | |
3766 else | |
3767 target_cost_data = BB_VINFO_TARGET_COST_DATA (STMT_VINFO_BB_VINFO (stmt_info)); | |
3768 | 3735 |
3769 /* Condition reductions generate two reductions in the loop. */ | 3736 /* Condition reductions generate two reductions in the loop. */ |
3770 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION) | 3737 vect_reduction_type reduction_type |
3738 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info); | |
3739 if (reduction_type == COND_REDUCTION) | |
3771 ncopies *= 2; | 3740 ncopies *= 2; |
3772 | |
3773 /* Cost of reduction op inside loop. */ | |
3774 unsigned inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt, | |
3775 stmt_info, 0, vect_body); | |
3776 | 3741 |
3777 vectype = STMT_VINFO_VECTYPE (stmt_info); | 3742 vectype = STMT_VINFO_VECTYPE (stmt_info); |
3778 mode = TYPE_MODE (vectype); | 3743 mode = TYPE_MODE (vectype); |
3779 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); | 3744 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); |
3780 | 3745 |
3781 if (!orig_stmt) | 3746 code = gimple_assign_rhs_code (orig_stmt_info->stmt); |
3782 orig_stmt = STMT_VINFO_STMT (stmt_info); | 3747 |
3783 | 3748 if (reduction_type == EXTRACT_LAST_REDUCTION |
3784 code = gimple_assign_rhs_code (orig_stmt); | 3749 || reduction_type == FOLD_LEFT_REDUCTION) |
3785 | 3750 { |
3786 /* Add in cost for initial definition. | 3751 /* No extra instructions needed in the prologue. */ |
3787 For cond reduction we have four vectors: initial index, step, initial | 3752 prologue_cost = 0; |
3788 result of the data reduction, initial value of the index reduction. */ | 3753 |
3789 int prologue_stmts = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) | 3754 if (reduction_type == EXTRACT_LAST_REDUCTION || reduc_fn != IFN_LAST) |
3790 == COND_REDUCTION ? 4 : 1; | 3755 /* Count one reduction-like operation per vector. */ |
3791 prologue_cost += add_stmt_cost (target_cost_data, prologue_stmts, | 3756 inside_cost = record_stmt_cost (cost_vec, ncopies, vec_to_scalar, |
3792 scalar_to_vec, stmt_info, 0, | 3757 stmt_info, 0, vect_body); |
3793 vect_prologue); | 3758 else |
3759 { | |
3760 /* Use NELEMENTS extracts and NELEMENTS scalar ops. */ | |
3761 unsigned int nelements = ncopies * vect_nunits_for_cost (vectype); | |
3762 inside_cost = record_stmt_cost (cost_vec, nelements, | |
3763 vec_to_scalar, stmt_info, 0, | |
3764 vect_body); | |
3765 inside_cost += record_stmt_cost (cost_vec, nelements, | |
3766 scalar_stmt, stmt_info, 0, | |
3767 vect_body); | |
3768 } | |
3769 } | |
3770 else | |
3771 { | |
3772 /* Add in cost for initial definition. | |
3773 For cond reduction we have four vectors: initial index, step, | |
3774 initial result of the data reduction, initial value of the index | |
3775 reduction. */ | |
3776 int prologue_stmts = reduction_type == COND_REDUCTION ? 4 : 1; | |
3777 prologue_cost += record_stmt_cost (cost_vec, prologue_stmts, | |
3778 scalar_to_vec, stmt_info, 0, | |
3779 vect_prologue); | |
3780 | |
3781 /* Cost of reduction op inside loop. */ | |
3782 inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt, | |
3783 stmt_info, 0, vect_body); | |
3784 } | |
3794 | 3785 |
3795 /* Determine cost of epilogue code. | 3786 /* Determine cost of epilogue code. |
3796 | 3787 |
3797 We have a reduction operator that will reduce the vector in one statement. | 3788 We have a reduction operator that will reduce the vector in one statement. |
3798 Also requires scalar extract. */ | 3789 Also requires scalar extract. */ |
3799 | 3790 |
3800 if (!loop || !nested_in_vect_loop_p (loop, orig_stmt)) | 3791 if (!loop || !nested_in_vect_loop_p (loop, orig_stmt_info)) |
3801 { | 3792 { |
3802 if (reduc_code != ERROR_MARK) | 3793 if (reduc_fn != IFN_LAST) |
3803 { | 3794 { |
3804 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION) | 3795 if (reduction_type == COND_REDUCTION) |
3805 { | 3796 { |
3806 /* An EQ stmt and an COND_EXPR stmt. */ | 3797 /* An EQ stmt and an COND_EXPR stmt. */ |
3807 epilogue_cost += add_stmt_cost (target_cost_data, 2, | 3798 epilogue_cost += record_stmt_cost (cost_vec, 2, |
3808 vector_stmt, stmt_info, 0, | 3799 vector_stmt, stmt_info, 0, |
3809 vect_epilogue); | 3800 vect_epilogue); |
3810 /* Reduction of the max index and a reduction of the found | 3801 /* Reduction of the max index and a reduction of the found |
3811 values. */ | 3802 values. */ |
3812 epilogue_cost += add_stmt_cost (target_cost_data, 2, | 3803 epilogue_cost += record_stmt_cost (cost_vec, 2, |
3813 vec_to_scalar, stmt_info, 0, | 3804 vec_to_scalar, stmt_info, 0, |
3814 vect_epilogue); | 3805 vect_epilogue); |
3815 /* A broadcast of the max value. */ | 3806 /* A broadcast of the max value. */ |
3816 epilogue_cost += add_stmt_cost (target_cost_data, 1, | 3807 epilogue_cost += record_stmt_cost (cost_vec, 1, |
3817 scalar_to_vec, stmt_info, 0, | 3808 scalar_to_vec, stmt_info, 0, |
3818 vect_epilogue); | 3809 vect_epilogue); |
3819 } | 3810 } |
3820 else | 3811 else |
3821 { | 3812 { |
3822 epilogue_cost += add_stmt_cost (target_cost_data, 1, vector_stmt, | 3813 epilogue_cost += record_stmt_cost (cost_vec, 1, vector_stmt, |
3823 stmt_info, 0, vect_epilogue); | 3814 stmt_info, 0, vect_epilogue); |
3824 epilogue_cost += add_stmt_cost (target_cost_data, 1, | 3815 epilogue_cost += record_stmt_cost (cost_vec, 1, |
3825 vec_to_scalar, stmt_info, 0, | 3816 vec_to_scalar, stmt_info, 0, |
3826 vect_epilogue); | 3817 vect_epilogue); |
3827 } | 3818 } |
3828 } | 3819 } |
3829 else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION) | 3820 else if (reduction_type == COND_REDUCTION) |
3830 { | 3821 { |
3831 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype); | 3822 unsigned estimated_nunits = vect_nunits_for_cost (vectype); |
3832 /* Extraction of scalar elements. */ | 3823 /* Extraction of scalar elements. */ |
3833 epilogue_cost += add_stmt_cost (target_cost_data, 2 * nunits, | 3824 epilogue_cost += record_stmt_cost (cost_vec, |
3834 vec_to_scalar, stmt_info, 0, | 3825 2 * estimated_nunits, |
3835 vect_epilogue); | 3826 vec_to_scalar, stmt_info, 0, |
3827 vect_epilogue); | |
3836 /* Scalar max reductions via COND_EXPR / MAX_EXPR. */ | 3828 /* Scalar max reductions via COND_EXPR / MAX_EXPR. */ |
3837 epilogue_cost += add_stmt_cost (target_cost_data, 2 * nunits - 3, | 3829 epilogue_cost += record_stmt_cost (cost_vec, |
3838 scalar_stmt, stmt_info, 0, | 3830 2 * estimated_nunits - 3, |
3839 vect_epilogue); | 3831 scalar_stmt, stmt_info, 0, |
3840 } | 3832 vect_epilogue); |
3833 } | |
3834 else if (reduction_type == EXTRACT_LAST_REDUCTION | |
3835 || reduction_type == FOLD_LEFT_REDUCTION) | |
3836 /* No extra instructions need in the epilogue. */ | |
3837 ; | |
3841 else | 3838 else |
3842 { | 3839 { |
3843 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)); | 3840 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)); |
3844 tree bitsize = | 3841 tree bitsize = |
3845 TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt))); | 3842 TYPE_SIZE (TREE_TYPE (gimple_assign_lhs (orig_stmt_info->stmt))); |
3846 int element_bitsize = tree_to_uhwi (bitsize); | 3843 int element_bitsize = tree_to_uhwi (bitsize); |
3847 int nelements = vec_size_in_bits / element_bitsize; | 3844 int nelements = vec_size_in_bits / element_bitsize; |
3848 | 3845 |
3849 if (code == COND_EXPR) | 3846 if (code == COND_EXPR) |
3850 code = MAX_EXPR; | 3847 code = MAX_EXPR; |
3857 && optab_handler (optab, mode) != CODE_FOR_nothing | 3854 && optab_handler (optab, mode) != CODE_FOR_nothing |
3858 && have_whole_vector_shift (mode)) | 3855 && have_whole_vector_shift (mode)) |
3859 { | 3856 { |
3860 /* Final reduction via vector shifts and the reduction operator. | 3857 /* Final reduction via vector shifts and the reduction operator. |
3861 Also requires scalar extract. */ | 3858 Also requires scalar extract. */ |
3862 epilogue_cost += add_stmt_cost (target_cost_data, | 3859 epilogue_cost += record_stmt_cost (cost_vec, |
3863 exact_log2 (nelements) * 2, | 3860 exact_log2 (nelements) * 2, |
3864 vector_stmt, stmt_info, 0, | 3861 vector_stmt, stmt_info, 0, |
3865 vect_epilogue); | 3862 vect_epilogue); |
3866 epilogue_cost += add_stmt_cost (target_cost_data, 1, | 3863 epilogue_cost += record_stmt_cost (cost_vec, 1, |
3867 vec_to_scalar, stmt_info, 0, | 3864 vec_to_scalar, stmt_info, 0, |
3868 vect_epilogue); | 3865 vect_epilogue); |
3869 } | 3866 } |
3870 else | 3867 else |
3871 /* Use extracts and reduction op for final reduction. For N | 3868 /* Use extracts and reduction op for final reduction. For N |
3872 elements, we have N extracts and N-1 reduction ops. */ | 3869 elements, we have N extracts and N-1 reduction ops. */ |
3873 epilogue_cost += add_stmt_cost (target_cost_data, | 3870 epilogue_cost += record_stmt_cost (cost_vec, |
3874 nelements + nelements - 1, | 3871 nelements + nelements - 1, |
3875 vector_stmt, stmt_info, 0, | 3872 vector_stmt, stmt_info, 0, |
3876 vect_epilogue); | 3873 vect_epilogue); |
3877 } | 3874 } |
3878 } | 3875 } |
3879 | 3876 |
3880 if (dump_enabled_p ()) | 3877 if (dump_enabled_p ()) |
3881 dump_printf (MSG_NOTE, | 3878 dump_printf (MSG_NOTE, |
3888 /* Function vect_model_induction_cost. | 3885 /* Function vect_model_induction_cost. |
3889 | 3886 |
3890 Models cost for induction operations. */ | 3887 Models cost for induction operations. */ |
3891 | 3888 |
3892 static void | 3889 static void |
3893 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies) | 3890 vect_model_induction_cost (stmt_vec_info stmt_info, int ncopies, |
3891 stmt_vector_for_cost *cost_vec) | |
3894 { | 3892 { |
3895 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | |
3896 void *target_cost_data = LOOP_VINFO_TARGET_COST_DATA (loop_vinfo); | |
3897 unsigned inside_cost, prologue_cost; | 3893 unsigned inside_cost, prologue_cost; |
3898 | 3894 |
3899 if (PURE_SLP_STMT (stmt_info)) | 3895 if (PURE_SLP_STMT (stmt_info)) |
3900 return; | 3896 return; |
3901 | 3897 |
3902 /* loop cost for vec_loop. */ | 3898 /* loop cost for vec_loop. */ |
3903 inside_cost = add_stmt_cost (target_cost_data, ncopies, vector_stmt, | 3899 inside_cost = record_stmt_cost (cost_vec, ncopies, vector_stmt, |
3904 stmt_info, 0, vect_body); | 3900 stmt_info, 0, vect_body); |
3905 | 3901 |
3906 /* prologue cost for vec_init and vec_step. */ | 3902 /* prologue cost for vec_init and vec_step. */ |
3907 prologue_cost = add_stmt_cost (target_cost_data, 2, scalar_to_vec, | 3903 prologue_cost = record_stmt_cost (cost_vec, 2, scalar_to_vec, |
3908 stmt_info, 0, vect_prologue); | 3904 stmt_info, 0, vect_prologue); |
3909 | 3905 |
3910 if (dump_enabled_p ()) | 3906 if (dump_enabled_p ()) |
3911 dump_printf_loc (MSG_NOTE, vect_location, | 3907 dump_printf_loc (MSG_NOTE, vect_location, |
3912 "vect_model_induction_cost: inside_cost = %d, " | 3908 "vect_model_induction_cost: inside_cost = %d, " |
3913 "prologue_cost = %d .\n", inside_cost, prologue_cost); | 3909 "prologue_cost = %d .\n", inside_cost, prologue_cost); |
3916 | 3912 |
3917 | 3913 |
3918 /* Function get_initial_def_for_reduction | 3914 /* Function get_initial_def_for_reduction |
3919 | 3915 |
3920 Input: | 3916 Input: |
3921 STMT - a stmt that performs a reduction operation in the loop. | 3917 STMT_VINFO - a stmt that performs a reduction operation in the loop. |
3922 INIT_VAL - the initial value of the reduction variable | 3918 INIT_VAL - the initial value of the reduction variable |
3923 | 3919 |
3924 Output: | 3920 Output: |
3925 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result | 3921 ADJUSTMENT_DEF - a tree that holds a value to be added to the final result |
3926 of the reduction (used for adjusting the epilog - see below). | 3922 of the reduction (used for adjusting the epilog - see below). |
3927 Return a vector variable, initialized according to the operation that STMT | 3923 Return a vector variable, initialized according to the operation that |
3928 performs. This vector will be used as the initial value of the | 3924 STMT_VINFO performs. This vector will be used as the initial value |
3929 vector of partial results. | 3925 of the vector of partial results. |
3930 | 3926 |
3931 Option1 (adjust in epilog): Initialize the vector as follows: | 3927 Option1 (adjust in epilog): Initialize the vector as follows: |
3932 add/bit or/xor: [0,0,...,0,0] | 3928 add/bit or/xor: [0,0,...,0,0] |
3933 mult/bit and: [1,1,...,1,1] | 3929 mult/bit and: [1,1,...,1,1] |
3934 min/max/cond_expr: [init_val,init_val,..,init_val,init_val] | 3930 min/max/cond_expr: [init_val,init_val,..,init_val,init_val] |
3945 | 3941 |
3946 s = init_val; | 3942 s = init_val; |
3947 for (i=0;i<n;i++) | 3943 for (i=0;i<n;i++) |
3948 s = s + a[i]; | 3944 s = s + a[i]; |
3949 | 3945 |
3950 STMT is 's = s + a[i]', and the reduction variable is 's'. | 3946 STMT_VINFO is 's = s + a[i]', and the reduction variable is 's'. |
3951 For a vector of 4 units, we want to return either [0,0,0,init_val], | 3947 For a vector of 4 units, we want to return either [0,0,0,init_val], |
3952 or [0,0,0,0] and let the caller know that it needs to adjust | 3948 or [0,0,0,0] and let the caller know that it needs to adjust |
3953 the result at the end by 'init_val'. | 3949 the result at the end by 'init_val'. |
3954 | 3950 |
3955 FORNOW, we are using the 'adjust in epilog' scheme, because this way the | 3951 FORNOW, we are using the 'adjust in epilog' scheme, because this way the |
3957 ADJUSTMENT_DEF is not NULL, and Option2 otherwise. | 3953 ADJUSTMENT_DEF is not NULL, and Option2 otherwise. |
3958 | 3954 |
3959 A cost model should help decide between these two schemes. */ | 3955 A cost model should help decide between these two schemes. */ |
3960 | 3956 |
3961 tree | 3957 tree |
3962 get_initial_def_for_reduction (gimple *stmt, tree init_val, | 3958 get_initial_def_for_reduction (stmt_vec_info stmt_vinfo, tree init_val, |
3963 tree *adjustment_def) | 3959 tree *adjustment_def) |
3964 { | 3960 { |
3965 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); | |
3966 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); | 3961 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_vinfo); |
3967 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | 3962 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
3968 tree scalar_type = TREE_TYPE (init_val); | 3963 tree scalar_type = TREE_TYPE (init_val); |
3969 tree vectype = get_vectype_for_scalar_type (scalar_type); | 3964 tree vectype = get_vectype_for_scalar_type (scalar_type); |
3970 int nunits; | 3965 enum tree_code code = gimple_assign_rhs_code (stmt_vinfo->stmt); |
3971 enum tree_code code = gimple_assign_rhs_code (stmt); | |
3972 tree def_for_init; | 3966 tree def_for_init; |
3973 tree init_def; | 3967 tree init_def; |
3974 int i; | |
3975 bool nested_in_vect_loop = false; | |
3976 REAL_VALUE_TYPE real_init_val = dconst0; | 3968 REAL_VALUE_TYPE real_init_val = dconst0; |
3977 int int_init_val = 0; | 3969 int int_init_val = 0; |
3978 gimple *def_stmt = NULL; | |
3979 gimple_seq stmts = NULL; | 3970 gimple_seq stmts = NULL; |
3980 | 3971 |
3981 gcc_assert (vectype); | 3972 gcc_assert (vectype); |
3982 nunits = TYPE_VECTOR_SUBPARTS (vectype); | |
3983 | 3973 |
3984 gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type) | 3974 gcc_assert (POINTER_TYPE_P (scalar_type) || INTEGRAL_TYPE_P (scalar_type) |
3985 || SCALAR_FLOAT_TYPE_P (scalar_type)); | 3975 || SCALAR_FLOAT_TYPE_P (scalar_type)); |
3986 | 3976 |
3987 if (nested_in_vect_loop_p (loop, stmt)) | 3977 gcc_assert (nested_in_vect_loop_p (loop, stmt_vinfo) |
3988 nested_in_vect_loop = true; | 3978 || loop == (gimple_bb (stmt_vinfo->stmt))->loop_father); |
3989 else | 3979 |
3990 gcc_assert (loop == (gimple_bb (stmt))->loop_father); | 3980 vect_reduction_type reduction_type |
3991 | 3981 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo); |
3992 /* In case of double reduction we only create a vector variable to be put | |
3993 in the reduction phi node. The actual statement creation is done in | |
3994 vect_create_epilog_for_reduction. */ | |
3995 if (adjustment_def && nested_in_vect_loop | |
3996 && TREE_CODE (init_val) == SSA_NAME | |
3997 && (def_stmt = SSA_NAME_DEF_STMT (init_val)) | |
3998 && gimple_code (def_stmt) == GIMPLE_PHI | |
3999 && flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)) | |
4000 && vinfo_for_stmt (def_stmt) | |
4001 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_stmt)) | |
4002 == vect_double_reduction_def) | |
4003 { | |
4004 *adjustment_def = NULL; | |
4005 return vect_create_destination_var (init_val, vectype); | |
4006 } | |
4007 | |
4008 /* In case of a nested reduction do not use an adjustment def as | |
4009 that case is not supported by the epilogue generation correctly | |
4010 if ncopies is not one. */ | |
4011 if (adjustment_def && nested_in_vect_loop) | |
4012 { | |
4013 *adjustment_def = NULL; | |
4014 return vect_get_vec_def_for_operand (init_val, stmt); | |
4015 } | |
4016 | 3982 |
4017 switch (code) | 3983 switch (code) |
4018 { | 3984 { |
4019 case WIDEN_SUM_EXPR: | 3985 case WIDEN_SUM_EXPR: |
4020 case DOT_PROD_EXPR: | 3986 case DOT_PROD_EXPR: |
4024 case BIT_IOR_EXPR: | 3990 case BIT_IOR_EXPR: |
4025 case BIT_XOR_EXPR: | 3991 case BIT_XOR_EXPR: |
4026 case MULT_EXPR: | 3992 case MULT_EXPR: |
4027 case BIT_AND_EXPR: | 3993 case BIT_AND_EXPR: |
4028 { | 3994 { |
4029 /* ADJUSMENT_DEF is NULL when called from | 3995 /* ADJUSTMENT_DEF is NULL when called from |
4030 vect_create_epilog_for_reduction to vectorize double reduction. */ | 3996 vect_create_epilog_for_reduction to vectorize double reduction. */ |
4031 if (adjustment_def) | 3997 if (adjustment_def) |
4032 *adjustment_def = init_val; | 3998 *adjustment_def = init_val; |
4033 | 3999 |
4034 if (code == MULT_EXPR) | 4000 if (code == MULT_EXPR) |
4047 | 4013 |
4048 if (adjustment_def) | 4014 if (adjustment_def) |
4049 /* Option1: the first element is '0' or '1' as well. */ | 4015 /* Option1: the first element is '0' or '1' as well. */ |
4050 init_def = gimple_build_vector_from_val (&stmts, vectype, | 4016 init_def = gimple_build_vector_from_val (&stmts, vectype, |
4051 def_for_init); | 4017 def_for_init); |
4018 else if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant ()) | |
4019 { | |
4020 /* Option2 (variable length): the first element is INIT_VAL. */ | |
4021 init_def = gimple_build_vector_from_val (&stmts, vectype, | |
4022 def_for_init); | |
4023 init_def = gimple_build (&stmts, CFN_VEC_SHL_INSERT, | |
4024 vectype, init_def, init_val); | |
4025 } | |
4052 else | 4026 else |
4053 { | 4027 { |
4054 /* Option2: the first element is INIT_VAL. */ | 4028 /* Option2: the first element is INIT_VAL. */ |
4055 auto_vec<tree, 32> elts (nunits); | 4029 tree_vector_builder elts (vectype, 1, 2); |
4056 elts.quick_push (init_val); | 4030 elts.quick_push (init_val); |
4057 for (i = 1; i < nunits; ++i) | 4031 elts.quick_push (def_for_init); |
4058 elts.quick_push (def_for_init); | 4032 init_def = gimple_build_vector (&stmts, &elts); |
4059 init_def = gimple_build_vector (&stmts, vectype, elts); | |
4060 } | 4033 } |
4061 } | 4034 } |
4062 break; | 4035 break; |
4063 | 4036 |
4064 case MIN_EXPR: | 4037 case MIN_EXPR: |
4066 case COND_EXPR: | 4039 case COND_EXPR: |
4067 { | 4040 { |
4068 if (adjustment_def) | 4041 if (adjustment_def) |
4069 { | 4042 { |
4070 *adjustment_def = NULL_TREE; | 4043 *adjustment_def = NULL_TREE; |
4071 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_vinfo) != COND_REDUCTION) | 4044 if (reduction_type != COND_REDUCTION |
4045 && reduction_type != EXTRACT_LAST_REDUCTION) | |
4072 { | 4046 { |
4073 init_def = vect_get_vec_def_for_operand (init_val, stmt); | 4047 init_def = vect_get_vec_def_for_operand (init_val, stmt_vinfo); |
4074 break; | 4048 break; |
4075 } | 4049 } |
4076 } | 4050 } |
4077 init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val); | 4051 init_val = gimple_convert (&stmts, TREE_TYPE (vectype), init_val); |
4078 init_def = gimple_build_vector_from_val (&stmts, vectype, init_val); | 4052 init_def = gimple_build_vector_from_val (&stmts, vectype, init_val); |
4087 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); | 4061 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts); |
4088 return init_def; | 4062 return init_def; |
4089 } | 4063 } |
4090 | 4064 |
4091 /* Get at the initial defs for the reduction PHIs in SLP_NODE. | 4065 /* Get at the initial defs for the reduction PHIs in SLP_NODE. |
4092 NUMBER_OF_VECTORS is the number of vector defs to create. */ | 4066 NUMBER_OF_VECTORS is the number of vector defs to create. |
4067 If NEUTRAL_OP is nonnull, introducing extra elements of that | |
4068 value will not change the result. */ | |
4093 | 4069 |
4094 static void | 4070 static void |
4095 get_initial_defs_for_reduction (slp_tree slp_node, | 4071 get_initial_defs_for_reduction (slp_tree slp_node, |
4096 vec<tree> *vec_oprnds, | 4072 vec<tree> *vec_oprnds, |
4097 unsigned int number_of_vectors, | 4073 unsigned int number_of_vectors, |
4098 enum tree_code code, bool reduc_chain) | 4074 bool reduc_chain, tree neutral_op) |
4099 { | 4075 { |
4100 vec<gimple *> stmts = SLP_TREE_SCALAR_STMTS (slp_node); | 4076 vec<stmt_vec_info> stmts = SLP_TREE_SCALAR_STMTS (slp_node); |
4101 gimple *stmt = stmts[0]; | 4077 stmt_vec_info stmt_vinfo = stmts[0]; |
4102 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); | 4078 unsigned HOST_WIDE_INT nunits; |
4103 unsigned nunits; | |
4104 unsigned j, number_of_places_left_in_vector; | 4079 unsigned j, number_of_places_left_in_vector; |
4105 tree vector_type, scalar_type; | 4080 tree vector_type; |
4106 tree vop; | 4081 tree vop; |
4107 int group_size = stmts.length (); | 4082 int group_size = stmts.length (); |
4108 unsigned int vec_num, i; | 4083 unsigned int vec_num, i; |
4109 unsigned number_of_copies = 1; | 4084 unsigned number_of_copies = 1; |
4110 vec<tree> voprnds; | 4085 vec<tree> voprnds; |
4111 voprnds.create (number_of_vectors); | 4086 voprnds.create (number_of_vectors); |
4112 tree neutral_op = NULL; | |
4113 struct loop *loop; | 4087 struct loop *loop; |
4088 auto_vec<tree, 16> permute_results; | |
4114 | 4089 |
4115 vector_type = STMT_VINFO_VECTYPE (stmt_vinfo); | 4090 vector_type = STMT_VINFO_VECTYPE (stmt_vinfo); |
4116 scalar_type = TREE_TYPE (vector_type); | |
4117 nunits = TYPE_VECTOR_SUBPARTS (vector_type); | |
4118 | 4091 |
4119 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def); | 4092 gcc_assert (STMT_VINFO_DEF_TYPE (stmt_vinfo) == vect_reduction_def); |
4120 | 4093 |
4121 loop = (gimple_bb (stmt))->loop_father; | 4094 loop = (gimple_bb (stmt_vinfo->stmt))->loop_father; |
4122 gcc_assert (loop); | 4095 gcc_assert (loop); |
4123 edge pe = loop_preheader_edge (loop); | 4096 edge pe = loop_preheader_edge (loop); |
4124 | 4097 |
4125 /* op is the reduction operand of the first stmt already. */ | 4098 gcc_assert (!reduc_chain || neutral_op); |
4126 /* For additional copies (see the explanation of NUMBER_OF_COPIES below) | |
4127 we need either neutral operands or the original operands. See | |
4128 get_initial_def_for_reduction() for details. */ | |
4129 switch (code) | |
4130 { | |
4131 case WIDEN_SUM_EXPR: | |
4132 case DOT_PROD_EXPR: | |
4133 case SAD_EXPR: | |
4134 case PLUS_EXPR: | |
4135 case MINUS_EXPR: | |
4136 case BIT_IOR_EXPR: | |
4137 case BIT_XOR_EXPR: | |
4138 neutral_op = build_zero_cst (scalar_type); | |
4139 break; | |
4140 | |
4141 case MULT_EXPR: | |
4142 neutral_op = build_one_cst (scalar_type); | |
4143 break; | |
4144 | |
4145 case BIT_AND_EXPR: | |
4146 neutral_op = build_all_ones_cst (scalar_type); | |
4147 break; | |
4148 | |
4149 /* For MIN/MAX we don't have an easy neutral operand but | |
4150 the initial values can be used fine here. Only for | |
4151 a reduction chain we have to force a neutral element. */ | |
4152 case MAX_EXPR: | |
4153 case MIN_EXPR: | |
4154 if (! reduc_chain) | |
4155 neutral_op = NULL; | |
4156 else | |
4157 neutral_op = PHI_ARG_DEF_FROM_EDGE (stmt, pe); | |
4158 break; | |
4159 | |
4160 default: | |
4161 gcc_assert (! reduc_chain); | |
4162 neutral_op = NULL; | |
4163 } | |
4164 | 4099 |
4165 /* NUMBER_OF_COPIES is the number of times we need to use the same values in | 4100 /* NUMBER_OF_COPIES is the number of times we need to use the same values in |
4166 created vectors. It is greater than 1 if unrolling is performed. | 4101 created vectors. It is greater than 1 if unrolling is performed. |
4167 | 4102 |
4168 For example, we have two scalar operands, s1 and s2 (e.g., group of | 4103 For example, we have two scalar operands, s1 and s2 (e.g., group of |
4169 strided accesses of size two), while NUNITS is four (i.e., four scalars | 4104 strided accesses of size two), while NUNITS is four (i.e., four scalars |
4170 of this type can be packed in a vector). The output vector will contain | 4105 of this type can be packed in a vector). The output vector will contain |
4171 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES | 4106 two copies of each scalar operand: {s1, s2, s1, s2}. (NUMBER_OF_COPIES |
4172 will be 2). | 4107 will be 2). |
4173 | 4108 |
4174 If GROUP_SIZE > NUNITS, the scalars will be split into several vectors | 4109 If REDUC_GROUP_SIZE > NUNITS, the scalars will be split into several |
4175 containing the operands. | 4110 vectors containing the operands. |
4176 | 4111 |
4177 For example, NUNITS is four as before, and the group size is 8 | 4112 For example, NUNITS is four as before, and the group size is 8 |
4178 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and | 4113 (s1, s2, ..., s8). We will create two vectors {s1, s2, s3, s4} and |
4179 {s5, s6, s7, s8}. */ | 4114 {s5, s6, s7, s8}. */ |
4180 | 4115 |
4116 if (!TYPE_VECTOR_SUBPARTS (vector_type).is_constant (&nunits)) | |
4117 nunits = group_size; | |
4118 | |
4181 number_of_copies = nunits * number_of_vectors / group_size; | 4119 number_of_copies = nunits * number_of_vectors / group_size; |
4182 | 4120 |
4183 number_of_places_left_in_vector = nunits; | 4121 number_of_places_left_in_vector = nunits; |
4184 auto_vec<tree, 32> elts (nunits); | 4122 bool constant_p = true; |
4123 tree_vector_builder elts (vector_type, nunits, 1); | |
4185 elts.quick_grow (nunits); | 4124 elts.quick_grow (nunits); |
4186 for (j = 0; j < number_of_copies; j++) | 4125 for (j = 0; j < number_of_copies; j++) |
4187 { | 4126 { |
4188 for (i = group_size - 1; stmts.iterate (i, &stmt); i--) | 4127 for (i = group_size - 1; stmts.iterate (i, &stmt_vinfo); i--) |
4189 { | 4128 { |
4190 tree op; | 4129 tree op; |
4191 /* Get the def before the loop. In reduction chain we have only | 4130 /* Get the def before the loop. In reduction chain we have only |
4192 one initial value. */ | 4131 one initial value. */ |
4193 if ((j != (number_of_copies - 1) | 4132 if ((j != (number_of_copies - 1) |
4194 || (reduc_chain && i != 0)) | 4133 || (reduc_chain && i != 0)) |
4195 && neutral_op) | 4134 && neutral_op) |
4196 op = neutral_op; | 4135 op = neutral_op; |
4197 else | 4136 else |
4198 op = PHI_ARG_DEF_FROM_EDGE (stmt, pe); | 4137 op = PHI_ARG_DEF_FROM_EDGE (stmt_vinfo->stmt, pe); |
4199 | 4138 |
4200 /* Create 'vect_ = {op0,op1,...,opn}'. */ | 4139 /* Create 'vect_ = {op0,op1,...,opn}'. */ |
4201 number_of_places_left_in_vector--; | 4140 number_of_places_left_in_vector--; |
4202 elts[number_of_places_left_in_vector] = op; | 4141 elts[number_of_places_left_in_vector] = op; |
4142 if (!CONSTANT_CLASS_P (op)) | |
4143 constant_p = false; | |
4203 | 4144 |
4204 if (number_of_places_left_in_vector == 0) | 4145 if (number_of_places_left_in_vector == 0) |
4205 { | 4146 { |
4206 gimple_seq ctor_seq = NULL; | 4147 gimple_seq ctor_seq = NULL; |
4207 tree init = gimple_build_vector (&ctor_seq, vector_type, elts); | 4148 tree init; |
4149 if (constant_p && !neutral_op | |
4150 ? multiple_p (TYPE_VECTOR_SUBPARTS (vector_type), nunits) | |
4151 : known_eq (TYPE_VECTOR_SUBPARTS (vector_type), nunits)) | |
4152 /* Build the vector directly from ELTS. */ | |
4153 init = gimple_build_vector (&ctor_seq, &elts); | |
4154 else if (neutral_op) | |
4155 { | |
4156 /* Build a vector of the neutral value and shift the | |
4157 other elements into place. */ | |
4158 init = gimple_build_vector_from_val (&ctor_seq, vector_type, | |
4159 neutral_op); | |
4160 int k = nunits; | |
4161 while (k > 0 && elts[k - 1] == neutral_op) | |
4162 k -= 1; | |
4163 while (k > 0) | |
4164 { | |
4165 k -= 1; | |
4166 init = gimple_build (&ctor_seq, CFN_VEC_SHL_INSERT, | |
4167 vector_type, init, elts[k]); | |
4168 } | |
4169 } | |
4170 else | |
4171 { | |
4172 /* First time round, duplicate ELTS to fill the | |
4173 required number of vectors, then cherry pick the | |
4174 appropriate result for each iteration. */ | |
4175 if (vec_oprnds->is_empty ()) | |
4176 duplicate_and_interleave (&ctor_seq, vector_type, elts, | |
4177 number_of_vectors, | |
4178 permute_results); | |
4179 init = permute_results[number_of_vectors - j - 1]; | |
4180 } | |
4208 if (ctor_seq != NULL) | 4181 if (ctor_seq != NULL) |
4209 gsi_insert_seq_on_edge_immediate (pe, ctor_seq); | 4182 gsi_insert_seq_on_edge_immediate (pe, ctor_seq); |
4210 voprnds.quick_push (init); | 4183 voprnds.quick_push (init); |
4211 | 4184 |
4212 number_of_places_left_in_vector = nunits; | 4185 number_of_places_left_in_vector = nunits; |
4186 elts.new_vector (vector_type, nunits, 1); | |
4187 elts.quick_grow (nunits); | |
4188 constant_p = true; | |
4213 } | 4189 } |
4214 } | 4190 } |
4215 } | 4191 } |
4216 | 4192 |
4217 /* Since the vectors are created in the reverse order, we should invert | 4193 /* Since the vectors are created in the reverse order, we should invert |
4258 Create code at the loop-epilog to finalize the result of a reduction | 4234 Create code at the loop-epilog to finalize the result of a reduction |
4259 computation. | 4235 computation. |
4260 | 4236 |
4261 VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector | 4237 VECT_DEFS is list of vector of partial results, i.e., the lhs's of vector |
4262 reduction statements. | 4238 reduction statements. |
4263 STMT is the scalar reduction stmt that is being vectorized. | 4239 STMT_INFO is the scalar reduction stmt that is being vectorized. |
4264 NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the | 4240 NCOPIES is > 1 in case the vectorization factor (VF) is bigger than the |
4265 number of elements that we can fit in a vectype (nunits). In this case | 4241 number of elements that we can fit in a vectype (nunits). In this case |
4266 we have to generate more than one vector stmt - i.e - we need to "unroll" | 4242 we have to generate more than one vector stmt - i.e - we need to "unroll" |
4267 the vector stmt by a factor VF/nunits. For more details see documentation | 4243 the vector stmt by a factor VF/nunits. For more details see documentation |
4268 in vectorizable_operation. | 4244 in vectorizable_operation. |
4269 REDUC_CODE is the tree-code for the epilog reduction. | 4245 REDUC_FN is the internal function for the epilog reduction. |
4270 REDUCTION_PHIS is a list of the phi-nodes that carry the reduction | 4246 REDUCTION_PHIS is a list of the phi-nodes that carry the reduction |
4271 computation. | 4247 computation. |
4272 REDUC_INDEX is the index of the operand in the right hand side of the | 4248 REDUC_INDEX is the index of the operand in the right hand side of the |
4273 statement that is defined by REDUCTION_PHI. | 4249 statement that is defined by REDUCTION_PHI. |
4274 DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled. | 4250 DOUBLE_REDUC is TRUE if double reduction phi nodes should be handled. |
4275 SLP_NODE is an SLP node containing a group of reduction statements. The | 4251 SLP_NODE is an SLP node containing a group of reduction statements. The |
4276 first one in this group is STMT. | 4252 first one in this group is STMT_INFO. |
4253 INDUC_VAL is for INTEGER_INDUC_COND_REDUCTION the value to use for the case | |
4254 when the COND_EXPR is never true in the loop. For MAX_EXPR, it needs to | |
4255 be smaller than any value of the IV in the loop, for MIN_EXPR larger than | |
4256 any value of the IV in the loop. | |
4257 INDUC_CODE is the code for epilog reduction if INTEGER_INDUC_COND_REDUCTION. | |
4258 NEUTRAL_OP is the value given by neutral_op_for_slp_reduction; it is | |
4259 null if this is not an SLP reduction. | |
4277 | 4260 |
4278 This function: | 4261 This function: |
4279 1. Creates the reduction def-use cycles: sets the arguments for | 4262 1. Creates the reduction def-use cycles: sets the arguments for |
4280 REDUCTION_PHIS: | 4263 REDUCTION_PHIS: |
4281 The loop-entry argument is the vectorized initial-value of the reduction. | 4264 The loop-entry argument is the vectorized initial-value of the reduction. |
4282 The loop-latch argument is taken from VECT_DEFS - the vector of partial | 4265 The loop-latch argument is taken from VECT_DEFS - the vector of partial |
4283 sums. | 4266 sums. |
4284 2. "Reduces" each vector of partial results VECT_DEFS into a single result, | 4267 2. "Reduces" each vector of partial results VECT_DEFS into a single result, |
4285 by applying the operation specified by REDUC_CODE if available, or by | 4268 by calling the function specified by REDUC_FN if available, or by |
4286 other means (whole-vector shifts or a scalar loop). | 4269 other means (whole-vector shifts or a scalar loop). |
4287 The function also creates a new phi node at the loop exit to preserve | 4270 The function also creates a new phi node at the loop exit to preserve |
4288 loop-closed form, as illustrated below. | 4271 loop-closed form, as illustrated below. |
4289 | 4272 |
4290 The flow at the entry to this function: | 4273 The flow at the entry to this function: |
4291 | 4274 |
4292 loop: | 4275 loop: |
4293 vec_def = phi <null, null> # REDUCTION_PHI | 4276 vec_def = phi <null, null> # REDUCTION_PHI |
4294 VECT_DEF = vector_stmt # vectorized form of STMT | 4277 VECT_DEF = vector_stmt # vectorized form of STMT_INFO |
4295 s_loop = scalar_stmt # (scalar) STMT | 4278 s_loop = scalar_stmt # (scalar) STMT_INFO |
4296 loop_exit: | 4279 loop_exit: |
4297 s_out0 = phi <s_loop> # (scalar) EXIT_PHI | 4280 s_out0 = phi <s_loop> # (scalar) EXIT_PHI |
4298 use <s_out0> | 4281 use <s_out0> |
4299 use <s_out0> | 4282 use <s_out0> |
4300 | 4283 |
4301 The above is transformed by this function into: | 4284 The above is transformed by this function into: |
4302 | 4285 |
4303 loop: | 4286 loop: |
4304 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI | 4287 vec_def = phi <vec_init, VECT_DEF> # REDUCTION_PHI |
4305 VECT_DEF = vector_stmt # vectorized form of STMT | 4288 VECT_DEF = vector_stmt # vectorized form of STMT_INFO |
4306 s_loop = scalar_stmt # (scalar) STMT | 4289 s_loop = scalar_stmt # (scalar) STMT_INFO |
4307 loop_exit: | 4290 loop_exit: |
4308 s_out0 = phi <s_loop> # (scalar) EXIT_PHI | 4291 s_out0 = phi <s_loop> # (scalar) EXIT_PHI |
4309 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI | 4292 v_out1 = phi <VECT_DEF> # NEW_EXIT_PHI |
4310 v_out2 = reduce <v_out1> | 4293 v_out2 = reduce <v_out1> |
4311 s_out3 = extract_field <v_out2, 0> | 4294 s_out3 = extract_field <v_out2, 0> |
4313 use <s_out4> | 4296 use <s_out4> |
4314 use <s_out4> | 4297 use <s_out4> |
4315 */ | 4298 */ |
4316 | 4299 |
4317 static void | 4300 static void |
4318 vect_create_epilog_for_reduction (vec<tree> vect_defs, gimple *stmt, | 4301 vect_create_epilog_for_reduction (vec<tree> vect_defs, |
4302 stmt_vec_info stmt_info, | |
4319 gimple *reduc_def_stmt, | 4303 gimple *reduc_def_stmt, |
4320 int ncopies, enum tree_code reduc_code, | 4304 int ncopies, internal_fn reduc_fn, |
4321 vec<gimple *> reduction_phis, | 4305 vec<stmt_vec_info> reduction_phis, |
4322 bool double_reduc, | 4306 bool double_reduc, |
4323 slp_tree slp_node, | 4307 slp_tree slp_node, |
4324 slp_instance slp_node_instance) | 4308 slp_instance slp_node_instance, |
4309 tree induc_val, enum tree_code induc_code, | |
4310 tree neutral_op) | |
4325 { | 4311 { |
4326 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
4327 stmt_vec_info prev_phi_info; | 4312 stmt_vec_info prev_phi_info; |
4328 tree vectype; | 4313 tree vectype; |
4329 machine_mode mode; | 4314 machine_mode mode; |
4330 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | 4315 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); |
4331 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL; | 4316 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *outer_loop = NULL; |
4332 basic_block exit_bb; | 4317 basic_block exit_bb; |
4333 tree scalar_dest; | 4318 tree scalar_dest; |
4334 tree scalar_type; | 4319 tree scalar_type; |
4335 gimple *new_phi = NULL, *phi; | 4320 gimple *new_phi = NULL, *phi; |
4321 stmt_vec_info phi_info; | |
4336 gimple_stmt_iterator exit_gsi; | 4322 gimple_stmt_iterator exit_gsi; |
4337 tree vec_dest; | 4323 tree vec_dest; |
4338 tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest; | 4324 tree new_temp = NULL_TREE, new_dest, new_name, new_scalar_dest; |
4339 gimple *epilog_stmt = NULL; | 4325 gimple *epilog_stmt = NULL; |
4340 enum tree_code code = gimple_assign_rhs_code (stmt); | 4326 enum tree_code code = gimple_assign_rhs_code (stmt_info->stmt); |
4341 gimple *exit_phi; | 4327 gimple *exit_phi; |
4342 tree bitsize; | 4328 tree bitsize; |
4343 tree adjustment_def = NULL; | 4329 tree adjustment_def = NULL; |
4344 tree vec_initial_def = NULL; | 4330 tree vec_initial_def = NULL; |
4345 tree expr, def, initial_def = NULL; | 4331 tree expr, def, initial_def = NULL; |
4346 tree orig_name, scalar_result; | 4332 tree orig_name, scalar_result; |
4347 imm_use_iterator imm_iter, phi_imm_iter; | 4333 imm_use_iterator imm_iter, phi_imm_iter; |
4348 use_operand_p use_p, phi_use_p; | 4334 use_operand_p use_p, phi_use_p; |
4349 gimple *use_stmt, *orig_stmt, *reduction_phi = NULL; | 4335 gimple *use_stmt; |
4336 stmt_vec_info reduction_phi_info = NULL; | |
4350 bool nested_in_vect_loop = false; | 4337 bool nested_in_vect_loop = false; |
4351 auto_vec<gimple *> new_phis; | 4338 auto_vec<gimple *> new_phis; |
4352 auto_vec<gimple *> inner_phis; | 4339 auto_vec<stmt_vec_info> inner_phis; |
4353 enum vect_def_type dt = vect_unknown_def_type; | |
4354 int j, i; | 4340 int j, i; |
4355 auto_vec<tree> scalar_results; | 4341 auto_vec<tree> scalar_results; |
4356 unsigned int group_size = 1, k, ratio; | 4342 unsigned int group_size = 1, k, ratio; |
4357 auto_vec<tree> vec_initial_defs; | 4343 auto_vec<tree> vec_initial_defs; |
4358 auto_vec<gimple *> phis; | 4344 auto_vec<gimple *> phis; |
4359 bool slp_reduc = false; | 4345 bool slp_reduc = false; |
4346 bool direct_slp_reduc; | |
4360 tree new_phi_result; | 4347 tree new_phi_result; |
4361 gimple *inner_phi = NULL; | 4348 stmt_vec_info inner_phi = NULL; |
4362 tree induction_index = NULL_TREE; | 4349 tree induction_index = NULL_TREE; |
4363 | 4350 |
4364 if (slp_node) | 4351 if (slp_node) |
4365 group_size = SLP_TREE_SCALAR_STMTS (slp_node).length (); | 4352 group_size = SLP_TREE_SCALAR_STMTS (slp_node).length (); |
4366 | 4353 |
4367 if (nested_in_vect_loop_p (loop, stmt)) | 4354 if (nested_in_vect_loop_p (loop, stmt_info)) |
4368 { | 4355 { |
4369 outer_loop = loop; | 4356 outer_loop = loop; |
4370 loop = loop->inner; | 4357 loop = loop->inner; |
4371 nested_in_vect_loop = true; | 4358 nested_in_vect_loop = true; |
4372 gcc_assert (!slp_node); | 4359 gcc_assert (!slp_node); |
4398 if (slp_node) | 4385 if (slp_node) |
4399 { | 4386 { |
4400 unsigned vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); | 4387 unsigned vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
4401 vec_initial_defs.reserve (vec_num); | 4388 vec_initial_defs.reserve (vec_num); |
4402 get_initial_defs_for_reduction (slp_node_instance->reduc_phis, | 4389 get_initial_defs_for_reduction (slp_node_instance->reduc_phis, |
4403 &vec_initial_defs, vec_num, code, | 4390 &vec_initial_defs, vec_num, |
4404 GROUP_FIRST_ELEMENT (stmt_info)); | 4391 REDUC_GROUP_FIRST_ELEMENT (stmt_info), |
4392 neutral_op); | |
4405 } | 4393 } |
4406 else | 4394 else |
4407 { | 4395 { |
4408 /* Get at the scalar def before the loop, that defines the initial value | 4396 /* Get at the scalar def before the loop, that defines the initial value |
4409 of the reduction variable. */ | 4397 of the reduction variable. */ |
4410 gimple *def_stmt; | |
4411 initial_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt, | 4398 initial_def = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt, |
4412 loop_preheader_edge (loop)); | 4399 loop_preheader_edge (loop)); |
4413 vect_is_simple_use (initial_def, loop_vinfo, &def_stmt, &initial_def_dt); | 4400 /* Optimize: if initial_def is for REDUC_MAX smaller than the base |
4414 vec_initial_def = get_initial_def_for_reduction (stmt, initial_def, | 4401 and we can't use zero for induc_val, use initial_def. Similarly |
4415 &adjustment_def); | 4402 for REDUC_MIN and initial_def larger than the base. */ |
4403 if (TREE_CODE (initial_def) == INTEGER_CST | |
4404 && (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) | |
4405 == INTEGER_INDUC_COND_REDUCTION) | |
4406 && !integer_zerop (induc_val) | |
4407 && ((induc_code == MAX_EXPR | |
4408 && tree_int_cst_lt (initial_def, induc_val)) | |
4409 || (induc_code == MIN_EXPR | |
4410 && tree_int_cst_lt (induc_val, initial_def)))) | |
4411 induc_val = initial_def; | |
4412 | |
4413 if (double_reduc) | |
4414 /* In case of double reduction we only create a vector variable | |
4415 to be put in the reduction phi node. The actual statement | |
4416 creation is done later in this function. */ | |
4417 vec_initial_def = vect_create_destination_var (initial_def, vectype); | |
4418 else if (nested_in_vect_loop) | |
4419 { | |
4420 /* Do not use an adjustment def as that case is not supported | |
4421 correctly if ncopies is not one. */ | |
4422 vect_is_simple_use (initial_def, loop_vinfo, &initial_def_dt); | |
4423 vec_initial_def = vect_get_vec_def_for_operand (initial_def, | |
4424 stmt_info); | |
4425 } | |
4426 else | |
4427 vec_initial_def | |
4428 = get_initial_def_for_reduction (stmt_info, initial_def, | |
4429 &adjustment_def); | |
4416 vec_initial_defs.create (1); | 4430 vec_initial_defs.create (1); |
4417 vec_initial_defs.quick_push (vec_initial_def); | 4431 vec_initial_defs.quick_push (vec_initial_def); |
4418 } | 4432 } |
4419 | 4433 |
4420 /* Set phi nodes arguments. */ | 4434 /* Set phi nodes arguments. */ |
4421 FOR_EACH_VEC_ELT (reduction_phis, i, phi) | 4435 FOR_EACH_VEC_ELT (reduction_phis, i, phi_info) |
4422 { | 4436 { |
4423 tree vec_init_def = vec_initial_defs[i]; | 4437 tree vec_init_def = vec_initial_defs[i]; |
4424 tree def = vect_defs[i]; | 4438 tree def = vect_defs[i]; |
4425 for (j = 0; j < ncopies; j++) | 4439 for (j = 0; j < ncopies; j++) |
4426 { | 4440 { |
4427 if (j != 0) | 4441 if (j != 0) |
4428 { | 4442 { |
4429 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)); | 4443 phi_info = STMT_VINFO_RELATED_STMT (phi_info); |
4430 if (nested_in_vect_loop) | 4444 if (nested_in_vect_loop) |
4431 vec_init_def | 4445 vec_init_def |
4432 = vect_get_vec_def_for_stmt_copy (initial_def_dt, | 4446 = vect_get_vec_def_for_stmt_copy (loop_vinfo, vec_init_def); |
4433 vec_init_def); | |
4434 } | 4447 } |
4435 | 4448 |
4436 /* Set the loop-entry arg of the reduction-phi. */ | 4449 /* Set the loop-entry arg of the reduction-phi. */ |
4437 | 4450 |
4451 gphi *phi = as_a <gphi *> (phi_info->stmt); | |
4438 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) | 4452 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) |
4439 == INTEGER_INDUC_COND_REDUCTION) | 4453 == INTEGER_INDUC_COND_REDUCTION) |
4440 { | 4454 { |
4441 /* Initialise the reduction phi to zero. This prevents initial | 4455 /* Initialise the reduction phi to zero. This prevents initial |
4442 values of non-zero interfering with the reduction op. */ | 4456 values of non-zero interfering with the reduction op. */ |
4443 gcc_assert (ncopies == 1); | 4457 gcc_assert (ncopies == 1); |
4444 gcc_assert (i == 0); | 4458 gcc_assert (i == 0); |
4445 | 4459 |
4446 tree vec_init_def_type = TREE_TYPE (vec_init_def); | 4460 tree vec_init_def_type = TREE_TYPE (vec_init_def); |
4447 tree zero_vec = build_zero_cst (vec_init_def_type); | 4461 tree induc_val_vec |
4448 | 4462 = build_vector_from_val (vec_init_def_type, induc_val); |
4449 add_phi_arg (as_a <gphi *> (phi), zero_vec, | 4463 |
4450 loop_preheader_edge (loop), UNKNOWN_LOCATION); | 4464 add_phi_arg (phi, induc_val_vec, loop_preheader_edge (loop), |
4465 UNKNOWN_LOCATION); | |
4451 } | 4466 } |
4452 else | 4467 else |
4453 add_phi_arg (as_a <gphi *> (phi), vec_init_def, | 4468 add_phi_arg (phi, vec_init_def, loop_preheader_edge (loop), |
4454 loop_preheader_edge (loop), UNKNOWN_LOCATION); | 4469 UNKNOWN_LOCATION); |
4455 | 4470 |
4456 /* Set the loop-latch arg for the reduction-phi. */ | 4471 /* Set the loop-latch arg for the reduction-phi. */ |
4457 if (j > 0) | 4472 if (j > 0) |
4458 def = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, def); | 4473 def = vect_get_vec_def_for_stmt_copy (loop_vinfo, def); |
4459 | 4474 |
4460 add_phi_arg (as_a <gphi *> (phi), def, loop_latch_edge (loop), | 4475 add_phi_arg (phi, def, loop_latch_edge (loop), UNKNOWN_LOCATION); |
4461 UNKNOWN_LOCATION); | |
4462 | 4476 |
4463 if (dump_enabled_p ()) | 4477 if (dump_enabled_p ()) |
4464 { | 4478 dump_printf_loc (MSG_NOTE, vect_location, |
4465 dump_printf_loc (MSG_NOTE, vect_location, | 4479 "transform reduction: created def-use cycle: %G%G", |
4466 "transform reduction: created def-use cycle: "); | 4480 phi, SSA_NAME_DEF_STMT (def)); |
4467 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0); | |
4468 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, SSA_NAME_DEF_STMT (def), 0); | |
4469 } | |
4470 } | 4481 } |
4471 } | 4482 } |
4472 | 4483 |
4473 /* For cond reductions we want to create a new vector (INDEX_COND_EXPR) | 4484 /* For cond reductions we want to create a new vector (INDEX_COND_EXPR) |
4474 which is updated with the current index of the loop for every match of | 4485 which is updated with the current index of the loop for every match of |
4478 indexes. If there are no matches at all then the vector will be all | 4489 indexes. If there are no matches at all then the vector will be all |
4479 zeroes. */ | 4490 zeroes. */ |
4480 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION) | 4491 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION) |
4481 { | 4492 { |
4482 tree indx_before_incr, indx_after_incr; | 4493 tree indx_before_incr, indx_after_incr; |
4483 int nunits_out = TYPE_VECTOR_SUBPARTS (vectype); | 4494 poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype); |
4484 int k; | 4495 |
4485 | 4496 gimple *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info)->stmt; |
4486 gimple *vec_stmt = STMT_VINFO_VEC_STMT (stmt_info); | |
4487 gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR); | 4497 gcc_assert (gimple_assign_rhs_code (vec_stmt) == VEC_COND_EXPR); |
4488 | 4498 |
4489 int scalar_precision | 4499 int scalar_precision |
4490 = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (vectype))); | 4500 = GET_MODE_PRECISION (SCALAR_TYPE_MODE (TREE_TYPE (vectype))); |
4491 tree cr_index_scalar_type = make_unsigned_type (scalar_precision); | 4501 tree cr_index_scalar_type = make_unsigned_type (scalar_precision); |
4495 /* First we create a simple vector induction variable which starts | 4505 /* First we create a simple vector induction variable which starts |
4496 with the values {1,2,3,...} (SERIES_VECT) and increments by the | 4506 with the values {1,2,3,...} (SERIES_VECT) and increments by the |
4497 vector size (STEP). */ | 4507 vector size (STEP). */ |
4498 | 4508 |
4499 /* Create a {1,2,3,...} vector. */ | 4509 /* Create a {1,2,3,...} vector. */ |
4500 auto_vec<tree, 32> vtemp (nunits_out); | 4510 tree series_vect = build_index_vector (cr_index_vector_type, 1, 1); |
4501 for (k = 0; k < nunits_out; ++k) | |
4502 vtemp.quick_push (build_int_cst (cr_index_scalar_type, k + 1)); | |
4503 tree series_vect = build_vector (cr_index_vector_type, vtemp); | |
4504 | 4511 |
4505 /* Create a vector of the step value. */ | 4512 /* Create a vector of the step value. */ |
4506 tree step = build_int_cst (cr_index_scalar_type, nunits_out); | 4513 tree step = build_int_cst (cr_index_scalar_type, nunits_out); |
4507 tree vec_step = build_vector_from_val (cr_index_vector_type, step); | 4514 tree vec_step = build_vector_from_val (cr_index_vector_type, step); |
4508 | 4515 |
4521 tree vec_zero = build_vector_from_val (cr_index_vector_type, zero); | 4528 tree vec_zero = build_vector_from_val (cr_index_vector_type, zero); |
4522 | 4529 |
4523 /* Create a vector phi node. */ | 4530 /* Create a vector phi node. */ |
4524 tree new_phi_tree = make_ssa_name (cr_index_vector_type); | 4531 tree new_phi_tree = make_ssa_name (cr_index_vector_type); |
4525 new_phi = create_phi_node (new_phi_tree, loop->header); | 4532 new_phi = create_phi_node (new_phi_tree, loop->header); |
4526 set_vinfo_for_stmt (new_phi, | 4533 loop_vinfo->add_stmt (new_phi); |
4527 new_stmt_vec_info (new_phi, loop_vinfo)); | |
4528 add_phi_arg (as_a <gphi *> (new_phi), vec_zero, | 4534 add_phi_arg (as_a <gphi *> (new_phi), vec_zero, |
4529 loop_preheader_edge (loop), UNKNOWN_LOCATION); | 4535 loop_preheader_edge (loop), UNKNOWN_LOCATION); |
4530 | 4536 |
4531 /* Now take the condition from the loop's original cond_expr | 4537 /* Now take the condition from the loop's original cond_expr |
4532 (VEC_STMT) and produce a new cond_expr (INDEX_COND_EXPR) which for | 4538 (VEC_STMT) and produce a new cond_expr (INDEX_COND_EXPR) which for |
4547 new_phi_tree); | 4553 new_phi_tree); |
4548 induction_index = make_ssa_name (cr_index_vector_type); | 4554 induction_index = make_ssa_name (cr_index_vector_type); |
4549 gimple *index_condition = gimple_build_assign (induction_index, | 4555 gimple *index_condition = gimple_build_assign (induction_index, |
4550 index_cond_expr); | 4556 index_cond_expr); |
4551 gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT); | 4557 gsi_insert_before (&incr_gsi, index_condition, GSI_SAME_STMT); |
4552 stmt_vec_info index_vec_info = new_stmt_vec_info (index_condition, | 4558 stmt_vec_info index_vec_info = loop_vinfo->add_stmt (index_condition); |
4553 loop_vinfo); | |
4554 STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type; | 4559 STMT_VINFO_VECTYPE (index_vec_info) = cr_index_vector_type; |
4555 set_vinfo_for_stmt (index_condition, index_vec_info); | |
4556 | 4560 |
4557 /* Update the phi with the vec cond. */ | 4561 /* Update the phi with the vec cond. */ |
4558 add_phi_arg (as_a <gphi *> (new_phi), induction_index, | 4562 add_phi_arg (as_a <gphi *> (new_phi), induction_index, |
4559 loop_latch_edge (loop), UNKNOWN_LOCATION); | 4563 loop_latch_edge (loop), UNKNOWN_LOCATION); |
4560 } | 4564 } |
4567 step 1: compute the scalar result in a vector (v_out2) | 4571 step 1: compute the scalar result in a vector (v_out2) |
4568 step 2: extract the scalar result (s_out3) from the vector (v_out2) | 4572 step 2: extract the scalar result (s_out3) from the vector (v_out2) |
4569 step 3: adjust the scalar result (s_out3) if needed. | 4573 step 3: adjust the scalar result (s_out3) if needed. |
4570 | 4574 |
4571 Step 1 can be accomplished using one of the following three schemes: | 4575 Step 1 can be accomplished using one of the following three schemes: |
4572 (scheme 1) using reduc_code, if available. | 4576 (scheme 1) using reduc_fn, if available. |
4573 (scheme 2) using whole-vector shifts, if available. | 4577 (scheme 2) using whole-vector shifts, if available. |
4574 (scheme 3) using a scalar loop. In this case steps 1+2 above are | 4578 (scheme 3) using a scalar loop. In this case steps 1+2 above are |
4575 combined. | 4579 combined. |
4576 | 4580 |
4577 The overall epilog code looks like this: | 4581 The overall epilog code looks like this: |
4597 { | 4601 { |
4598 for (j = 0; j < ncopies; j++) | 4602 for (j = 0; j < ncopies; j++) |
4599 { | 4603 { |
4600 tree new_def = copy_ssa_name (def); | 4604 tree new_def = copy_ssa_name (def); |
4601 phi = create_phi_node (new_def, exit_bb); | 4605 phi = create_phi_node (new_def, exit_bb); |
4602 set_vinfo_for_stmt (phi, new_stmt_vec_info (phi, loop_vinfo)); | 4606 stmt_vec_info phi_info = loop_vinfo->add_stmt (phi); |
4603 if (j == 0) | 4607 if (j == 0) |
4604 new_phis.quick_push (phi); | 4608 new_phis.quick_push (phi); |
4605 else | 4609 else |
4606 { | 4610 { |
4607 def = vect_get_vec_def_for_stmt_copy (dt, def); | 4611 def = vect_get_vec_def_for_stmt_copy (loop_vinfo, def); |
4608 STMT_VINFO_RELATED_STMT (prev_phi_info) = phi; | 4612 STMT_VINFO_RELATED_STMT (prev_phi_info) = phi_info; |
4609 } | 4613 } |
4610 | 4614 |
4611 SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def); | 4615 SET_PHI_ARG_DEF (phi, single_exit (loop)->dest_idx, def); |
4612 prev_phi_info = vinfo_for_stmt (phi); | 4616 prev_phi_info = phi_info; |
4613 } | 4617 } |
4614 } | 4618 } |
4615 | 4619 |
4616 /* The epilogue is created for the outer-loop, i.e., for the loop being | 4620 /* The epilogue is created for the outer-loop, i.e., for the loop being |
4617 vectorized. Create exit phis for the outer loop. */ | 4621 vectorized. Create exit phis for the outer loop. */ |
4620 loop = outer_loop; | 4624 loop = outer_loop; |
4621 exit_bb = single_exit (loop)->dest; | 4625 exit_bb = single_exit (loop)->dest; |
4622 inner_phis.create (vect_defs.length ()); | 4626 inner_phis.create (vect_defs.length ()); |
4623 FOR_EACH_VEC_ELT (new_phis, i, phi) | 4627 FOR_EACH_VEC_ELT (new_phis, i, phi) |
4624 { | 4628 { |
4629 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi); | |
4625 tree new_result = copy_ssa_name (PHI_RESULT (phi)); | 4630 tree new_result = copy_ssa_name (PHI_RESULT (phi)); |
4626 gphi *outer_phi = create_phi_node (new_result, exit_bb); | 4631 gphi *outer_phi = create_phi_node (new_result, exit_bb); |
4627 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx, | 4632 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx, |
4628 PHI_RESULT (phi)); | 4633 PHI_RESULT (phi)); |
4629 set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi, | 4634 prev_phi_info = loop_vinfo->add_stmt (outer_phi); |
4630 loop_vinfo)); | 4635 inner_phis.quick_push (phi_info); |
4631 inner_phis.quick_push (phi); | |
4632 new_phis[i] = outer_phi; | 4636 new_phis[i] = outer_phi; |
4633 prev_phi_info = vinfo_for_stmt (outer_phi); | 4637 while (STMT_VINFO_RELATED_STMT (phi_info)) |
4634 while (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi))) | |
4635 { | 4638 { |
4636 phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (phi)); | 4639 phi_info = STMT_VINFO_RELATED_STMT (phi_info); |
4637 new_result = copy_ssa_name (PHI_RESULT (phi)); | 4640 new_result = copy_ssa_name (PHI_RESULT (phi_info->stmt)); |
4638 outer_phi = create_phi_node (new_result, exit_bb); | 4641 outer_phi = create_phi_node (new_result, exit_bb); |
4639 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx, | 4642 SET_PHI_ARG_DEF (outer_phi, single_exit (loop)->dest_idx, |
4640 PHI_RESULT (phi)); | 4643 PHI_RESULT (phi_info->stmt)); |
4641 set_vinfo_for_stmt (outer_phi, new_stmt_vec_info (outer_phi, | 4644 stmt_vec_info outer_phi_info = loop_vinfo->add_stmt (outer_phi); |
4642 loop_vinfo)); | 4645 STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi_info; |
4643 STMT_VINFO_RELATED_STMT (prev_phi_info) = outer_phi; | 4646 prev_phi_info = outer_phi_info; |
4644 prev_phi_info = vinfo_for_stmt (outer_phi); | |
4645 } | 4647 } |
4646 } | 4648 } |
4647 } | 4649 } |
4648 | 4650 |
4649 exit_gsi = gsi_after_labels (exit_bb); | 4651 exit_gsi = gsi_after_labels (exit_bb); |
4650 | 4652 |
4651 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3 | 4653 /* 2.2 Get the relevant tree-code to use in the epilog for schemes 2,3 |
4652 (i.e. when reduc_code is not available) and in the final adjustment | 4654 (i.e. when reduc_fn is not available) and in the final adjustment |
4653 code (if needed). Also get the original scalar reduction variable as | 4655 code (if needed). Also get the original scalar reduction variable as |
4654 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it | 4656 defined in the loop. In case STMT is a "pattern-stmt" (i.e. - it |
4655 represents a reduction pattern), the tree-code and scalar-def are | 4657 represents a reduction pattern), the tree-code and scalar-def are |
4656 taken from the original stmt that the pattern-stmt (STMT) replaces. | 4658 taken from the original stmt that the pattern-stmt (STMT) replaces. |
4657 Otherwise (it is a regular reduction) - the tree-code and scalar-def | 4659 Otherwise (it is a regular reduction) - the tree-code and scalar-def |
4658 are taken from STMT. */ | 4660 are taken from STMT. */ |
4659 | 4661 |
4660 orig_stmt = STMT_VINFO_RELATED_STMT (stmt_info); | 4662 stmt_vec_info orig_stmt_info = vect_orig_stmt (stmt_info); |
4661 if (!orig_stmt) | 4663 if (orig_stmt_info != stmt_info) |
4662 { | |
4663 /* Regular reduction */ | |
4664 orig_stmt = stmt; | |
4665 } | |
4666 else | |
4667 { | 4664 { |
4668 /* Reduction pattern */ | 4665 /* Reduction pattern */ |
4669 stmt_vec_info stmt_vinfo = vinfo_for_stmt (orig_stmt); | 4666 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info)); |
4670 gcc_assert (STMT_VINFO_IN_PATTERN_P (stmt_vinfo)); | 4667 gcc_assert (STMT_VINFO_RELATED_STMT (orig_stmt_info) == stmt_info); |
4671 gcc_assert (STMT_VINFO_RELATED_STMT (stmt_vinfo) == stmt); | 4668 } |
4672 } | 4669 |
4673 | 4670 code = gimple_assign_rhs_code (orig_stmt_info->stmt); |
4674 code = gimple_assign_rhs_code (orig_stmt); | |
4675 /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore, | 4671 /* For MINUS_EXPR the initial vector is [init_val,0,...,0], therefore, |
4676 partial results are added and not subtracted. */ | 4672 partial results are added and not subtracted. */ |
4677 if (code == MINUS_EXPR) | 4673 if (code == MINUS_EXPR) |
4678 code = PLUS_EXPR; | 4674 code = PLUS_EXPR; |
4679 | 4675 |
4680 scalar_dest = gimple_assign_lhs (orig_stmt); | 4676 scalar_dest = gimple_assign_lhs (orig_stmt_info->stmt); |
4681 scalar_type = TREE_TYPE (scalar_dest); | 4677 scalar_type = TREE_TYPE (scalar_dest); |
4682 scalar_results.create (group_size); | 4678 scalar_results.create (group_size); |
4683 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL); | 4679 new_scalar_dest = vect_create_destination_var (scalar_dest, NULL); |
4684 bitsize = TYPE_SIZE (scalar_type); | 4680 bitsize = TYPE_SIZE (scalar_type); |
4685 | 4681 |
4695 /* SLP reduction without reduction chain, e.g., | 4691 /* SLP reduction without reduction chain, e.g., |
4696 # a1 = phi <a2, a0> | 4692 # a1 = phi <a2, a0> |
4697 # b1 = phi <b2, b0> | 4693 # b1 = phi <b2, b0> |
4698 a2 = operation (a1) | 4694 a2 = operation (a1) |
4699 b2 = operation (b1) */ | 4695 b2 = operation (b1) */ |
4700 slp_reduc = (slp_node && !GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))); | 4696 slp_reduc = (slp_node && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)); |
4697 | |
4698 /* True if we should implement SLP_REDUC using native reduction operations | |
4699 instead of scalar operations. */ | |
4700 direct_slp_reduc = (reduc_fn != IFN_LAST | |
4701 && slp_reduc | |
4702 && !TYPE_VECTOR_SUBPARTS (vectype).is_constant ()); | |
4701 | 4703 |
4702 /* In case of reduction chain, e.g., | 4704 /* In case of reduction chain, e.g., |
4703 # a1 = phi <a3, a0> | 4705 # a1 = phi <a3, a0> |
4704 a2 = operation (a1) | 4706 a2 = operation (a1) |
4705 a3 = operation (a2), | 4707 a3 = operation (a2), |
4706 | 4708 |
4707 we may end up with more than one vector result. Here we reduce them to | 4709 we may end up with more than one vector result. Here we reduce them to |
4708 one vector. */ | 4710 one vector. */ |
4709 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) | 4711 if (REDUC_GROUP_FIRST_ELEMENT (stmt_info) || direct_slp_reduc) |
4710 { | 4712 { |
4711 tree first_vect = PHI_RESULT (new_phis[0]); | 4713 tree first_vect = PHI_RESULT (new_phis[0]); |
4712 gassign *new_vec_stmt = NULL; | 4714 gassign *new_vec_stmt = NULL; |
4713 vec_dest = vect_create_destination_var (scalar_dest, vectype); | 4715 vec_dest = vect_create_destination_var (scalar_dest, vectype); |
4714 for (k = 1; k < new_phis.length (); k++) | 4716 for (k = 1; k < new_phis.length (); k++) |
4734 { | 4736 { |
4735 gcc_assert (new_phis.length () == 1); | 4737 gcc_assert (new_phis.length () == 1); |
4736 tree first_vect = PHI_RESULT (new_phis[0]); | 4738 tree first_vect = PHI_RESULT (new_phis[0]); |
4737 gassign *new_vec_stmt = NULL; | 4739 gassign *new_vec_stmt = NULL; |
4738 vec_dest = vect_create_destination_var (scalar_dest, vectype); | 4740 vec_dest = vect_create_destination_var (scalar_dest, vectype); |
4739 gimple *next_phi = new_phis[0]; | 4741 stmt_vec_info next_phi_info = loop_vinfo->lookup_stmt (new_phis[0]); |
4740 for (int k = 1; k < ncopies; ++k) | 4742 for (int k = 1; k < ncopies; ++k) |
4741 { | 4743 { |
4742 next_phi = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (next_phi)); | 4744 next_phi_info = STMT_VINFO_RELATED_STMT (next_phi_info); |
4743 tree second_vect = PHI_RESULT (next_phi); | 4745 tree second_vect = PHI_RESULT (next_phi_info->stmt); |
4744 tree tem = make_ssa_name (vec_dest, new_vec_stmt); | 4746 tree tem = make_ssa_name (vec_dest, new_vec_stmt); |
4745 new_vec_stmt = gimple_build_assign (tem, code, | 4747 new_vec_stmt = gimple_build_assign (tem, code, |
4746 first_vect, second_vect); | 4748 first_vect, second_vect); |
4747 gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT); | 4749 gsi_insert_before (&exit_gsi, new_vec_stmt, GSI_SAME_STMT); |
4748 first_vect = tem; | 4750 first_vect = tem; |
4753 } | 4755 } |
4754 else | 4756 else |
4755 new_phi_result = PHI_RESULT (new_phis[0]); | 4757 new_phi_result = PHI_RESULT (new_phis[0]); |
4756 | 4758 |
4757 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION | 4759 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION |
4758 && reduc_code != ERROR_MARK) | 4760 && reduc_fn != IFN_LAST) |
4759 { | 4761 { |
4760 /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing | 4762 /* For condition reductions, we have a vector (NEW_PHI_RESULT) containing |
4761 various data values where the condition matched and another vector | 4763 various data values where the condition matched and another vector |
4762 (INDUCTION_INDEX) containing all the indexes of those matches. We | 4764 (INDUCTION_INDEX) containing all the indexes of those matches. We |
4763 need to extract the last matching index (which will be the index with | 4765 need to extract the last matching index (which will be the index with |
4791 gimple *zero_vec_stmt = gimple_build_assign (zero_vec, zero_vec_rhs); | 4793 gimple *zero_vec_stmt = gimple_build_assign (zero_vec, zero_vec_rhs); |
4792 gsi_insert_before (&exit_gsi, zero_vec_stmt, GSI_SAME_STMT); | 4794 gsi_insert_before (&exit_gsi, zero_vec_stmt, GSI_SAME_STMT); |
4793 | 4795 |
4794 /* Find maximum value from the vector of found indexes. */ | 4796 /* Find maximum value from the vector of found indexes. */ |
4795 tree max_index = make_ssa_name (index_scalar_type); | 4797 tree max_index = make_ssa_name (index_scalar_type); |
4796 gimple *max_index_stmt = gimple_build_assign (max_index, REDUC_MAX_EXPR, | 4798 gcall *max_index_stmt = gimple_build_call_internal (IFN_REDUC_MAX, |
4797 induction_index); | 4799 1, induction_index); |
4800 gimple_call_set_lhs (max_index_stmt, max_index); | |
4798 gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT); | 4801 gsi_insert_before (&exit_gsi, max_index_stmt, GSI_SAME_STMT); |
4799 | 4802 |
4800 /* Vector of {max_index, max_index, max_index,...}. */ | 4803 /* Vector of {max_index, max_index, max_index,...}. */ |
4801 tree max_index_vec = make_ssa_name (index_vec_type); | 4804 tree max_index_vec = make_ssa_name (index_vec_type); |
4802 tree max_index_vec_rhs = build_vector_from_val (index_vec_type, | 4805 tree max_index_vec_rhs = build_vector_from_val (index_vec_type, |
4847 vec_cond_cast_rhs); | 4850 vec_cond_cast_rhs); |
4848 gsi_insert_before (&exit_gsi, vec_cond_cast_stmt, GSI_SAME_STMT); | 4851 gsi_insert_before (&exit_gsi, vec_cond_cast_stmt, GSI_SAME_STMT); |
4849 | 4852 |
4850 /* Reduce down to a scalar value. */ | 4853 /* Reduce down to a scalar value. */ |
4851 tree data_reduc = make_ssa_name (scalar_type_unsigned); | 4854 tree data_reduc = make_ssa_name (scalar_type_unsigned); |
4852 optab ot = optab_for_tree_code (REDUC_MAX_EXPR, vectype_unsigned, | 4855 gcall *data_reduc_stmt = gimple_build_call_internal (IFN_REDUC_MAX, |
4853 optab_default); | 4856 1, vec_cond_cast); |
4854 gcc_assert (optab_handler (ot, TYPE_MODE (vectype_unsigned)) | 4857 gimple_call_set_lhs (data_reduc_stmt, data_reduc); |
4855 != CODE_FOR_nothing); | |
4856 gimple *data_reduc_stmt = gimple_build_assign (data_reduc, | |
4857 REDUC_MAX_EXPR, | |
4858 vec_cond_cast); | |
4859 gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT); | 4858 gsi_insert_before (&exit_gsi, data_reduc_stmt, GSI_SAME_STMT); |
4860 | 4859 |
4861 /* Convert the reduced value back to the result type and set as the | 4860 /* Convert the reduced value back to the result type and set as the |
4862 result. */ | 4861 result. */ |
4863 gimple_seq stmts = NULL; | 4862 gimple_seq stmts = NULL; |
4865 data_reduc); | 4864 data_reduc); |
4866 gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); | 4865 gsi_insert_seq_before (&exit_gsi, stmts, GSI_SAME_STMT); |
4867 scalar_results.safe_push (new_temp); | 4866 scalar_results.safe_push (new_temp); |
4868 } | 4867 } |
4869 else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION | 4868 else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION |
4870 && reduc_code == ERROR_MARK) | 4869 && reduc_fn == IFN_LAST) |
4871 { | 4870 { |
4872 /* Condition redution without supported REDUC_MAX_EXPR. Generate | 4871 /* Condition reduction without supported IFN_REDUC_MAX. Generate |
4873 idx = 0; | 4872 idx = 0; |
4874 idx_val = induction_index[0]; | 4873 idx_val = induction_index[0]; |
4875 val = data_reduc[0]; | 4874 val = data_reduc[0]; |
4876 for (idx = 0, val = init, i = 0; i < nelts; ++i) | 4875 for (idx = 0, val = init, i = 0; i < nelts; ++i) |
4877 if (induction_index[i] > idx_val) | 4876 if (induction_index[i] > idx_val) |
4879 return val; */ | 4878 return val; */ |
4880 | 4879 |
4881 tree data_eltype = TREE_TYPE (TREE_TYPE (new_phi_result)); | 4880 tree data_eltype = TREE_TYPE (TREE_TYPE (new_phi_result)); |
4882 tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index)); | 4881 tree idx_eltype = TREE_TYPE (TREE_TYPE (induction_index)); |
4883 unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype)); | 4882 unsigned HOST_WIDE_INT el_size = tree_to_uhwi (TYPE_SIZE (idx_eltype)); |
4884 unsigned HOST_WIDE_INT v_size | 4883 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index)); |
4885 = el_size * TYPE_VECTOR_SUBPARTS (TREE_TYPE (induction_index)); | 4884 /* Enforced by vectorizable_reduction, which ensures we have target |
4885 support before allowing a conditional reduction on variable-length | |
4886 vectors. */ | |
4887 unsigned HOST_WIDE_INT v_size = el_size * nunits.to_constant (); | |
4886 tree idx_val = NULL_TREE, val = NULL_TREE; | 4888 tree idx_val = NULL_TREE, val = NULL_TREE; |
4887 for (unsigned HOST_WIDE_INT off = 0; off < v_size; off += el_size) | 4889 for (unsigned HOST_WIDE_INT off = 0; off < v_size; off += el_size) |
4888 { | 4890 { |
4889 tree old_idx_val = idx_val; | 4891 tree old_idx_val = idx_val; |
4890 tree old_val = val; | 4892 tree old_val = val; |
4937 } | 4939 } |
4938 | 4940 |
4939 /* 2.3 Create the reduction code, using one of the three schemes described | 4941 /* 2.3 Create the reduction code, using one of the three schemes described |
4940 above. In SLP we simply need to extract all the elements from the | 4942 above. In SLP we simply need to extract all the elements from the |
4941 vector (without reducing them), so we use scalar shifts. */ | 4943 vector (without reducing them), so we use scalar shifts. */ |
4942 else if (reduc_code != ERROR_MARK && !slp_reduc) | 4944 else if (reduc_fn != IFN_LAST && !slp_reduc) |
4943 { | 4945 { |
4944 tree tmp; | 4946 tree tmp; |
4945 tree vec_elem_type; | 4947 tree vec_elem_type; |
4946 | 4948 |
4947 /* Case 1: Create: | 4949 /* Case 1: Create: |
4952 "Reduce using direct vector reduction.\n"); | 4954 "Reduce using direct vector reduction.\n"); |
4953 | 4955 |
4954 vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result)); | 4956 vec_elem_type = TREE_TYPE (TREE_TYPE (new_phi_result)); |
4955 if (!useless_type_conversion_p (scalar_type, vec_elem_type)) | 4957 if (!useless_type_conversion_p (scalar_type, vec_elem_type)) |
4956 { | 4958 { |
4957 tree tmp_dest = | 4959 tree tmp_dest |
4958 vect_create_destination_var (scalar_dest, vec_elem_type); | 4960 = vect_create_destination_var (scalar_dest, vec_elem_type); |
4959 tmp = build1 (reduc_code, vec_elem_type, new_phi_result); | 4961 epilog_stmt = gimple_build_call_internal (reduc_fn, 1, |
4960 epilog_stmt = gimple_build_assign (tmp_dest, tmp); | 4962 new_phi_result); |
4963 gimple_set_lhs (epilog_stmt, tmp_dest); | |
4961 new_temp = make_ssa_name (tmp_dest, epilog_stmt); | 4964 new_temp = make_ssa_name (tmp_dest, epilog_stmt); |
4962 gimple_assign_set_lhs (epilog_stmt, new_temp); | 4965 gimple_set_lhs (epilog_stmt, new_temp); |
4963 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); | 4966 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
4964 | 4967 |
4965 tmp = build1 (NOP_EXPR, scalar_type, new_temp); | 4968 epilog_stmt = gimple_build_assign (new_scalar_dest, NOP_EXPR, |
4969 new_temp); | |
4966 } | 4970 } |
4967 else | 4971 else |
4968 tmp = build1 (reduc_code, scalar_type, new_phi_result); | 4972 { |
4969 | 4973 epilog_stmt = gimple_build_call_internal (reduc_fn, 1, |
4970 epilog_stmt = gimple_build_assign (new_scalar_dest, tmp); | 4974 new_phi_result); |
4975 gimple_set_lhs (epilog_stmt, new_scalar_dest); | |
4976 } | |
4977 | |
4971 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); | 4978 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); |
4972 gimple_assign_set_lhs (epilog_stmt, new_temp); | 4979 gimple_set_lhs (epilog_stmt, new_temp); |
4973 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); | 4980 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
4974 | 4981 |
4975 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) | 4982 if ((STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) |
4976 == INTEGER_INDUC_COND_REDUCTION) | 4983 == INTEGER_INDUC_COND_REDUCTION) |
4977 { | 4984 && !operand_equal_p (initial_def, induc_val, 0)) |
4978 /* Earlier we set the initial value to be zero. Check the result | 4985 { |
4979 and if it is zero then replace with the original initial | 4986 /* Earlier we set the initial value to be a vector of induc_val |
4980 value. */ | 4987 values. Check the result and if it is induc_val then replace |
4981 tree zero = build_zero_cst (scalar_type); | 4988 with the original initial value, unless induc_val is |
4982 tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero); | 4989 the same as initial_def already. */ |
4990 tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, | |
4991 induc_val); | |
4983 | 4992 |
4984 tmp = make_ssa_name (new_scalar_dest); | 4993 tmp = make_ssa_name (new_scalar_dest); |
4985 epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare, | 4994 epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare, |
4986 initial_def, new_temp); | 4995 initial_def, new_temp); |
4987 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); | 4996 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
4988 new_temp = tmp; | 4997 new_temp = tmp; |
4989 } | 4998 } |
4990 | 4999 |
4991 scalar_results.safe_push (new_temp); | 5000 scalar_results.safe_push (new_temp); |
4992 } | 5001 } |
5002 else if (direct_slp_reduc) | |
5003 { | |
5004 /* Here we create one vector for each of the REDUC_GROUP_SIZE results, | |
5005 with the elements for other SLP statements replaced with the | |
5006 neutral value. We can then do a normal reduction on each vector. */ | |
5007 | |
5008 /* Enforced by vectorizable_reduction. */ | |
5009 gcc_assert (new_phis.length () == 1); | |
5010 gcc_assert (pow2p_hwi (group_size)); | |
5011 | |
5012 slp_tree orig_phis_slp_node = slp_node_instance->reduc_phis; | |
5013 vec<stmt_vec_info> orig_phis | |
5014 = SLP_TREE_SCALAR_STMTS (orig_phis_slp_node); | |
5015 gimple_seq seq = NULL; | |
5016 | |
5017 /* Build a vector {0, 1, 2, ...}, with the same number of elements | |
5018 and the same element size as VECTYPE. */ | |
5019 tree index = build_index_vector (vectype, 0, 1); | |
5020 tree index_type = TREE_TYPE (index); | |
5021 tree index_elt_type = TREE_TYPE (index_type); | |
5022 tree mask_type = build_same_sized_truth_vector_type (index_type); | |
5023 | |
5024 /* Create a vector that, for each element, identifies which of | |
5025 the REDUC_GROUP_SIZE results should use it. */ | |
5026 tree index_mask = build_int_cst (index_elt_type, group_size - 1); | |
5027 index = gimple_build (&seq, BIT_AND_EXPR, index_type, index, | |
5028 build_vector_from_val (index_type, index_mask)); | |
5029 | |
5030 /* Get a neutral vector value. This is simply a splat of the neutral | |
5031 scalar value if we have one, otherwise the initial scalar value | |
5032 is itself a neutral value. */ | |
5033 tree vector_identity = NULL_TREE; | |
5034 if (neutral_op) | |
5035 vector_identity = gimple_build_vector_from_val (&seq, vectype, | |
5036 neutral_op); | |
5037 for (unsigned int i = 0; i < group_size; ++i) | |
5038 { | |
5039 /* If there's no universal neutral value, we can use the | |
5040 initial scalar value from the original PHI. This is used | |
5041 for MIN and MAX reduction, for example. */ | |
5042 if (!neutral_op) | |
5043 { | |
5044 tree scalar_value | |
5045 = PHI_ARG_DEF_FROM_EDGE (orig_phis[i]->stmt, | |
5046 loop_preheader_edge (loop)); | |
5047 vector_identity = gimple_build_vector_from_val (&seq, vectype, | |
5048 scalar_value); | |
5049 } | |
5050 | |
5051 /* Calculate the equivalent of: | |
5052 | |
5053 sel[j] = (index[j] == i); | |
5054 | |
5055 which selects the elements of NEW_PHI_RESULT that should | |
5056 be included in the result. */ | |
5057 tree compare_val = build_int_cst (index_elt_type, i); | |
5058 compare_val = build_vector_from_val (index_type, compare_val); | |
5059 tree sel = gimple_build (&seq, EQ_EXPR, mask_type, | |
5060 index, compare_val); | |
5061 | |
5062 /* Calculate the equivalent of: | |
5063 | |
5064 vec = sel ? new_phi_result : vector_identity; | |
5065 | |
5066 VEC is now suitable for a full vector reduction. */ | |
5067 tree vec = gimple_build (&seq, VEC_COND_EXPR, vectype, | |
5068 sel, new_phi_result, vector_identity); | |
5069 | |
5070 /* Do the reduction and convert it to the appropriate type. */ | |
5071 tree scalar = gimple_build (&seq, as_combined_fn (reduc_fn), | |
5072 TREE_TYPE (vectype), vec); | |
5073 scalar = gimple_convert (&seq, scalar_type, scalar); | |
5074 scalar_results.safe_push (scalar); | |
5075 } | |
5076 gsi_insert_seq_before (&exit_gsi, seq, GSI_SAME_STMT); | |
5077 } | |
4993 else | 5078 else |
4994 { | 5079 { |
4995 bool reduce_with_shift = have_whole_vector_shift (mode); | 5080 bool reduce_with_shift; |
4996 int element_bitsize = tree_to_uhwi (bitsize); | |
4997 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)); | |
4998 tree vec_temp; | 5081 tree vec_temp; |
4999 | 5082 |
5000 /* COND reductions all do the final reduction with MAX_EXPR. */ | 5083 /* COND reductions all do the final reduction with MAX_EXPR |
5084 or MIN_EXPR. */ | |
5001 if (code == COND_EXPR) | 5085 if (code == COND_EXPR) |
5002 code = MAX_EXPR; | 5086 { |
5003 | 5087 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) |
5004 /* Regardless of whether we have a whole vector shift, if we're | 5088 == INTEGER_INDUC_COND_REDUCTION) |
5005 emulating the operation via tree-vect-generic, we don't want | 5089 code = induc_code; |
5006 to use it. Only the first round of the reduction is likely | 5090 else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) |
5007 to still be profitable via emulation. */ | 5091 == CONST_COND_REDUCTION) |
5008 /* ??? It might be better to emit a reduction tree code here, so that | 5092 code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info); |
5009 tree-vect-generic can expand the first round via bit tricks. */ | 5093 else |
5010 if (!VECTOR_MODE_P (mode)) | 5094 code = MAX_EXPR; |
5011 reduce_with_shift = false; | 5095 } |
5096 | |
5097 /* See if the target wants to do the final (shift) reduction | |
5098 in a vector mode of smaller size and first reduce upper/lower | |
5099 halves against each other. */ | |
5100 enum machine_mode mode1 = mode; | |
5101 tree vectype1 = vectype; | |
5102 unsigned sz = tree_to_uhwi (TYPE_SIZE_UNIT (vectype)); | |
5103 unsigned sz1 = sz; | |
5104 if (!slp_reduc | |
5105 && (mode1 = targetm.vectorize.split_reduction (mode)) != mode) | |
5106 sz1 = GET_MODE_SIZE (mode1).to_constant (); | |
5107 | |
5108 vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz1); | |
5109 reduce_with_shift = have_whole_vector_shift (mode1); | |
5110 if (!VECTOR_MODE_P (mode1)) | |
5111 reduce_with_shift = false; | |
5012 else | 5112 else |
5013 { | 5113 { |
5014 optab optab = optab_for_tree_code (code, vectype, optab_default); | 5114 optab optab = optab_for_tree_code (code, vectype1, optab_default); |
5015 if (optab_handler (optab, mode) == CODE_FOR_nothing) | 5115 if (optab_handler (optab, mode1) == CODE_FOR_nothing) |
5016 reduce_with_shift = false; | 5116 reduce_with_shift = false; |
5017 } | 5117 } |
5118 | |
5119 /* First reduce the vector to the desired vector size we should | |
5120 do shift reduction on by combining upper and lower halves. */ | |
5121 new_temp = new_phi_result; | |
5122 while (sz > sz1) | |
5123 { | |
5124 gcc_assert (!slp_reduc); | |
5125 sz /= 2; | |
5126 vectype1 = get_vectype_for_scalar_type_and_size (scalar_type, sz); | |
5127 | |
5128 /* The target has to make sure we support lowpart/highpart | |
5129 extraction, either via direct vector extract or through | |
5130 an integer mode punning. */ | |
5131 tree dst1, dst2; | |
5132 if (convert_optab_handler (vec_extract_optab, | |
5133 TYPE_MODE (TREE_TYPE (new_temp)), | |
5134 TYPE_MODE (vectype1)) | |
5135 != CODE_FOR_nothing) | |
5136 { | |
5137 /* Extract sub-vectors directly once vec_extract becomes | |
5138 a conversion optab. */ | |
5139 dst1 = make_ssa_name (vectype1); | |
5140 epilog_stmt | |
5141 = gimple_build_assign (dst1, BIT_FIELD_REF, | |
5142 build3 (BIT_FIELD_REF, vectype1, | |
5143 new_temp, TYPE_SIZE (vectype1), | |
5144 bitsize_int (0))); | |
5145 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); | |
5146 dst2 = make_ssa_name (vectype1); | |
5147 epilog_stmt | |
5148 = gimple_build_assign (dst2, BIT_FIELD_REF, | |
5149 build3 (BIT_FIELD_REF, vectype1, | |
5150 new_temp, TYPE_SIZE (vectype1), | |
5151 bitsize_int (sz * BITS_PER_UNIT))); | |
5152 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); | |
5153 } | |
5154 else | |
5155 { | |
5156 /* Extract via punning to appropriately sized integer mode | |
5157 vector. */ | |
5158 tree eltype = build_nonstandard_integer_type (sz * BITS_PER_UNIT, | |
5159 1); | |
5160 tree etype = build_vector_type (eltype, 2); | |
5161 gcc_assert (convert_optab_handler (vec_extract_optab, | |
5162 TYPE_MODE (etype), | |
5163 TYPE_MODE (eltype)) | |
5164 != CODE_FOR_nothing); | |
5165 tree tem = make_ssa_name (etype); | |
5166 epilog_stmt = gimple_build_assign (tem, VIEW_CONVERT_EXPR, | |
5167 build1 (VIEW_CONVERT_EXPR, | |
5168 etype, new_temp)); | |
5169 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); | |
5170 new_temp = tem; | |
5171 tem = make_ssa_name (eltype); | |
5172 epilog_stmt | |
5173 = gimple_build_assign (tem, BIT_FIELD_REF, | |
5174 build3 (BIT_FIELD_REF, eltype, | |
5175 new_temp, TYPE_SIZE (eltype), | |
5176 bitsize_int (0))); | |
5177 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); | |
5178 dst1 = make_ssa_name (vectype1); | |
5179 epilog_stmt = gimple_build_assign (dst1, VIEW_CONVERT_EXPR, | |
5180 build1 (VIEW_CONVERT_EXPR, | |
5181 vectype1, tem)); | |
5182 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); | |
5183 tem = make_ssa_name (eltype); | |
5184 epilog_stmt | |
5185 = gimple_build_assign (tem, BIT_FIELD_REF, | |
5186 build3 (BIT_FIELD_REF, eltype, | |
5187 new_temp, TYPE_SIZE (eltype), | |
5188 bitsize_int (sz * BITS_PER_UNIT))); | |
5189 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); | |
5190 dst2 = make_ssa_name (vectype1); | |
5191 epilog_stmt = gimple_build_assign (dst2, VIEW_CONVERT_EXPR, | |
5192 build1 (VIEW_CONVERT_EXPR, | |
5193 vectype1, tem)); | |
5194 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); | |
5195 } | |
5196 | |
5197 new_temp = make_ssa_name (vectype1); | |
5198 epilog_stmt = gimple_build_assign (new_temp, code, dst1, dst2); | |
5199 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); | |
5200 } | |
5018 | 5201 |
5019 if (reduce_with_shift && !slp_reduc) | 5202 if (reduce_with_shift && !slp_reduc) |
5020 { | 5203 { |
5021 int nelements = vec_size_in_bits / element_bitsize; | 5204 int element_bitsize = tree_to_uhwi (bitsize); |
5022 auto_vec_perm_indices sel (nelements); | 5205 /* Enforced by vectorizable_reduction, which disallows SLP reductions |
5206 for variable-length vectors and also requires direct target support | |
5207 for loop reductions. */ | |
5208 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1)); | |
5209 int nelements = vec_size_in_bits / element_bitsize; | |
5210 vec_perm_builder sel; | |
5211 vec_perm_indices indices; | |
5023 | 5212 |
5024 int elt_offset; | 5213 int elt_offset; |
5025 | 5214 |
5026 tree zero_vec = build_zero_cst (vectype); | 5215 tree zero_vec = build_zero_cst (vectype1); |
5027 /* Case 2: Create: | 5216 /* Case 2: Create: |
5028 for (offset = nelements/2; offset >= 1; offset/=2) | 5217 for (offset = nelements/2; offset >= 1; offset/=2) |
5029 { | 5218 { |
5030 Create: va' = vec_shift <va, offset> | 5219 Create: va' = vec_shift <va, offset> |
5031 Create: va = vop <va, va'> | 5220 Create: va = vop <va, va'> |
5035 | 5224 |
5036 if (dump_enabled_p ()) | 5225 if (dump_enabled_p ()) |
5037 dump_printf_loc (MSG_NOTE, vect_location, | 5226 dump_printf_loc (MSG_NOTE, vect_location, |
5038 "Reduce using vector shifts\n"); | 5227 "Reduce using vector shifts\n"); |
5039 | 5228 |
5040 vec_dest = vect_create_destination_var (scalar_dest, vectype); | 5229 mode1 = TYPE_MODE (vectype1); |
5041 new_temp = new_phi_result; | 5230 vec_dest = vect_create_destination_var (scalar_dest, vectype1); |
5042 for (elt_offset = nelements / 2; | 5231 for (elt_offset = nelements / 2; |
5043 elt_offset >= 1; | 5232 elt_offset >= 1; |
5044 elt_offset /= 2) | 5233 elt_offset /= 2) |
5045 { | 5234 { |
5046 sel.truncate (0); | |
5047 calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel); | 5235 calc_vec_perm_mask_for_shift (elt_offset, nelements, &sel); |
5048 tree mask = vect_gen_perm_mask_any (vectype, sel); | 5236 indices.new_vector (sel, 2, nelements); |
5237 tree mask = vect_gen_perm_mask_any (vectype1, indices); | |
5049 epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR, | 5238 epilog_stmt = gimple_build_assign (vec_dest, VEC_PERM_EXPR, |
5050 new_temp, zero_vec, mask); | 5239 new_temp, zero_vec, mask); |
5051 new_name = make_ssa_name (vec_dest, epilog_stmt); | 5240 new_name = make_ssa_name (vec_dest, epilog_stmt); |
5052 gimple_assign_set_lhs (epilog_stmt, new_name); | 5241 gimple_assign_set_lhs (epilog_stmt, new_name); |
5053 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); | 5242 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
5088 | 5277 |
5089 if (dump_enabled_p ()) | 5278 if (dump_enabled_p ()) |
5090 dump_printf_loc (MSG_NOTE, vect_location, | 5279 dump_printf_loc (MSG_NOTE, vect_location, |
5091 "Reduce using scalar code.\n"); | 5280 "Reduce using scalar code.\n"); |
5092 | 5281 |
5093 vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)); | 5282 int vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype1)); |
5283 int element_bitsize = tree_to_uhwi (bitsize); | |
5094 FOR_EACH_VEC_ELT (new_phis, i, new_phi) | 5284 FOR_EACH_VEC_ELT (new_phis, i, new_phi) |
5095 { | 5285 { |
5096 int bit_offset; | 5286 int bit_offset; |
5097 if (gimple_code (new_phi) == GIMPLE_PHI) | 5287 if (gimple_code (new_phi) == GIMPLE_PHI) |
5098 vec_temp = PHI_RESULT (new_phi); | 5288 vec_temp = PHI_RESULT (new_phi); |
5099 else | 5289 else |
5100 vec_temp = gimple_assign_lhs (new_phi); | 5290 vec_temp = gimple_assign_lhs (new_phi); |
5101 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize, | 5291 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vec_temp, bitsize, |
5102 bitsize_zero_node); | 5292 bitsize_zero_node); |
5103 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs); | 5293 epilog_stmt = gimple_build_assign (new_scalar_dest, rhs); |
5104 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); | 5294 new_temp = make_ssa_name (new_scalar_dest, epilog_stmt); |
5105 gimple_assign_set_lhs (epilog_stmt, new_temp); | 5295 gimple_assign_set_lhs (epilog_stmt, new_temp); |
5106 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); | 5296 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
5107 | 5297 |
5141 } | 5331 } |
5142 } | 5332 } |
5143 | 5333 |
5144 /* The only case where we need to reduce scalar results in SLP, is | 5334 /* The only case where we need to reduce scalar results in SLP, is |
5145 unrolling. If the size of SCALAR_RESULTS is greater than | 5335 unrolling. If the size of SCALAR_RESULTS is greater than |
5146 GROUP_SIZE, we reduce them combining elements modulo | 5336 REDUC_GROUP_SIZE, we reduce them combining elements modulo |
5147 GROUP_SIZE. */ | 5337 REDUC_GROUP_SIZE. */ |
5148 if (slp_reduc) | 5338 if (slp_reduc) |
5149 { | 5339 { |
5150 tree res, first_res, new_res; | 5340 tree res, first_res, new_res; |
5151 gimple *new_stmt; | 5341 gimple *new_stmt; |
5152 | 5342 |
5166 else | 5356 else |
5167 /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */ | 5357 /* Not SLP - we have one scalar to keep in SCALAR_RESULTS. */ |
5168 scalar_results.safe_push (new_temp); | 5358 scalar_results.safe_push (new_temp); |
5169 } | 5359 } |
5170 | 5360 |
5171 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) | 5361 if ((STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) |
5172 == INTEGER_INDUC_COND_REDUCTION) | 5362 == INTEGER_INDUC_COND_REDUCTION) |
5173 { | 5363 && !operand_equal_p (initial_def, induc_val, 0)) |
5174 /* Earlier we set the initial value to be zero. Check the result | 5364 { |
5175 and if it is zero then replace with the original initial | 5365 /* Earlier we set the initial value to be a vector of induc_val |
5176 value. */ | 5366 values. Check the result and if it is induc_val then replace |
5177 tree zero = build_zero_cst (scalar_type); | 5367 with the original initial value, unless induc_val is |
5178 tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, zero); | 5368 the same as initial_def already. */ |
5369 tree zcompare = build2 (EQ_EXPR, boolean_type_node, new_temp, | |
5370 induc_val); | |
5179 | 5371 |
5180 tree tmp = make_ssa_name (new_scalar_dest); | 5372 tree tmp = make_ssa_name (new_scalar_dest); |
5181 epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare, | 5373 epilog_stmt = gimple_build_assign (tmp, COND_EXPR, zcompare, |
5182 initial_def, new_temp); | 5374 initial_def, new_temp); |
5183 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); | 5375 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
5217 new_temp = make_ssa_name (new_dest, epilog_stmt); | 5409 new_temp = make_ssa_name (new_dest, epilog_stmt); |
5218 gimple_assign_set_lhs (epilog_stmt, new_temp); | 5410 gimple_assign_set_lhs (epilog_stmt, new_temp); |
5219 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); | 5411 gsi_insert_before (&exit_gsi, epilog_stmt, GSI_SAME_STMT); |
5220 if (nested_in_vect_loop) | 5412 if (nested_in_vect_loop) |
5221 { | 5413 { |
5222 set_vinfo_for_stmt (epilog_stmt, | 5414 stmt_vec_info epilog_stmt_info = loop_vinfo->add_stmt (epilog_stmt); |
5223 new_stmt_vec_info (epilog_stmt, loop_vinfo)); | 5415 STMT_VINFO_RELATED_STMT (epilog_stmt_info) |
5224 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (epilog_stmt)) = | 5416 = STMT_VINFO_RELATED_STMT (loop_vinfo->lookup_stmt (new_phi)); |
5225 STMT_VINFO_RELATED_STMT (vinfo_for_stmt (new_phi)); | |
5226 | 5417 |
5227 if (!double_reduc) | 5418 if (!double_reduc) |
5228 scalar_results.quick_push (new_temp); | 5419 scalar_results.quick_push (new_temp); |
5229 else | 5420 else |
5230 scalar_results[0] = new_temp; | 5421 scalar_results[0] = new_temp; |
5260 use <s_out4> | 5451 use <s_out4> |
5261 use <s_out4> */ | 5452 use <s_out4> */ |
5262 | 5453 |
5263 | 5454 |
5264 /* In SLP reduction chain we reduce vector results into one vector if | 5455 /* In SLP reduction chain we reduce vector results into one vector if |
5265 necessary, hence we set here GROUP_SIZE to 1. SCALAR_DEST is the LHS of | 5456 necessary, hence we set here REDUC_GROUP_SIZE to 1. SCALAR_DEST is the |
5266 the last stmt in the reduction chain, since we are looking for the loop | 5457 LHS of the last stmt in the reduction chain, since we are looking for |
5267 exit phi node. */ | 5458 the loop exit phi node. */ |
5268 if (GROUP_FIRST_ELEMENT (vinfo_for_stmt (stmt))) | 5459 if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) |
5269 { | 5460 { |
5270 gimple *dest_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]; | 5461 stmt_vec_info dest_stmt_info |
5271 /* Handle reduction patterns. */ | 5462 = vect_orig_stmt (SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]); |
5272 if (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt))) | 5463 scalar_dest = gimple_assign_lhs (dest_stmt_info->stmt); |
5273 dest_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (dest_stmt)); | |
5274 | |
5275 scalar_dest = gimple_assign_lhs (dest_stmt); | |
5276 group_size = 1; | 5464 group_size = 1; |
5277 } | 5465 } |
5278 | 5466 |
5279 /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in | 5467 /* In SLP we may have several statements in NEW_PHIS and REDUCTION_PHIS (in |
5280 case that GROUP_SIZE is greater than vectorization factor). Therefore, we | 5468 case that REDUC_GROUP_SIZE is greater than vectorization factor). |
5281 need to match SCALAR_RESULTS with corresponding statements. The first | 5469 Therefore, we need to match SCALAR_RESULTS with corresponding statements. |
5282 (GROUP_SIZE / number of new vector stmts) scalar results correspond to | 5470 The first (REDUC_GROUP_SIZE / number of new vector stmts) scalar results |
5283 the first vector stmt, etc. | 5471 correspond to the first vector stmt, etc. |
5284 (RATIO is equal to (GROUP_SIZE / number of new vector stmts)). */ | 5472 (RATIO is equal to (REDUC_GROUP_SIZE / number of new vector stmts)). */ |
5285 if (group_size > new_phis.length ()) | 5473 if (group_size > new_phis.length ()) |
5286 { | 5474 { |
5287 ratio = group_size / new_phis.length (); | 5475 ratio = group_size / new_phis.length (); |
5288 gcc_assert (!(group_size % new_phis.length ())); | 5476 gcc_assert (!(group_size % new_phis.length ())); |
5289 } | 5477 } |
5290 else | 5478 else |
5291 ratio = 1; | 5479 ratio = 1; |
5292 | 5480 |
5481 stmt_vec_info epilog_stmt_info = NULL; | |
5293 for (k = 0; k < group_size; k++) | 5482 for (k = 0; k < group_size; k++) |
5294 { | 5483 { |
5295 if (k % ratio == 0) | 5484 if (k % ratio == 0) |
5296 { | 5485 { |
5297 epilog_stmt = new_phis[k / ratio]; | 5486 epilog_stmt_info = loop_vinfo->lookup_stmt (new_phis[k / ratio]); |
5298 reduction_phi = reduction_phis[k / ratio]; | 5487 reduction_phi_info = reduction_phis[k / ratio]; |
5299 if (double_reduc) | 5488 if (double_reduc) |
5300 inner_phi = inner_phis[k / ratio]; | 5489 inner_phi = inner_phis[k / ratio]; |
5301 } | 5490 } |
5302 | 5491 |
5303 if (slp_reduc) | 5492 if (slp_reduc) |
5304 { | 5493 { |
5305 gimple *current_stmt = SLP_TREE_SCALAR_STMTS (slp_node)[k]; | 5494 stmt_vec_info scalar_stmt_info = SLP_TREE_SCALAR_STMTS (slp_node)[k]; |
5306 | 5495 |
5307 orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (current_stmt)); | 5496 orig_stmt_info = STMT_VINFO_RELATED_STMT (scalar_stmt_info); |
5308 /* SLP statements can't participate in patterns. */ | 5497 /* SLP statements can't participate in patterns. */ |
5309 gcc_assert (!orig_stmt); | 5498 gcc_assert (!orig_stmt_info); |
5310 scalar_dest = gimple_assign_lhs (current_stmt); | 5499 scalar_dest = gimple_assign_lhs (scalar_stmt_info->stmt); |
5311 } | 5500 } |
5312 | 5501 |
5313 phis.create (3); | 5502 phis.create (3); |
5314 /* Find the loop-closed-use at the loop exit of the original scalar | 5503 /* Find the loop-closed-use at the loop exit of the original scalar |
5315 result. (The reduction result is expected to have two immediate uses - | 5504 result. (The reduction result is expected to have two immediate uses - |
5324 | 5513 |
5325 FOR_EACH_VEC_ELT (phis, i, exit_phi) | 5514 FOR_EACH_VEC_ELT (phis, i, exit_phi) |
5326 { | 5515 { |
5327 if (outer_loop) | 5516 if (outer_loop) |
5328 { | 5517 { |
5329 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi); | 5518 stmt_vec_info exit_phi_vinfo |
5519 = loop_vinfo->lookup_stmt (exit_phi); | |
5330 gphi *vect_phi; | 5520 gphi *vect_phi; |
5331 | 5521 |
5332 /* FORNOW. Currently not supporting the case that an inner-loop | 5522 /* FORNOW. Currently not supporting the case that an inner-loop |
5333 reduction is not used in the outer-loop (but only outside the | 5523 reduction is not used in the outer-loop (but only outside the |
5334 outer-loop), unless it is double reduction. */ | 5524 outer-loop), unless it is double reduction. */ |
5337 || double_reduc); | 5527 || double_reduc); |
5338 | 5528 |
5339 if (double_reduc) | 5529 if (double_reduc) |
5340 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi; | 5530 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = inner_phi; |
5341 else | 5531 else |
5342 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt; | 5532 STMT_VINFO_VEC_STMT (exit_phi_vinfo) = epilog_stmt_info; |
5343 if (!double_reduc | 5533 if (!double_reduc |
5344 || STMT_VINFO_DEF_TYPE (exit_phi_vinfo) | 5534 || STMT_VINFO_DEF_TYPE (exit_phi_vinfo) |
5345 != vect_double_reduction_def) | 5535 != vect_double_reduction_def) |
5346 continue; | 5536 continue; |
5347 | 5537 |
5361 node, i.e., stmt1 above. */ | 5551 node, i.e., stmt1 above. */ |
5362 orig_name = PHI_RESULT (exit_phi); | 5552 orig_name = PHI_RESULT (exit_phi); |
5363 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name) | 5553 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, orig_name) |
5364 { | 5554 { |
5365 stmt_vec_info use_stmt_vinfo; | 5555 stmt_vec_info use_stmt_vinfo; |
5366 stmt_vec_info new_phi_vinfo; | |
5367 tree vect_phi_init, preheader_arg, vect_phi_res; | 5556 tree vect_phi_init, preheader_arg, vect_phi_res; |
5368 basic_block bb = gimple_bb (use_stmt); | 5557 basic_block bb = gimple_bb (use_stmt); |
5369 gimple *use; | |
5370 | 5558 |
5371 /* Check that USE_STMT is really double reduction phi | 5559 /* Check that USE_STMT is really double reduction phi |
5372 node. */ | 5560 node. */ |
5373 if (gimple_code (use_stmt) != GIMPLE_PHI | 5561 if (gimple_code (use_stmt) != GIMPLE_PHI |
5374 || gimple_phi_num_args (use_stmt) != 2 | 5562 || gimple_phi_num_args (use_stmt) != 2 |
5375 || bb->loop_father != outer_loop) | 5563 || bb->loop_father != outer_loop) |
5376 continue; | 5564 continue; |
5377 use_stmt_vinfo = vinfo_for_stmt (use_stmt); | 5565 use_stmt_vinfo = loop_vinfo->lookup_stmt (use_stmt); |
5378 if (!use_stmt_vinfo | 5566 if (!use_stmt_vinfo |
5379 || STMT_VINFO_DEF_TYPE (use_stmt_vinfo) | 5567 || STMT_VINFO_DEF_TYPE (use_stmt_vinfo) |
5380 != vect_double_reduction_def) | 5568 != vect_double_reduction_def) |
5381 continue; | 5569 continue; |
5382 | 5570 |
5388 vs2 is defined by INNER_PHI, the vectorized EXIT_PHI; | 5576 vs2 is defined by INNER_PHI, the vectorized EXIT_PHI; |
5389 vs0 is created here. */ | 5577 vs0 is created here. */ |
5390 | 5578 |
5391 /* Create vector phi node. */ | 5579 /* Create vector phi node. */ |
5392 vect_phi = create_phi_node (vec_initial_def, bb); | 5580 vect_phi = create_phi_node (vec_initial_def, bb); |
5393 new_phi_vinfo = new_stmt_vec_info (vect_phi, | 5581 loop_vec_info_for_loop (outer_loop)->add_stmt (vect_phi); |
5394 loop_vec_info_for_loop (outer_loop)); | |
5395 set_vinfo_for_stmt (vect_phi, new_phi_vinfo); | |
5396 | 5582 |
5397 /* Create vs0 - initial def of the double reduction phi. */ | 5583 /* Create vs0 - initial def of the double reduction phi. */ |
5398 preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt, | 5584 preheader_arg = PHI_ARG_DEF_FROM_EDGE (use_stmt, |
5399 loop_preheader_edge (outer_loop)); | 5585 loop_preheader_edge (outer_loop)); |
5400 vect_phi_init = get_initial_def_for_reduction | 5586 vect_phi_init = get_initial_def_for_reduction |
5401 (stmt, preheader_arg, NULL); | 5587 (stmt_info, preheader_arg, NULL); |
5402 | 5588 |
5403 /* Update phi node arguments with vs0 and vs2. */ | 5589 /* Update phi node arguments with vs0 and vs2. */ |
5404 add_phi_arg (vect_phi, vect_phi_init, | 5590 add_phi_arg (vect_phi, vect_phi_init, |
5405 loop_preheader_edge (outer_loop), | 5591 loop_preheader_edge (outer_loop), |
5406 UNKNOWN_LOCATION); | 5592 UNKNOWN_LOCATION); |
5407 add_phi_arg (vect_phi, PHI_RESULT (inner_phi), | 5593 add_phi_arg (vect_phi, PHI_RESULT (inner_phi->stmt), |
5408 loop_latch_edge (outer_loop), UNKNOWN_LOCATION); | 5594 loop_latch_edge (outer_loop), UNKNOWN_LOCATION); |
5409 if (dump_enabled_p ()) | 5595 if (dump_enabled_p ()) |
5410 { | 5596 dump_printf_loc (MSG_NOTE, vect_location, |
5411 dump_printf_loc (MSG_NOTE, vect_location, | 5597 "created double reduction phi node: %G", |
5412 "created double reduction phi node: "); | 5598 vect_phi); |
5413 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, vect_phi, 0); | |
5414 } | |
5415 | 5599 |
5416 vect_phi_res = PHI_RESULT (vect_phi); | 5600 vect_phi_res = PHI_RESULT (vect_phi); |
5417 | 5601 |
5418 /* Replace the use, i.e., set the correct vs1 in the regular | 5602 /* Replace the use, i.e., set the correct vs1 in the regular |
5419 reduction phi node. FORNOW, NCOPIES is always 1, so the | 5603 reduction phi node. FORNOW, NCOPIES is always 1, so the |
5420 loop is redundant. */ | 5604 loop is redundant. */ |
5421 use = reduction_phi; | 5605 stmt_vec_info use_info = reduction_phi_info; |
5422 for (j = 0; j < ncopies; j++) | 5606 for (j = 0; j < ncopies; j++) |
5423 { | 5607 { |
5424 edge pr_edge = loop_preheader_edge (loop); | 5608 edge pr_edge = loop_preheader_edge (loop); |
5425 SET_PHI_ARG_DEF (use, pr_edge->dest_idx, vect_phi_res); | 5609 SET_PHI_ARG_DEF (as_a <gphi *> (use_info->stmt), |
5426 use = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use)); | 5610 pr_edge->dest_idx, vect_phi_res); |
5427 } | 5611 use_info = STMT_VINFO_RELATED_STMT (use_info); |
5612 } | |
5428 } | 5613 } |
5429 } | 5614 } |
5430 } | 5615 } |
5431 | 5616 |
5432 phis.release (); | 5617 phis.release (); |
5479 | 5664 |
5480 phis.release (); | 5665 phis.release (); |
5481 } | 5666 } |
5482 } | 5667 } |
5483 | 5668 |
5669 /* Return a vector of type VECTYPE that is equal to the vector select | |
5670 operation "MASK ? VEC : IDENTITY". Insert the select statements | |
5671 before GSI. */ | |
5672 | |
5673 static tree | |
5674 merge_with_identity (gimple_stmt_iterator *gsi, tree mask, tree vectype, | |
5675 tree vec, tree identity) | |
5676 { | |
5677 tree cond = make_temp_ssa_name (vectype, NULL, "cond"); | |
5678 gimple *new_stmt = gimple_build_assign (cond, VEC_COND_EXPR, | |
5679 mask, vec, identity); | |
5680 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); | |
5681 return cond; | |
5682 } | |
5683 | |
5684 /* Successively apply CODE to each element of VECTOR_RHS, in left-to-right | |
5685 order, starting with LHS. Insert the extraction statements before GSI and | |
5686 associate the new scalar SSA names with variable SCALAR_DEST. | |
5687 Return the SSA name for the result. */ | |
5688 | |
5689 static tree | |
5690 vect_expand_fold_left (gimple_stmt_iterator *gsi, tree scalar_dest, | |
5691 tree_code code, tree lhs, tree vector_rhs) | |
5692 { | |
5693 tree vectype = TREE_TYPE (vector_rhs); | |
5694 tree scalar_type = TREE_TYPE (vectype); | |
5695 tree bitsize = TYPE_SIZE (scalar_type); | |
5696 unsigned HOST_WIDE_INT vec_size_in_bits = tree_to_uhwi (TYPE_SIZE (vectype)); | |
5697 unsigned HOST_WIDE_INT element_bitsize = tree_to_uhwi (bitsize); | |
5698 | |
5699 for (unsigned HOST_WIDE_INT bit_offset = 0; | |
5700 bit_offset < vec_size_in_bits; | |
5701 bit_offset += element_bitsize) | |
5702 { | |
5703 tree bitpos = bitsize_int (bit_offset); | |
5704 tree rhs = build3 (BIT_FIELD_REF, scalar_type, vector_rhs, | |
5705 bitsize, bitpos); | |
5706 | |
5707 gassign *stmt = gimple_build_assign (scalar_dest, rhs); | |
5708 rhs = make_ssa_name (scalar_dest, stmt); | |
5709 gimple_assign_set_lhs (stmt, rhs); | |
5710 gsi_insert_before (gsi, stmt, GSI_SAME_STMT); | |
5711 | |
5712 stmt = gimple_build_assign (scalar_dest, code, lhs, rhs); | |
5713 tree new_name = make_ssa_name (scalar_dest, stmt); | |
5714 gimple_assign_set_lhs (stmt, new_name); | |
5715 gsi_insert_before (gsi, stmt, GSI_SAME_STMT); | |
5716 lhs = new_name; | |
5717 } | |
5718 return lhs; | |
5719 } | |
5720 | |
5721 /* Perform an in-order reduction (FOLD_LEFT_REDUCTION). STMT_INFO is the | |
5722 statement that sets the live-out value. REDUC_DEF_STMT is the phi | |
5723 statement. CODE is the operation performed by STMT_INFO and OPS are | |
5724 its scalar operands. REDUC_INDEX is the index of the operand in | |
5725 OPS that is set by REDUC_DEF_STMT. REDUC_FN is the function that | |
5726 implements in-order reduction, or IFN_LAST if we should open-code it. | |
5727 VECTYPE_IN is the type of the vector input. MASKS specifies the masks | |
5728 that should be used to control the operation in a fully-masked loop. */ | |
5729 | |
5730 static bool | |
5731 vectorize_fold_left_reduction (stmt_vec_info stmt_info, | |
5732 gimple_stmt_iterator *gsi, | |
5733 stmt_vec_info *vec_stmt, slp_tree slp_node, | |
5734 gimple *reduc_def_stmt, | |
5735 tree_code code, internal_fn reduc_fn, | |
5736 tree ops[3], tree vectype_in, | |
5737 int reduc_index, vec_loop_masks *masks) | |
5738 { | |
5739 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | |
5740 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
5741 tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); | |
5742 stmt_vec_info new_stmt_info = NULL; | |
5743 | |
5744 int ncopies; | |
5745 if (slp_node) | |
5746 ncopies = 1; | |
5747 else | |
5748 ncopies = vect_get_num_copies (loop_vinfo, vectype_in); | |
5749 | |
5750 gcc_assert (!nested_in_vect_loop_p (loop, stmt_info)); | |
5751 gcc_assert (ncopies == 1); | |
5752 gcc_assert (TREE_CODE_LENGTH (code) == binary_op); | |
5753 gcc_assert (reduc_index == (code == MINUS_EXPR ? 0 : 1)); | |
5754 gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) | |
5755 == FOLD_LEFT_REDUCTION); | |
5756 | |
5757 if (slp_node) | |
5758 gcc_assert (known_eq (TYPE_VECTOR_SUBPARTS (vectype_out), | |
5759 TYPE_VECTOR_SUBPARTS (vectype_in))); | |
5760 | |
5761 tree op0 = ops[1 - reduc_index]; | |
5762 | |
5763 int group_size = 1; | |
5764 stmt_vec_info scalar_dest_def_info; | |
5765 auto_vec<tree> vec_oprnds0; | |
5766 if (slp_node) | |
5767 { | |
5768 vect_get_vec_defs (op0, NULL_TREE, stmt_info, &vec_oprnds0, NULL, | |
5769 slp_node); | |
5770 group_size = SLP_TREE_SCALAR_STMTS (slp_node).length (); | |
5771 scalar_dest_def_info = SLP_TREE_SCALAR_STMTS (slp_node)[group_size - 1]; | |
5772 } | |
5773 else | |
5774 { | |
5775 tree loop_vec_def0 = vect_get_vec_def_for_operand (op0, stmt_info); | |
5776 vec_oprnds0.create (1); | |
5777 vec_oprnds0.quick_push (loop_vec_def0); | |
5778 scalar_dest_def_info = stmt_info; | |
5779 } | |
5780 | |
5781 tree scalar_dest = gimple_assign_lhs (scalar_dest_def_info->stmt); | |
5782 tree scalar_type = TREE_TYPE (scalar_dest); | |
5783 tree reduc_var = gimple_phi_result (reduc_def_stmt); | |
5784 | |
5785 int vec_num = vec_oprnds0.length (); | |
5786 gcc_assert (vec_num == 1 || slp_node); | |
5787 tree vec_elem_type = TREE_TYPE (vectype_out); | |
5788 gcc_checking_assert (useless_type_conversion_p (scalar_type, vec_elem_type)); | |
5789 | |
5790 tree vector_identity = NULL_TREE; | |
5791 if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) | |
5792 vector_identity = build_zero_cst (vectype_out); | |
5793 | |
5794 tree scalar_dest_var = vect_create_destination_var (scalar_dest, NULL); | |
5795 int i; | |
5796 tree def0; | |
5797 FOR_EACH_VEC_ELT (vec_oprnds0, i, def0) | |
5798 { | |
5799 gimple *new_stmt; | |
5800 tree mask = NULL_TREE; | |
5801 if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) | |
5802 mask = vect_get_loop_mask (gsi, masks, vec_num, vectype_in, i); | |
5803 | |
5804 /* Handle MINUS by adding the negative. */ | |
5805 if (reduc_fn != IFN_LAST && code == MINUS_EXPR) | |
5806 { | |
5807 tree negated = make_ssa_name (vectype_out); | |
5808 new_stmt = gimple_build_assign (negated, NEGATE_EXPR, def0); | |
5809 gsi_insert_before (gsi, new_stmt, GSI_SAME_STMT); | |
5810 def0 = negated; | |
5811 } | |
5812 | |
5813 if (mask) | |
5814 def0 = merge_with_identity (gsi, mask, vectype_out, def0, | |
5815 vector_identity); | |
5816 | |
5817 /* On the first iteration the input is simply the scalar phi | |
5818 result, and for subsequent iterations it is the output of | |
5819 the preceding operation. */ | |
5820 if (reduc_fn != IFN_LAST) | |
5821 { | |
5822 new_stmt = gimple_build_call_internal (reduc_fn, 2, reduc_var, def0); | |
5823 /* For chained SLP reductions the output of the previous reduction | |
5824 operation serves as the input of the next. For the final statement | |
5825 the output cannot be a temporary - we reuse the original | |
5826 scalar destination of the last statement. */ | |
5827 if (i != vec_num - 1) | |
5828 { | |
5829 gimple_set_lhs (new_stmt, scalar_dest_var); | |
5830 reduc_var = make_ssa_name (scalar_dest_var, new_stmt); | |
5831 gimple_set_lhs (new_stmt, reduc_var); | |
5832 } | |
5833 } | |
5834 else | |
5835 { | |
5836 reduc_var = vect_expand_fold_left (gsi, scalar_dest_var, code, | |
5837 reduc_var, def0); | |
5838 new_stmt = SSA_NAME_DEF_STMT (reduc_var); | |
5839 /* Remove the statement, so that we can use the same code paths | |
5840 as for statements that we've just created. */ | |
5841 gimple_stmt_iterator tmp_gsi = gsi_for_stmt (new_stmt); | |
5842 gsi_remove (&tmp_gsi, false); | |
5843 } | |
5844 | |
5845 if (i == vec_num - 1) | |
5846 { | |
5847 gimple_set_lhs (new_stmt, scalar_dest); | |
5848 new_stmt_info = vect_finish_replace_stmt (scalar_dest_def_info, | |
5849 new_stmt); | |
5850 } | |
5851 else | |
5852 new_stmt_info = vect_finish_stmt_generation (scalar_dest_def_info, | |
5853 new_stmt, gsi); | |
5854 | |
5855 if (slp_node) | |
5856 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); | |
5857 } | |
5858 | |
5859 if (!slp_node) | |
5860 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; | |
5861 | |
5862 return true; | |
5863 } | |
5484 | 5864 |
5485 /* Function is_nonwrapping_integer_induction. | 5865 /* Function is_nonwrapping_integer_induction. |
5486 | 5866 |
5487 Check if STMT (which is part of loop LOOP) both increments and | 5867 Check if STMT_VINFO (which is part of loop LOOP) both increments and |
5488 does not cause overflow. */ | 5868 does not cause overflow. */ |
5489 | 5869 |
5490 static bool | 5870 static bool |
5491 is_nonwrapping_integer_induction (gimple *stmt, struct loop *loop) | 5871 is_nonwrapping_integer_induction (stmt_vec_info stmt_vinfo, struct loop *loop) |
5492 { | 5872 { |
5493 stmt_vec_info stmt_vinfo = vinfo_for_stmt (stmt); | 5873 gphi *phi = as_a <gphi *> (stmt_vinfo->stmt); |
5494 tree base = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo); | 5874 tree base = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (stmt_vinfo); |
5495 tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo); | 5875 tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (stmt_vinfo); |
5496 tree lhs_type = TREE_TYPE (gimple_phi_result (stmt)); | 5876 tree lhs_type = TREE_TYPE (gimple_phi_result (phi)); |
5497 widest_int ni, max_loop_value, lhs_max; | 5877 widest_int ni, max_loop_value, lhs_max; |
5498 bool overflow = false; | 5878 wi::overflow_type overflow = wi::OVF_NONE; |
5499 | 5879 |
5500 /* Make sure the loop is integer based. */ | 5880 /* Make sure the loop is integer based. */ |
5501 if (TREE_CODE (base) != INTEGER_CST | 5881 if (TREE_CODE (base) != INTEGER_CST |
5502 || TREE_CODE (step) != INTEGER_CST) | 5882 || TREE_CODE (step) != INTEGER_CST) |
5503 return false; | 5883 return false; |
5504 | 5884 |
5505 /* Check that the induction increments. */ | |
5506 if (tree_int_cst_sgn (step) == -1) | |
5507 return false; | |
5508 | |
5509 /* Check that the max size of the loop will not wrap. */ | 5885 /* Check that the max size of the loop will not wrap. */ |
5510 | 5886 |
5511 if (TYPE_OVERFLOW_UNDEFINED (lhs_type)) | 5887 if (TYPE_OVERFLOW_UNDEFINED (lhs_type)) |
5512 return true; | 5888 return true; |
5513 | 5889 |
5528 <= TYPE_PRECISION (lhs_type)); | 5904 <= TYPE_PRECISION (lhs_type)); |
5529 } | 5905 } |
5530 | 5906 |
5531 /* Function vectorizable_reduction. | 5907 /* Function vectorizable_reduction. |
5532 | 5908 |
5533 Check if STMT performs a reduction operation that can be vectorized. | 5909 Check if STMT_INFO performs a reduction operation that can be vectorized. |
5534 If VEC_STMT is also passed, vectorize the STMT: create a vectorized | 5910 If VEC_STMT is also passed, vectorize STMT_INFO: create a vectorized |
5535 stmt to replace it, put it in VEC_STMT, and insert it at GSI. | 5911 stmt to replace it, put it in VEC_STMT, and insert it at GSI. |
5536 Return FALSE if not a vectorizable STMT, TRUE otherwise. | 5912 Return true if STMT_INFO is vectorizable in this way. |
5537 | 5913 |
5538 This function also handles reduction idioms (patterns) that have been | 5914 This function also handles reduction idioms (patterns) that have been |
5539 recognized in advance during vect_pattern_recog. In this case, STMT may be | 5915 recognized in advance during vect_pattern_recog. In this case, STMT_INFO |
5540 of this form: | 5916 may be of this form: |
5541 X = pattern_expr (arg0, arg1, ..., X) | 5917 X = pattern_expr (arg0, arg1, ..., X) |
5542 and it's STMT_VINFO_RELATED_STMT points to the last stmt in the original | 5918 and its STMT_VINFO_RELATED_STMT points to the last stmt in the original |
5543 sequence that had been detected and replaced by the pattern-stmt (STMT). | 5919 sequence that had been detected and replaced by the pattern-stmt |
5920 (STMT_INFO). | |
5544 | 5921 |
5545 This function also handles reduction of condition expressions, for example: | 5922 This function also handles reduction of condition expressions, for example: |
5546 for (int i = 0; i < N; i++) | 5923 for (int i = 0; i < N; i++) |
5547 if (a[i] < value) | 5924 if (a[i] < value) |
5548 last = a[i]; | 5925 last = a[i]; |
5550 containing the loop indexes for which "a[i] < value" was true. In the | 5927 containing the loop indexes for which "a[i] < value" was true. In the |
5551 function epilogue this is reduced to a single max value and then used to | 5928 function epilogue this is reduced to a single max value and then used to |
5552 index into the vector of results. | 5929 index into the vector of results. |
5553 | 5930 |
5554 In some cases of reduction patterns, the type of the reduction variable X is | 5931 In some cases of reduction patterns, the type of the reduction variable X is |
5555 different than the type of the other arguments of STMT. | 5932 different than the type of the other arguments of STMT_INFO. |
5556 In such cases, the vectype that is used when transforming STMT into a vector | 5933 In such cases, the vectype that is used when transforming STMT_INFO into |
5557 stmt is different than the vectype that is used to determine the | 5934 a vector stmt is different than the vectype that is used to determine the |
5558 vectorization factor, because it consists of a different number of elements | 5935 vectorization factor, because it consists of a different number of elements |
5559 than the actual number of elements that are being operated upon in parallel. | 5936 than the actual number of elements that are being operated upon in parallel. |
5560 | 5937 |
5561 For example, consider an accumulation of shorts into an int accumulator. | 5938 For example, consider an accumulation of shorts into an int accumulator. |
5562 On some targets it's possible to vectorize this pattern operating on 8 | 5939 On some targets it's possible to vectorize this pattern operating on 8 |
5576 general), the following equation: | 5953 general), the following equation: |
5577 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X)) | 5954 STMT_VINFO_VECTYPE == get_vectype_for_scalar_type (TREE_TYPE (X)) |
5578 does *NOT* necessarily hold for reduction patterns. */ | 5955 does *NOT* necessarily hold for reduction patterns. */ |
5579 | 5956 |
5580 bool | 5957 bool |
5581 vectorizable_reduction (gimple *stmt, gimple_stmt_iterator *gsi, | 5958 vectorizable_reduction (stmt_vec_info stmt_info, gimple_stmt_iterator *gsi, |
5582 gimple **vec_stmt, slp_tree slp_node, | 5959 stmt_vec_info *vec_stmt, slp_tree slp_node, |
5583 slp_instance slp_node_instance) | 5960 slp_instance slp_node_instance, |
5961 stmt_vector_for_cost *cost_vec) | |
5584 { | 5962 { |
5585 tree vec_dest; | 5963 tree vec_dest; |
5586 tree scalar_dest; | 5964 tree scalar_dest; |
5587 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
5588 tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); | 5965 tree vectype_out = STMT_VINFO_VECTYPE (stmt_info); |
5589 tree vectype_in = NULL_TREE; | 5966 tree vectype_in = NULL_TREE; |
5590 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | 5967 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); |
5591 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | 5968 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
5592 enum tree_code code, orig_code, epilog_reduc_code; | 5969 enum tree_code code, orig_code; |
5970 internal_fn reduc_fn; | |
5593 machine_mode vec_mode; | 5971 machine_mode vec_mode; |
5594 int op_type; | 5972 int op_type; |
5595 optab optab, reduc_optab; | 5973 optab optab; |
5596 tree new_temp = NULL_TREE; | 5974 tree new_temp = NULL_TREE; |
5597 gimple *def_stmt; | |
5598 enum vect_def_type dt, cond_reduc_dt = vect_unknown_def_type; | 5975 enum vect_def_type dt, cond_reduc_dt = vect_unknown_def_type; |
5976 stmt_vec_info cond_stmt_vinfo = NULL; | |
5977 enum tree_code cond_reduc_op_code = ERROR_MARK; | |
5599 tree scalar_type; | 5978 tree scalar_type; |
5600 bool is_simple_use; | 5979 bool is_simple_use; |
5601 gimple *orig_stmt; | |
5602 stmt_vec_info orig_stmt_info = NULL; | |
5603 int i; | 5980 int i; |
5604 int ncopies; | 5981 int ncopies; |
5605 int epilog_copies; | 5982 int epilog_copies; |
5606 stmt_vec_info prev_stmt_info, prev_phi_info; | 5983 stmt_vec_info prev_stmt_info, prev_phi_info; |
5607 bool single_defuse_cycle = false; | 5984 bool single_defuse_cycle = false; |
5608 gimple *new_stmt = NULL; | 5985 stmt_vec_info new_stmt_info = NULL; |
5609 int j; | 5986 int j; |
5610 tree ops[3]; | 5987 tree ops[3]; |
5611 enum vect_def_type dts[3]; | 5988 enum vect_def_type dts[3]; |
5612 bool nested_cycle = false, found_nested_cycle_def = false; | 5989 bool nested_cycle = false, found_nested_cycle_def = false; |
5613 bool double_reduc = false; | 5990 bool double_reduc = false; |
5614 basic_block def_bb; | 5991 basic_block def_bb; |
5615 struct loop * def_stmt_loop, *outer_loop = NULL; | 5992 struct loop * def_stmt_loop; |
5616 tree def_arg; | 5993 tree def_arg; |
5617 gimple *def_arg_stmt; | |
5618 auto_vec<tree> vec_oprnds0; | 5994 auto_vec<tree> vec_oprnds0; |
5619 auto_vec<tree> vec_oprnds1; | 5995 auto_vec<tree> vec_oprnds1; |
5620 auto_vec<tree> vec_oprnds2; | 5996 auto_vec<tree> vec_oprnds2; |
5621 auto_vec<tree> vect_defs; | 5997 auto_vec<tree> vect_defs; |
5622 auto_vec<gimple *> phis; | 5998 auto_vec<stmt_vec_info> phis; |
5623 int vec_num; | 5999 int vec_num; |
5624 tree def0, tem; | 6000 tree def0, tem; |
5625 bool first_p = true; | |
5626 tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE; | 6001 tree cr_index_scalar_type = NULL_TREE, cr_index_vector_type = NULL_TREE; |
5627 tree cond_reduc_val = NULL_TREE; | 6002 tree cond_reduc_val = NULL_TREE; |
5628 | 6003 |
5629 /* Make sure it was already recognized as a reduction computation. */ | 6004 /* Make sure it was already recognized as a reduction computation. */ |
5630 if (STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_reduction_def | 6005 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_reduction_def |
5631 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (stmt)) != vect_nested_cycle) | 6006 && STMT_VINFO_DEF_TYPE (stmt_info) != vect_nested_cycle) |
5632 return false; | 6007 return false; |
5633 | 6008 |
5634 if (nested_in_vect_loop_p (loop, stmt)) | 6009 if (nested_in_vect_loop_p (loop, stmt_info)) |
5635 { | 6010 { |
5636 outer_loop = loop; | |
5637 loop = loop->inner; | 6011 loop = loop->inner; |
5638 nested_cycle = true; | 6012 nested_cycle = true; |
5639 } | 6013 } |
5640 | 6014 |
5641 /* In case of reduction chain we switch to the first stmt in the chain, but | 6015 if (REDUC_GROUP_FIRST_ELEMENT (stmt_info)) |
5642 we don't update STMT_INFO, since only the last stmt is marked as reduction | 6016 gcc_assert (slp_node |
5643 and has reduction properties. */ | 6017 && REDUC_GROUP_FIRST_ELEMENT (stmt_info) == stmt_info); |
5644 if (GROUP_FIRST_ELEMENT (stmt_info) | 6018 |
5645 && GROUP_FIRST_ELEMENT (stmt_info) != stmt) | 6019 if (gphi *phi = dyn_cast <gphi *> (stmt_info->stmt)) |
5646 { | 6020 { |
5647 stmt = GROUP_FIRST_ELEMENT (stmt_info); | 6021 tree phi_result = gimple_phi_result (phi); |
5648 first_p = false; | |
5649 } | |
5650 | |
5651 if (gimple_code (stmt) == GIMPLE_PHI) | |
5652 { | |
5653 /* Analysis is fully done on the reduction stmt invocation. */ | 6022 /* Analysis is fully done on the reduction stmt invocation. */ |
5654 if (! vec_stmt) | 6023 if (! vec_stmt) |
5655 { | 6024 { |
5656 if (slp_node) | 6025 if (slp_node) |
5657 slp_node_instance->reduc_phis = slp_node; | 6026 slp_node_instance->reduc_phis = slp_node; |
5658 | 6027 |
5659 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; | 6028 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; |
5660 return true; | 6029 return true; |
5661 } | 6030 } |
5662 | 6031 |
5663 gimple *reduc_stmt = STMT_VINFO_REDUC_DEF (stmt_info); | 6032 if (STMT_VINFO_REDUC_TYPE (stmt_info) == FOLD_LEFT_REDUCTION) |
5664 if (STMT_VINFO_IN_PATTERN_P (vinfo_for_stmt (reduc_stmt))) | 6033 /* Leave the scalar phi in place. Note that checking |
5665 reduc_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (reduc_stmt)); | 6034 STMT_VINFO_VEC_REDUCTION_TYPE (as below) only works |
5666 | 6035 for reductions involving a single statement. */ |
5667 gcc_assert (is_gimple_assign (reduc_stmt)); | 6036 return true; |
6037 | |
6038 stmt_vec_info reduc_stmt_info = STMT_VINFO_REDUC_DEF (stmt_info); | |
6039 reduc_stmt_info = vect_stmt_to_vectorize (reduc_stmt_info); | |
6040 | |
6041 if (STMT_VINFO_VEC_REDUCTION_TYPE (reduc_stmt_info) | |
6042 == EXTRACT_LAST_REDUCTION) | |
6043 /* Leave the scalar phi in place. */ | |
6044 return true; | |
6045 | |
6046 gassign *reduc_stmt = as_a <gassign *> (reduc_stmt_info->stmt); | |
5668 for (unsigned k = 1; k < gimple_num_ops (reduc_stmt); ++k) | 6047 for (unsigned k = 1; k < gimple_num_ops (reduc_stmt); ++k) |
5669 { | 6048 { |
5670 tree op = gimple_op (reduc_stmt, k); | 6049 tree op = gimple_op (reduc_stmt, k); |
5671 if (op == gimple_phi_result (stmt)) | 6050 if (op == phi_result) |
5672 continue; | 6051 continue; |
5673 if (k == 1 | 6052 if (k == 1 |
5674 && gimple_assign_rhs_code (reduc_stmt) == COND_EXPR) | 6053 && gimple_assign_rhs_code (reduc_stmt) == COND_EXPR) |
5675 continue; | 6054 continue; |
5676 tem = get_vectype_for_scalar_type (TREE_TYPE (op)); | 6055 if (!vectype_in |
5677 if (! vectype_in | 6056 || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in))) |
5678 || TYPE_VECTOR_SUBPARTS (tem) < TYPE_VECTOR_SUBPARTS (vectype_in)) | 6057 < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (op))))) |
5679 vectype_in = tem; | 6058 vectype_in = get_vectype_for_scalar_type (TREE_TYPE (op)); |
5680 break; | 6059 break; |
5681 } | 6060 } |
5682 gcc_assert (vectype_in); | 6061 gcc_assert (vectype_in); |
5683 | 6062 |
5684 if (slp_node) | 6063 if (slp_node) |
5685 ncopies = 1; | 6064 ncopies = 1; |
5686 else | 6065 else |
5687 ncopies = vect_get_num_copies (loop_vinfo, vectype_in); | 6066 ncopies = vect_get_num_copies (loop_vinfo, vectype_in); |
5688 | 6067 |
5689 use_operand_p use_p; | 6068 stmt_vec_info use_stmt_info; |
5690 gimple *use_stmt; | |
5691 if (ncopies > 1 | 6069 if (ncopies > 1 |
5692 && (STMT_VINFO_RELEVANT (vinfo_for_stmt (reduc_stmt)) | 6070 && STMT_VINFO_RELEVANT (reduc_stmt_info) <= vect_used_only_live |
5693 <= vect_used_only_live) | 6071 && (use_stmt_info = loop_vinfo->lookup_single_use (phi_result)) |
5694 && single_imm_use (gimple_phi_result (stmt), &use_p, &use_stmt) | 6072 && vect_stmt_to_vectorize (use_stmt_info) == reduc_stmt_info) |
5695 && (use_stmt == reduc_stmt | |
5696 || (STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) | |
5697 == reduc_stmt))) | |
5698 single_defuse_cycle = true; | 6073 single_defuse_cycle = true; |
5699 | 6074 |
5700 /* Create the destination vector */ | 6075 /* Create the destination vector */ |
5701 scalar_dest = gimple_assign_lhs (reduc_stmt); | 6076 scalar_dest = gimple_assign_lhs (reduc_stmt); |
5702 vec_dest = vect_create_destination_var (scalar_dest, vectype_out); | 6077 vec_dest = vect_create_destination_var (scalar_dest, vectype_out); |
5703 | 6078 |
5704 if (slp_node) | 6079 if (slp_node) |
5705 /* The size vect_schedule_slp_instance computes is off for us. */ | 6080 /* The size vect_schedule_slp_instance computes is off for us. */ |
5706 vec_num = ((LOOP_VINFO_VECT_FACTOR (loop_vinfo) | 6081 vec_num = vect_get_num_vectors |
5707 * SLP_TREE_SCALAR_STMTS (slp_node).length ()) | 6082 (LOOP_VINFO_VECT_FACTOR (loop_vinfo) |
5708 / TYPE_VECTOR_SUBPARTS (vectype_in)); | 6083 * SLP_TREE_SCALAR_STMTS (slp_node).length (), |
6084 vectype_in); | |
5709 else | 6085 else |
5710 vec_num = 1; | 6086 vec_num = 1; |
5711 | 6087 |
5712 /* Generate the reduction PHIs upfront. */ | 6088 /* Generate the reduction PHIs upfront. */ |
5713 prev_phi_info = NULL; | 6089 prev_phi_info = NULL; |
5718 for (i = 0; i < vec_num; i++) | 6094 for (i = 0; i < vec_num; i++) |
5719 { | 6095 { |
5720 /* Create the reduction-phi that defines the reduction | 6096 /* Create the reduction-phi that defines the reduction |
5721 operand. */ | 6097 operand. */ |
5722 gimple *new_phi = create_phi_node (vec_dest, loop->header); | 6098 gimple *new_phi = create_phi_node (vec_dest, loop->header); |
5723 set_vinfo_for_stmt (new_phi, | 6099 stmt_vec_info new_phi_info = loop_vinfo->add_stmt (new_phi); |
5724 new_stmt_vec_info (new_phi, loop_vinfo)); | |
5725 | 6100 |
5726 if (slp_node) | 6101 if (slp_node) |
5727 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi); | 6102 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_phi_info); |
5728 else | 6103 else |
5729 { | 6104 { |
5730 if (j == 0) | 6105 if (j == 0) |
5731 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_phi; | 6106 STMT_VINFO_VEC_STMT (stmt_info) |
6107 = *vec_stmt = new_phi_info; | |
5732 else | 6108 else |
5733 STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi; | 6109 STMT_VINFO_RELATED_STMT (prev_phi_info) = new_phi_info; |
5734 prev_phi_info = vinfo_for_stmt (new_phi); | 6110 prev_phi_info = new_phi_info; |
5735 } | 6111 } |
5736 } | 6112 } |
5737 } | 6113 } |
5738 } | 6114 } |
5739 | 6115 |
5742 | 6118 |
5743 /* 1. Is vectorizable reduction? */ | 6119 /* 1. Is vectorizable reduction? */ |
5744 /* Not supportable if the reduction variable is used in the loop, unless | 6120 /* Not supportable if the reduction variable is used in the loop, unless |
5745 it's a reduction chain. */ | 6121 it's a reduction chain. */ |
5746 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer | 6122 if (STMT_VINFO_RELEVANT (stmt_info) > vect_used_in_outer |
5747 && !GROUP_FIRST_ELEMENT (stmt_info)) | 6123 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)) |
5748 return false; | 6124 return false; |
5749 | 6125 |
5750 /* Reductions that are not used even in an enclosing outer-loop, | 6126 /* Reductions that are not used even in an enclosing outer-loop, |
5751 are expected to be "live" (used out of the loop). */ | 6127 are expected to be "live" (used out of the loop). */ |
5752 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope | 6128 if (STMT_VINFO_RELEVANT (stmt_info) == vect_unused_in_scope |
5758 Check if STMT represents a pattern that has been recognized | 6134 Check if STMT represents a pattern that has been recognized |
5759 in earlier analysis stages. For stmts that represent a pattern, | 6135 in earlier analysis stages. For stmts that represent a pattern, |
5760 the STMT_VINFO_RELATED_STMT field records the last stmt in | 6136 the STMT_VINFO_RELATED_STMT field records the last stmt in |
5761 the original sequence that constitutes the pattern. */ | 6137 the original sequence that constitutes the pattern. */ |
5762 | 6138 |
5763 orig_stmt = STMT_VINFO_RELATED_STMT (vinfo_for_stmt (stmt)); | 6139 stmt_vec_info orig_stmt_info = STMT_VINFO_RELATED_STMT (stmt_info); |
5764 if (orig_stmt) | 6140 if (orig_stmt_info) |
5765 { | 6141 { |
5766 orig_stmt_info = vinfo_for_stmt (orig_stmt); | |
5767 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info)); | 6142 gcc_assert (STMT_VINFO_IN_PATTERN_P (orig_stmt_info)); |
5768 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info)); | 6143 gcc_assert (!STMT_VINFO_IN_PATTERN_P (stmt_info)); |
5769 } | 6144 } |
5770 | 6145 |
5771 /* 3. Check the operands of the operation. The first operands are defined | 6146 /* 3. Check the operands of the operation. The first operands are defined |
5772 inside the loop body. The last operand is the reduction variable, | 6147 inside the loop body. The last operand is the reduction variable, |
5773 which is defined by the loop-header-phi. */ | 6148 which is defined by the loop-header-phi. */ |
5774 | 6149 |
5775 gcc_assert (is_gimple_assign (stmt)); | 6150 gassign *stmt = as_a <gassign *> (stmt_info->stmt); |
5776 | 6151 |
5777 /* Flatten RHS. */ | 6152 /* Flatten RHS. */ |
5778 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))) | 6153 switch (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))) |
5779 { | 6154 { |
5780 case GIMPLE_BINARY_RHS: | 6155 case GIMPLE_BINARY_RHS: |
5816 | 6191 |
5817 /* All uses but the last are expected to be defined in the loop. | 6192 /* All uses but the last are expected to be defined in the loop. |
5818 The last use is the reduction variable. In case of nested cycle this | 6193 The last use is the reduction variable. In case of nested cycle this |
5819 assumption is not true: we use reduc_index to record the index of the | 6194 assumption is not true: we use reduc_index to record the index of the |
5820 reduction variable. */ | 6195 reduction variable. */ |
5821 gimple *reduc_def_stmt = NULL; | 6196 stmt_vec_info reduc_def_info = NULL; |
5822 int reduc_index = -1; | 6197 int reduc_index = -1; |
5823 for (i = 0; i < op_type; i++) | 6198 for (i = 0; i < op_type; i++) |
5824 { | 6199 { |
5825 /* The condition of COND_EXPR is checked in vectorizable_condition(). */ | 6200 /* The condition of COND_EXPR is checked in vectorizable_condition(). */ |
5826 if (i == 0 && code == COND_EXPR) | 6201 if (i == 0 && code == COND_EXPR) |
5827 continue; | 6202 continue; |
5828 | 6203 |
5829 is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, | 6204 stmt_vec_info def_stmt_info; |
5830 &def_stmt, &dts[i], &tem); | 6205 is_simple_use = vect_is_simple_use (ops[i], loop_vinfo, &dts[i], &tem, |
6206 &def_stmt_info); | |
5831 dt = dts[i]; | 6207 dt = dts[i]; |
5832 gcc_assert (is_simple_use); | 6208 gcc_assert (is_simple_use); |
5833 if (dt == vect_reduction_def) | 6209 if (dt == vect_reduction_def) |
5834 { | 6210 { |
5835 reduc_def_stmt = def_stmt; | 6211 reduc_def_info = def_stmt_info; |
5836 reduc_index = i; | 6212 reduc_index = i; |
5837 continue; | 6213 continue; |
5838 } | 6214 } |
5839 else if (tem) | 6215 else if (tem) |
5840 { | 6216 { |
5841 /* To properly compute ncopies we are interested in the widest | 6217 /* To properly compute ncopies we are interested in the widest |
5842 input type in case we're looking at a widening accumulation. */ | 6218 input type in case we're looking at a widening accumulation. */ |
5843 if (!vectype_in | 6219 if (!vectype_in |
5844 || TYPE_VECTOR_SUBPARTS (vectype_in) > TYPE_VECTOR_SUBPARTS (tem)) | 6220 || (GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (vectype_in))) |
6221 < GET_MODE_SIZE (SCALAR_TYPE_MODE (TREE_TYPE (tem))))) | |
5845 vectype_in = tem; | 6222 vectype_in = tem; |
5846 } | 6223 } |
5847 | 6224 |
5848 if (dt != vect_internal_def | 6225 if (dt != vect_internal_def |
5849 && dt != vect_external_def | 6226 && dt != vect_external_def |
5851 && dt != vect_induction_def | 6228 && dt != vect_induction_def |
5852 && !(dt == vect_nested_cycle && nested_cycle)) | 6229 && !(dt == vect_nested_cycle && nested_cycle)) |
5853 return false; | 6230 return false; |
5854 | 6231 |
5855 if (dt == vect_nested_cycle) | 6232 if (dt == vect_nested_cycle) |
5856 { | 6233 { |
5857 found_nested_cycle_def = true; | 6234 found_nested_cycle_def = true; |
5858 reduc_def_stmt = def_stmt; | 6235 reduc_def_info = def_stmt_info; |
5859 reduc_index = i; | 6236 reduc_index = i; |
5860 } | 6237 } |
5861 | 6238 |
5862 if (i == 1 && code == COND_EXPR) | 6239 if (i == 1 && code == COND_EXPR) |
5863 { | 6240 { |
5864 /* Record how value of COND_EXPR is defined. */ | 6241 /* Record how value of COND_EXPR is defined. */ |
5865 if (dt == vect_constant_def) | 6242 if (dt == vect_constant_def) |
5866 { | 6243 { |
5867 cond_reduc_dt = dt; | 6244 cond_reduc_dt = dt; |
5868 cond_reduc_val = ops[i]; | 6245 cond_reduc_val = ops[i]; |
5869 } | 6246 } |
5870 if (dt == vect_induction_def && def_stmt != NULL | 6247 if (dt == vect_induction_def |
5871 && is_nonwrapping_integer_induction (def_stmt, loop)) | 6248 && def_stmt_info |
5872 cond_reduc_dt = dt; | 6249 && is_nonwrapping_integer_induction (def_stmt_info, loop)) |
6250 { | |
6251 cond_reduc_dt = dt; | |
6252 cond_stmt_vinfo = def_stmt_info; | |
6253 } | |
5873 } | 6254 } |
5874 } | 6255 } |
5875 | 6256 |
5876 if (!vectype_in) | 6257 if (!vectype_in) |
5877 vectype_in = vectype_out; | 6258 vectype_in = vectype_out; |
5878 | 6259 |
5879 /* When vectorizing a reduction chain w/o SLP the reduction PHI is not | 6260 /* When vectorizing a reduction chain w/o SLP the reduction PHI is not |
5880 directly used in stmt. */ | 6261 directly used in stmt. */ |
5881 if (reduc_index == -1) | 6262 if (reduc_index == -1) |
5882 { | 6263 { |
5883 if (orig_stmt) | 6264 if (STMT_VINFO_REDUC_TYPE (stmt_info) == FOLD_LEFT_REDUCTION) |
5884 reduc_def_stmt = STMT_VINFO_REDUC_DEF (orig_stmt_info); | 6265 { |
6266 if (dump_enabled_p ()) | |
6267 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
6268 "in-order reduction chain without SLP.\n"); | |
6269 return false; | |
6270 } | |
6271 | |
6272 if (orig_stmt_info) | |
6273 reduc_def_info = STMT_VINFO_REDUC_DEF (orig_stmt_info); | |
5885 else | 6274 else |
5886 reduc_def_stmt = STMT_VINFO_REDUC_DEF (stmt_info); | 6275 reduc_def_info = STMT_VINFO_REDUC_DEF (stmt_info); |
5887 } | 6276 } |
5888 | 6277 |
5889 if (! reduc_def_stmt || gimple_code (reduc_def_stmt) != GIMPLE_PHI) | 6278 if (! reduc_def_info) |
6279 return false; | |
6280 | |
6281 gphi *reduc_def_phi = dyn_cast <gphi *> (reduc_def_info->stmt); | |
6282 if (!reduc_def_phi) | |
5890 return false; | 6283 return false; |
5891 | 6284 |
5892 if (!(reduc_index == -1 | 6285 if (!(reduc_index == -1 |
5893 || dts[reduc_index] == vect_reduction_def | 6286 || dts[reduc_index] == vect_reduction_def |
5894 || dts[reduc_index] == vect_nested_cycle | 6287 || dts[reduc_index] == vect_nested_cycle |
5899 && nested_cycle && found_nested_cycle_def))) | 6292 && nested_cycle && found_nested_cycle_def))) |
5900 { | 6293 { |
5901 /* For pattern recognized stmts, orig_stmt might be a reduction, | 6294 /* For pattern recognized stmts, orig_stmt might be a reduction, |
5902 but some helper statements for the pattern might not, or | 6295 but some helper statements for the pattern might not, or |
5903 might be COND_EXPRs with reduction uses in the condition. */ | 6296 might be COND_EXPRs with reduction uses in the condition. */ |
5904 gcc_assert (orig_stmt); | 6297 gcc_assert (orig_stmt_info); |
5905 return false; | 6298 return false; |
5906 } | 6299 } |
5907 | 6300 |
5908 stmt_vec_info reduc_def_info = vinfo_for_stmt (reduc_def_stmt); | 6301 /* PHIs should not participate in patterns. */ |
6302 gcc_assert (!STMT_VINFO_RELATED_STMT (reduc_def_info)); | |
5909 enum vect_reduction_type v_reduc_type | 6303 enum vect_reduction_type v_reduc_type |
5910 = STMT_VINFO_REDUC_TYPE (reduc_def_info); | 6304 = STMT_VINFO_REDUC_TYPE (reduc_def_info); |
5911 gimple *tmp = STMT_VINFO_REDUC_DEF (reduc_def_info); | 6305 stmt_vec_info tmp = STMT_VINFO_REDUC_DEF (reduc_def_info); |
5912 | 6306 |
5913 STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = v_reduc_type; | 6307 STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = v_reduc_type; |
5914 /* If we have a condition reduction, see if we can simplify it further. */ | 6308 /* If we have a condition reduction, see if we can simplify it further. */ |
5915 if (v_reduc_type == COND_REDUCTION) | 6309 if (v_reduc_type == COND_REDUCTION) |
5916 { | 6310 { |
5917 if (cond_reduc_dt == vect_induction_def) | 6311 /* TODO: We can't yet handle reduction chains, since we need to treat |
6312 each COND_EXPR in the chain specially, not just the last one. | |
6313 E.g. for: | |
6314 | |
6315 x_1 = PHI <x_3, ...> | |
6316 x_2 = a_2 ? ... : x_1; | |
6317 x_3 = a_3 ? ... : x_2; | |
6318 | |
6319 we're interested in the last element in x_3 for which a_2 || a_3 | |
6320 is true, whereas the current reduction chain handling would | |
6321 vectorize x_2 as a normal VEC_COND_EXPR and only treat x_3 | |
6322 as a reduction operation. */ | |
6323 if (reduc_index == -1) | |
5918 { | 6324 { |
5919 if (dump_enabled_p ()) | 6325 if (dump_enabled_p ()) |
5920 dump_printf_loc (MSG_NOTE, vect_location, | 6326 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
5921 "condition expression based on " | 6327 "conditional reduction chains not supported\n"); |
5922 "integer induction.\n"); | 6328 return false; |
5923 STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) | 6329 } |
5924 = INTEGER_INDUC_COND_REDUCTION; | 6330 |
5925 } | 6331 /* vect_is_simple_reduction ensured that operand 2 is the |
6332 loop-carried operand. */ | |
6333 gcc_assert (reduc_index == 2); | |
5926 | 6334 |
5927 /* Loop peeling modifies initial value of reduction PHI, which | 6335 /* Loop peeling modifies initial value of reduction PHI, which |
5928 makes the reduction stmt to be transformed different to the | 6336 makes the reduction stmt to be transformed different to the |
5929 original stmt analyzed. We need to record reduction code for | 6337 original stmt analyzed. We need to record reduction code for |
5930 CONST_COND_REDUCTION type reduction at analyzing stage, thus | 6338 CONST_COND_REDUCTION type reduction at analyzing stage, thus |
5934 { | 6342 { |
5935 /* Also set the reduction type to CONST_COND_REDUCTION. */ | 6343 /* Also set the reduction type to CONST_COND_REDUCTION. */ |
5936 gcc_assert (cond_reduc_dt == vect_constant_def); | 6344 gcc_assert (cond_reduc_dt == vect_constant_def); |
5937 STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = CONST_COND_REDUCTION; | 6345 STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = CONST_COND_REDUCTION; |
5938 } | 6346 } |
6347 else if (direct_internal_fn_supported_p (IFN_FOLD_EXTRACT_LAST, | |
6348 vectype_in, OPTIMIZE_FOR_SPEED)) | |
6349 { | |
6350 if (dump_enabled_p ()) | |
6351 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
6352 "optimizing condition reduction with" | |
6353 " FOLD_EXTRACT_LAST.\n"); | |
6354 STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) = EXTRACT_LAST_REDUCTION; | |
6355 } | |
6356 else if (cond_reduc_dt == vect_induction_def) | |
6357 { | |
6358 tree base | |
6359 = STMT_VINFO_LOOP_PHI_EVOLUTION_BASE_UNCHANGED (cond_stmt_vinfo); | |
6360 tree step = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (cond_stmt_vinfo); | |
6361 | |
6362 gcc_assert (TREE_CODE (base) == INTEGER_CST | |
6363 && TREE_CODE (step) == INTEGER_CST); | |
6364 cond_reduc_val = NULL_TREE; | |
6365 /* Find a suitable value, for MAX_EXPR below base, for MIN_EXPR | |
6366 above base; punt if base is the minimum value of the type for | |
6367 MAX_EXPR or maximum value of the type for MIN_EXPR for now. */ | |
6368 if (tree_int_cst_sgn (step) == -1) | |
6369 { | |
6370 cond_reduc_op_code = MIN_EXPR; | |
6371 if (tree_int_cst_sgn (base) == -1) | |
6372 cond_reduc_val = build_int_cst (TREE_TYPE (base), 0); | |
6373 else if (tree_int_cst_lt (base, | |
6374 TYPE_MAX_VALUE (TREE_TYPE (base)))) | |
6375 cond_reduc_val | |
6376 = int_const_binop (PLUS_EXPR, base, integer_one_node); | |
6377 } | |
6378 else | |
6379 { | |
6380 cond_reduc_op_code = MAX_EXPR; | |
6381 if (tree_int_cst_sgn (base) == 1) | |
6382 cond_reduc_val = build_int_cst (TREE_TYPE (base), 0); | |
6383 else if (tree_int_cst_lt (TYPE_MIN_VALUE (TREE_TYPE (base)), | |
6384 base)) | |
6385 cond_reduc_val | |
6386 = int_const_binop (MINUS_EXPR, base, integer_one_node); | |
6387 } | |
6388 if (cond_reduc_val) | |
6389 { | |
6390 if (dump_enabled_p ()) | |
6391 dump_printf_loc (MSG_NOTE, vect_location, | |
6392 "condition expression based on " | |
6393 "integer induction.\n"); | |
6394 STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) | |
6395 = INTEGER_INDUC_COND_REDUCTION; | |
6396 } | |
6397 } | |
5939 else if (cond_reduc_dt == vect_constant_def) | 6398 else if (cond_reduc_dt == vect_constant_def) |
5940 { | 6399 { |
5941 enum vect_def_type cond_initial_dt; | 6400 enum vect_def_type cond_initial_dt; |
5942 gimple *def_stmt = SSA_NAME_DEF_STMT (ops[reduc_index]); | 6401 gimple *def_stmt = SSA_NAME_DEF_STMT (ops[reduc_index]); |
5943 tree cond_initial_val | 6402 tree cond_initial_val |
5944 = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop)); | 6403 = PHI_ARG_DEF_FROM_EDGE (def_stmt, loop_preheader_edge (loop)); |
5945 | 6404 |
5946 gcc_assert (cond_reduc_val != NULL_TREE); | 6405 gcc_assert (cond_reduc_val != NULL_TREE); |
5947 vect_is_simple_use (cond_initial_val, loop_vinfo, | 6406 vect_is_simple_use (cond_initial_val, loop_vinfo, &cond_initial_dt); |
5948 &def_stmt, &cond_initial_dt); | |
5949 if (cond_initial_dt == vect_constant_def | 6407 if (cond_initial_dt == vect_constant_def |
5950 && types_compatible_p (TREE_TYPE (cond_initial_val), | 6408 && types_compatible_p (TREE_TYPE (cond_initial_val), |
5951 TREE_TYPE (cond_reduc_val))) | 6409 TREE_TYPE (cond_reduc_val))) |
5952 { | 6410 { |
5953 tree e = fold_binary (LE_EXPR, boolean_type_node, | 6411 tree e = fold_binary (LE_EXPR, boolean_type_node, |
5966 } | 6424 } |
5967 } | 6425 } |
5968 } | 6426 } |
5969 } | 6427 } |
5970 | 6428 |
5971 if (orig_stmt) | 6429 if (orig_stmt_info) |
5972 gcc_assert (tmp == orig_stmt | 6430 gcc_assert (tmp == orig_stmt_info |
5973 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == orig_stmt); | 6431 || REDUC_GROUP_FIRST_ELEMENT (tmp) == orig_stmt_info); |
5974 else | 6432 else |
5975 /* We changed STMT to be the first stmt in reduction chain, hence we | 6433 /* We changed STMT to be the first stmt in reduction chain, hence we |
5976 check that in this case the first element in the chain is STMT. */ | 6434 check that in this case the first element in the chain is STMT. */ |
5977 gcc_assert (stmt == tmp | 6435 gcc_assert (tmp == stmt_info |
5978 || GROUP_FIRST_ELEMENT (vinfo_for_stmt (tmp)) == stmt); | 6436 || REDUC_GROUP_FIRST_ELEMENT (tmp) == stmt_info); |
5979 | 6437 |
5980 if (STMT_VINFO_LIVE_P (vinfo_for_stmt (reduc_def_stmt))) | 6438 if (STMT_VINFO_LIVE_P (reduc_def_info)) |
5981 return false; | 6439 return false; |
5982 | 6440 |
5983 if (slp_node) | 6441 if (slp_node) |
5984 ncopies = 1; | 6442 ncopies = 1; |
5985 else | 6443 else |
5986 ncopies = vect_get_num_copies (loop_vinfo, vectype_in); | 6444 ncopies = vect_get_num_copies (loop_vinfo, vectype_in); |
5987 | 6445 |
5988 gcc_assert (ncopies >= 1); | 6446 gcc_assert (ncopies >= 1); |
5989 | 6447 |
5990 vec_mode = TYPE_MODE (vectype_in); | 6448 vec_mode = TYPE_MODE (vectype_in); |
6449 poly_uint64 nunits_out = TYPE_VECTOR_SUBPARTS (vectype_out); | |
5991 | 6450 |
5992 if (code == COND_EXPR) | 6451 if (code == COND_EXPR) |
5993 { | 6452 { |
5994 /* Only call during the analysis stage, otherwise we'll lose | 6453 /* Only call during the analysis stage, otherwise we'll lose |
5995 STMT_VINFO_TYPE. */ | 6454 STMT_VINFO_TYPE. */ |
5996 if (!vec_stmt && !vectorizable_condition (stmt, gsi, NULL, | 6455 if (!vec_stmt && !vectorizable_condition (stmt_info, gsi, NULL, |
5997 ops[reduc_index], 0, NULL)) | 6456 ops[reduc_index], 0, NULL, |
6457 cost_vec)) | |
5998 { | 6458 { |
5999 if (dump_enabled_p ()) | 6459 if (dump_enabled_p ()) |
6000 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 6460 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6001 "unsupported condition in reduction\n"); | 6461 "unsupported condition in reduction\n"); |
6002 return false; | 6462 return false; |
6031 if (optab_handler (optab, vec_mode) == CODE_FOR_nothing) | 6491 if (optab_handler (optab, vec_mode) == CODE_FOR_nothing) |
6032 { | 6492 { |
6033 if (dump_enabled_p ()) | 6493 if (dump_enabled_p ()) |
6034 dump_printf (MSG_NOTE, "op not supported by target.\n"); | 6494 dump_printf (MSG_NOTE, "op not supported by target.\n"); |
6035 | 6495 |
6036 if (GET_MODE_SIZE (vec_mode) != UNITS_PER_WORD | 6496 if (maybe_ne (GET_MODE_SIZE (vec_mode), UNITS_PER_WORD) |
6037 || !vect_worthwhile_without_simd_p (loop_vinfo, code)) | 6497 || !vect_worthwhile_without_simd_p (loop_vinfo, code)) |
6038 return false; | 6498 return false; |
6039 | 6499 |
6040 if (dump_enabled_p ()) | 6500 if (dump_enabled_p ()) |
6041 dump_printf (MSG_NOTE, "proceeding using word mode.\n"); | 6501 dump_printf (MSG_NOTE, "proceeding using word mode.\n"); |
6086 the arguments are the same as the type of the reduction variable. | 6546 the arguments are the same as the type of the reduction variable. |
6087 For "regular" reductions we can therefore use the same vector type | 6547 For "regular" reductions we can therefore use the same vector type |
6088 (and also the same tree-code) when generating the epilog code and | 6548 (and also the same tree-code) when generating the epilog code and |
6089 when generating the code inside the loop. */ | 6549 when generating the code inside the loop. */ |
6090 | 6550 |
6091 if (orig_stmt) | 6551 vect_reduction_type reduction_type |
6552 = STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info); | |
6553 if (orig_stmt_info | |
6554 && (reduction_type == TREE_CODE_REDUCTION | |
6555 || reduction_type == FOLD_LEFT_REDUCTION)) | |
6092 { | 6556 { |
6093 /* This is a reduction pattern: get the vectype from the type of the | 6557 /* This is a reduction pattern: get the vectype from the type of the |
6094 reduction variable, and get the tree-code from orig_stmt. */ | 6558 reduction variable, and get the tree-code from orig_stmt. */ |
6095 gcc_assert (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) | 6559 orig_code = gimple_assign_rhs_code (orig_stmt_info->stmt); |
6096 == TREE_CODE_REDUCTION); | |
6097 orig_code = gimple_assign_rhs_code (orig_stmt); | |
6098 gcc_assert (vectype_out); | 6560 gcc_assert (vectype_out); |
6099 vec_mode = TYPE_MODE (vectype_out); | 6561 vec_mode = TYPE_MODE (vectype_out); |
6100 } | 6562 } |
6101 else | 6563 else |
6102 { | 6564 { |
6107 if (code == MINUS_EXPR) | 6569 if (code == MINUS_EXPR) |
6108 orig_code = PLUS_EXPR; | 6570 orig_code = PLUS_EXPR; |
6109 | 6571 |
6110 /* For simple condition reductions, replace with the actual expression | 6572 /* For simple condition reductions, replace with the actual expression |
6111 we want to base our reduction around. */ | 6573 we want to base our reduction around. */ |
6112 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == CONST_COND_REDUCTION) | 6574 if (reduction_type == CONST_COND_REDUCTION) |
6113 { | 6575 { |
6114 orig_code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info); | 6576 orig_code = STMT_VINFO_VEC_CONST_COND_REDUC_CODE (stmt_info); |
6115 gcc_assert (orig_code == MAX_EXPR || orig_code == MIN_EXPR); | 6577 gcc_assert (orig_code == MAX_EXPR || orig_code == MIN_EXPR); |
6116 } | 6578 } |
6117 else if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) | 6579 else if (reduction_type == INTEGER_INDUC_COND_REDUCTION) |
6118 == INTEGER_INDUC_COND_REDUCTION) | 6580 orig_code = cond_reduc_op_code; |
6119 orig_code = MAX_EXPR; | |
6120 } | 6581 } |
6121 | 6582 |
6122 if (nested_cycle) | 6583 if (nested_cycle) |
6123 { | 6584 { |
6124 def_bb = gimple_bb (reduc_def_stmt); | 6585 def_bb = gimple_bb (reduc_def_phi); |
6125 def_stmt_loop = def_bb->loop_father; | 6586 def_stmt_loop = def_bb->loop_father; |
6126 def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_stmt, | 6587 def_arg = PHI_ARG_DEF_FROM_EDGE (reduc_def_phi, |
6127 loop_preheader_edge (def_stmt_loop)); | 6588 loop_preheader_edge (def_stmt_loop)); |
6128 if (TREE_CODE (def_arg) == SSA_NAME | 6589 stmt_vec_info def_arg_stmt_info = loop_vinfo->lookup_def (def_arg); |
6129 && (def_arg_stmt = SSA_NAME_DEF_STMT (def_arg)) | 6590 if (def_arg_stmt_info |
6130 && gimple_code (def_arg_stmt) == GIMPLE_PHI | 6591 && (STMT_VINFO_DEF_TYPE (def_arg_stmt_info) |
6131 && flow_bb_inside_loop_p (outer_loop, gimple_bb (def_arg_stmt)) | 6592 == vect_double_reduction_def)) |
6132 && vinfo_for_stmt (def_arg_stmt) | |
6133 && STMT_VINFO_DEF_TYPE (vinfo_for_stmt (def_arg_stmt)) | |
6134 == vect_double_reduction_def) | |
6135 double_reduc = true; | 6593 double_reduc = true; |
6136 } | 6594 } |
6137 | 6595 |
6138 epilog_reduc_code = ERROR_MARK; | 6596 reduc_fn = IFN_LAST; |
6139 | 6597 |
6140 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != COND_REDUCTION) | 6598 if (reduction_type == TREE_CODE_REDUCTION |
6141 { | 6599 || reduction_type == FOLD_LEFT_REDUCTION |
6142 if (reduction_code_for_scalar_code (orig_code, &epilog_reduc_code)) | 6600 || reduction_type == INTEGER_INDUC_COND_REDUCTION |
6143 { | 6601 || reduction_type == CONST_COND_REDUCTION) |
6144 reduc_optab = optab_for_tree_code (epilog_reduc_code, vectype_out, | 6602 { |
6145 optab_default); | 6603 if (reduction_type == FOLD_LEFT_REDUCTION |
6146 if (!reduc_optab) | 6604 ? fold_left_reduction_fn (orig_code, &reduc_fn) |
6147 { | 6605 : reduction_fn_for_scalar_code (orig_code, &reduc_fn)) |
6148 if (dump_enabled_p ()) | 6606 { |
6149 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 6607 if (reduc_fn != IFN_LAST |
6150 "no optab for reduction.\n"); | 6608 && !direct_internal_fn_supported_p (reduc_fn, vectype_out, |
6151 | 6609 OPTIMIZE_FOR_SPEED)) |
6152 epilog_reduc_code = ERROR_MARK; | |
6153 } | |
6154 else if (optab_handler (reduc_optab, vec_mode) == CODE_FOR_nothing) | |
6155 { | 6610 { |
6156 if (dump_enabled_p ()) | 6611 if (dump_enabled_p ()) |
6157 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 6612 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6158 "reduc op not supported by target.\n"); | 6613 "reduc op not supported by target.\n"); |
6159 | 6614 |
6160 epilog_reduc_code = ERROR_MARK; | 6615 reduc_fn = IFN_LAST; |
6161 } | 6616 } |
6162 } | 6617 } |
6163 else | 6618 else |
6164 { | 6619 { |
6165 if (!nested_cycle || double_reduc) | 6620 if (!nested_cycle || double_reduc) |
6170 | 6625 |
6171 return false; | 6626 return false; |
6172 } | 6627 } |
6173 } | 6628 } |
6174 } | 6629 } |
6175 else | 6630 else if (reduction_type == COND_REDUCTION) |
6176 { | 6631 { |
6177 int scalar_precision | 6632 int scalar_precision |
6178 = GET_MODE_PRECISION (SCALAR_TYPE_MODE (scalar_type)); | 6633 = GET_MODE_PRECISION (SCALAR_TYPE_MODE (scalar_type)); |
6179 cr_index_scalar_type = make_unsigned_type (scalar_precision); | 6634 cr_index_scalar_type = make_unsigned_type (scalar_precision); |
6180 cr_index_vector_type = build_vector_type | 6635 cr_index_vector_type = build_vector_type (cr_index_scalar_type, |
6181 (cr_index_scalar_type, TYPE_VECTOR_SUBPARTS (vectype_out)); | 6636 nunits_out); |
6182 | 6637 |
6183 optab = optab_for_tree_code (REDUC_MAX_EXPR, cr_index_vector_type, | 6638 if (direct_internal_fn_supported_p (IFN_REDUC_MAX, cr_index_vector_type, |
6184 optab_default); | 6639 OPTIMIZE_FOR_SPEED)) |
6185 if (optab_handler (optab, TYPE_MODE (cr_index_vector_type)) | 6640 reduc_fn = IFN_REDUC_MAX; |
6186 != CODE_FOR_nothing) | 6641 } |
6187 epilog_reduc_code = REDUC_MAX_EXPR; | 6642 |
6188 } | 6643 if (reduction_type != EXTRACT_LAST_REDUCTION |
6189 | 6644 && (!nested_cycle || double_reduc) |
6190 if ((double_reduc | 6645 && reduc_fn == IFN_LAST |
6191 || STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) != TREE_CODE_REDUCTION) | 6646 && !nunits_out.is_constant ()) |
6647 { | |
6648 if (dump_enabled_p ()) | |
6649 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
6650 "missing target support for reduction on" | |
6651 " variable-length vectors.\n"); | |
6652 return false; | |
6653 } | |
6654 | |
6655 if ((double_reduc || reduction_type != TREE_CODE_REDUCTION) | |
6192 && ncopies > 1) | 6656 && ncopies > 1) |
6193 { | 6657 { |
6194 if (dump_enabled_p ()) | 6658 if (dump_enabled_p ()) |
6195 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 6659 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6196 "multiple types in double reduction or condition " | 6660 "multiple types in double reduction or condition " |
6197 "reduction.\n"); | 6661 "reduction.\n"); |
6198 return false; | 6662 return false; |
6663 } | |
6664 | |
6665 /* For SLP reductions, see if there is a neutral value we can use. */ | |
6666 tree neutral_op = NULL_TREE; | |
6667 if (slp_node) | |
6668 neutral_op = neutral_op_for_slp_reduction | |
6669 (slp_node_instance->reduc_phis, code, | |
6670 REDUC_GROUP_FIRST_ELEMENT (stmt_info) != NULL); | |
6671 | |
6672 if (double_reduc && reduction_type == FOLD_LEFT_REDUCTION) | |
6673 { | |
6674 /* We can't support in-order reductions of code such as this: | |
6675 | |
6676 for (int i = 0; i < n1; ++i) | |
6677 for (int j = 0; j < n2; ++j) | |
6678 l += a[j]; | |
6679 | |
6680 since GCC effectively transforms the loop when vectorizing: | |
6681 | |
6682 for (int i = 0; i < n1 / VF; ++i) | |
6683 for (int j = 0; j < n2; ++j) | |
6684 for (int k = 0; k < VF; ++k) | |
6685 l += a[j]; | |
6686 | |
6687 which is a reassociation of the original operation. */ | |
6688 if (dump_enabled_p ()) | |
6689 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
6690 "in-order double reduction not supported.\n"); | |
6691 | |
6692 return false; | |
6693 } | |
6694 | |
6695 if (reduction_type == FOLD_LEFT_REDUCTION | |
6696 && slp_node | |
6697 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info)) | |
6698 { | |
6699 /* We cannot use in-order reductions in this case because there is | |
6700 an implicit reassociation of the operations involved. */ | |
6701 if (dump_enabled_p ()) | |
6702 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
6703 "in-order unchained SLP reductions not supported.\n"); | |
6704 return false; | |
6705 } | |
6706 | |
6707 /* For double reductions, and for SLP reductions with a neutral value, | |
6708 we construct a variable-length initial vector by loading a vector | |
6709 full of the neutral value and then shift-and-inserting the start | |
6710 values into the low-numbered elements. */ | |
6711 if ((double_reduc || neutral_op) | |
6712 && !nunits_out.is_constant () | |
6713 && !direct_internal_fn_supported_p (IFN_VEC_SHL_INSERT, | |
6714 vectype_out, OPTIMIZE_FOR_SPEED)) | |
6715 { | |
6716 if (dump_enabled_p ()) | |
6717 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
6718 "reduction on variable-length vectors requires" | |
6719 " target support for a vector-shift-and-insert" | |
6720 " operation.\n"); | |
6721 return false; | |
6722 } | |
6723 | |
6724 /* Check extra constraints for variable-length unchained SLP reductions. */ | |
6725 if (STMT_SLP_TYPE (stmt_info) | |
6726 && !REDUC_GROUP_FIRST_ELEMENT (stmt_info) | |
6727 && !nunits_out.is_constant ()) | |
6728 { | |
6729 /* We checked above that we could build the initial vector when | |
6730 there's a neutral element value. Check here for the case in | |
6731 which each SLP statement has its own initial value and in which | |
6732 that value needs to be repeated for every instance of the | |
6733 statement within the initial vector. */ | |
6734 unsigned int group_size = SLP_TREE_SCALAR_STMTS (slp_node).length (); | |
6735 scalar_mode elt_mode = SCALAR_TYPE_MODE (TREE_TYPE (vectype_out)); | |
6736 if (!neutral_op | |
6737 && !can_duplicate_and_interleave_p (group_size, elt_mode)) | |
6738 { | |
6739 if (dump_enabled_p ()) | |
6740 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
6741 "unsupported form of SLP reduction for" | |
6742 " variable-length vectors: cannot build" | |
6743 " initial vector.\n"); | |
6744 return false; | |
6745 } | |
6746 /* The epilogue code relies on the number of elements being a multiple | |
6747 of the group size. The duplicate-and-interleave approach to setting | |
6748 up the initial vector does too. */ | |
6749 if (!multiple_p (nunits_out, group_size)) | |
6750 { | |
6751 if (dump_enabled_p ()) | |
6752 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
6753 "unsupported form of SLP reduction for" | |
6754 " variable-length vectors: the vector size" | |
6755 " is not a multiple of the number of results.\n"); | |
6756 return false; | |
6757 } | |
6199 } | 6758 } |
6200 | 6759 |
6201 /* In case of widening multiplication by a constant, we update the type | 6760 /* In case of widening multiplication by a constant, we update the type |
6202 of the constant to be the type of the other operand. We check that the | 6761 of the constant to be the type of the other operand. We check that the |
6203 constant fits the type in the pattern recognition pass. */ | 6762 constant fits the type in the pattern recognition pass. */ |
6216 | 6775 |
6217 return false; | 6776 return false; |
6218 } | 6777 } |
6219 } | 6778 } |
6220 | 6779 |
6221 if (STMT_VINFO_VEC_REDUCTION_TYPE (stmt_info) == COND_REDUCTION) | 6780 if (reduction_type == COND_REDUCTION) |
6222 { | 6781 { |
6223 widest_int ni; | 6782 widest_int ni; |
6224 | 6783 |
6225 if (! max_loop_iterations (loop, &ni)) | 6784 if (! max_loop_iterations (loop, &ni)) |
6226 { | 6785 { |
6273 from the vectorized reduction operation generated in the previous iteration. | 6832 from the vectorized reduction operation generated in the previous iteration. |
6274 | 6833 |
6275 This only works when we see both the reduction PHI and its only consumer | 6834 This only works when we see both the reduction PHI and its only consumer |
6276 in vectorizable_reduction and there are no intermediate stmts | 6835 in vectorizable_reduction and there are no intermediate stmts |
6277 participating. */ | 6836 participating. */ |
6278 use_operand_p use_p; | 6837 stmt_vec_info use_stmt_info; |
6279 gimple *use_stmt; | 6838 tree reduc_phi_result = gimple_phi_result (reduc_def_phi); |
6280 if (ncopies > 1 | 6839 if (ncopies > 1 |
6281 && (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live) | 6840 && (STMT_VINFO_RELEVANT (stmt_info) <= vect_used_only_live) |
6282 && single_imm_use (gimple_phi_result (reduc_def_stmt), &use_p, &use_stmt) | 6841 && (use_stmt_info = loop_vinfo->lookup_single_use (reduc_phi_result)) |
6283 && (use_stmt == stmt | 6842 && vect_stmt_to_vectorize (use_stmt_info) == stmt_info) |
6284 || STMT_VINFO_RELATED_STMT (vinfo_for_stmt (use_stmt)) == stmt)) | |
6285 { | 6843 { |
6286 single_defuse_cycle = true; | 6844 single_defuse_cycle = true; |
6287 epilog_copies = 1; | 6845 epilog_copies = 1; |
6288 } | 6846 } |
6289 else | 6847 else |
6302 "multi def-use cycle not possible for lane-reducing " | 6860 "multi def-use cycle not possible for lane-reducing " |
6303 "reduction operation\n"); | 6861 "reduction operation\n"); |
6304 return false; | 6862 return false; |
6305 } | 6863 } |
6306 | 6864 |
6865 if (slp_node) | |
6866 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); | |
6867 else | |
6868 vec_num = 1; | |
6869 | |
6870 internal_fn cond_fn = get_conditional_internal_fn (code); | |
6871 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo); | |
6872 | |
6307 if (!vec_stmt) /* transformation not required. */ | 6873 if (!vec_stmt) /* transformation not required. */ |
6308 { | 6874 { |
6309 if (first_p) | 6875 vect_model_reduction_cost (stmt_info, reduc_fn, ncopies, cost_vec); |
6310 vect_model_reduction_cost (stmt_info, epilog_reduc_code, ncopies); | 6876 if (loop_vinfo && LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)) |
6877 { | |
6878 if (reduction_type != FOLD_LEFT_REDUCTION | |
6879 && (cond_fn == IFN_LAST | |
6880 || !direct_internal_fn_supported_p (cond_fn, vectype_in, | |
6881 OPTIMIZE_FOR_SPEED))) | |
6882 { | |
6883 if (dump_enabled_p ()) | |
6884 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
6885 "can't use a fully-masked loop because no" | |
6886 " conditional operation is available.\n"); | |
6887 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; | |
6888 } | |
6889 else if (reduc_index == -1) | |
6890 { | |
6891 if (dump_enabled_p ()) | |
6892 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
6893 "can't use a fully-masked loop for chained" | |
6894 " reductions.\n"); | |
6895 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; | |
6896 } | |
6897 else | |
6898 vect_record_loop_mask (loop_vinfo, masks, ncopies * vec_num, | |
6899 vectype_in); | |
6900 } | |
6901 if (dump_enabled_p () | |
6902 && reduction_type == FOLD_LEFT_REDUCTION) | |
6903 dump_printf_loc (MSG_NOTE, vect_location, | |
6904 "using an in-order (fold-left) reduction.\n"); | |
6311 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; | 6905 STMT_VINFO_TYPE (stmt_info) = reduc_vec_info_type; |
6312 return true; | 6906 return true; |
6313 } | 6907 } |
6314 | 6908 |
6315 /* Transform. */ | 6909 /* Transform. */ |
6319 | 6913 |
6320 /* FORNOW: Multiple types are not supported for condition. */ | 6914 /* FORNOW: Multiple types are not supported for condition. */ |
6321 if (code == COND_EXPR) | 6915 if (code == COND_EXPR) |
6322 gcc_assert (ncopies == 1); | 6916 gcc_assert (ncopies == 1); |
6323 | 6917 |
6918 bool masked_loop_p = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); | |
6919 | |
6920 if (reduction_type == FOLD_LEFT_REDUCTION) | |
6921 return vectorize_fold_left_reduction | |
6922 (stmt_info, gsi, vec_stmt, slp_node, reduc_def_phi, code, | |
6923 reduc_fn, ops, vectype_in, reduc_index, masks); | |
6924 | |
6925 if (reduction_type == EXTRACT_LAST_REDUCTION) | |
6926 { | |
6927 gcc_assert (!slp_node); | |
6928 return vectorizable_condition (stmt_info, gsi, vec_stmt, | |
6929 NULL, reduc_index, NULL, NULL); | |
6930 } | |
6931 | |
6324 /* Create the destination vector */ | 6932 /* Create the destination vector */ |
6325 vec_dest = vect_create_destination_var (scalar_dest, vectype_out); | 6933 vec_dest = vect_create_destination_var (scalar_dest, vectype_out); |
6326 | 6934 |
6327 prev_stmt_info = NULL; | 6935 prev_stmt_info = NULL; |
6328 prev_phi_info = NULL; | 6936 prev_phi_info = NULL; |
6329 if (slp_node) | 6937 if (!slp_node) |
6330 vec_num = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); | 6938 { |
6331 else | |
6332 { | |
6333 vec_num = 1; | |
6334 vec_oprnds0.create (1); | 6939 vec_oprnds0.create (1); |
6335 vec_oprnds1.create (1); | 6940 vec_oprnds1.create (1); |
6336 if (op_type == ternary_op) | 6941 if (op_type == ternary_op) |
6337 vec_oprnds2.create (1); | 6942 vec_oprnds2.create (1); |
6338 } | 6943 } |
6343 vect_defs.quick_push (NULL_TREE); | 6948 vect_defs.quick_push (NULL_TREE); |
6344 | 6949 |
6345 if (slp_node) | 6950 if (slp_node) |
6346 phis.splice (SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis)); | 6951 phis.splice (SLP_TREE_VEC_STMTS (slp_node_instance->reduc_phis)); |
6347 else | 6952 else |
6348 phis.quick_push (STMT_VINFO_VEC_STMT (vinfo_for_stmt (reduc_def_stmt))); | 6953 phis.quick_push (STMT_VINFO_VEC_STMT (reduc_def_info)); |
6349 | 6954 |
6350 for (j = 0; j < ncopies; j++) | 6955 for (j = 0; j < ncopies; j++) |
6351 { | 6956 { |
6352 if (code == COND_EXPR) | 6957 if (code == COND_EXPR) |
6353 { | 6958 { |
6354 gcc_assert (!slp_node); | 6959 gcc_assert (!slp_node); |
6355 vectorizable_condition (stmt, gsi, vec_stmt, | 6960 vectorizable_condition (stmt_info, gsi, vec_stmt, |
6356 PHI_RESULT (phis[0]), | 6961 PHI_RESULT (phis[0]->stmt), |
6357 reduc_index, NULL); | 6962 reduc_index, NULL, NULL); |
6358 /* Multiple types are not supported for condition. */ | 6963 /* Multiple types are not supported for condition. */ |
6359 break; | 6964 break; |
6360 } | 6965 } |
6361 | 6966 |
6362 /* Handle uses. */ | 6967 /* Handle uses. */ |
6387 } | 6992 } |
6388 } | 6993 } |
6389 else | 6994 else |
6390 { | 6995 { |
6391 vec_oprnds0.quick_push | 6996 vec_oprnds0.quick_push |
6392 (vect_get_vec_def_for_operand (ops[0], stmt)); | 6997 (vect_get_vec_def_for_operand (ops[0], stmt_info)); |
6393 vec_oprnds1.quick_push | 6998 vec_oprnds1.quick_push |
6394 (vect_get_vec_def_for_operand (ops[1], stmt)); | 6999 (vect_get_vec_def_for_operand (ops[1], stmt_info)); |
6395 if (op_type == ternary_op) | 7000 if (op_type == ternary_op) |
6396 vec_oprnds2.quick_push | 7001 vec_oprnds2.quick_push |
6397 (vect_get_vec_def_for_operand (ops[2], stmt)); | 7002 (vect_get_vec_def_for_operand (ops[2], stmt_info)); |
6398 } | 7003 } |
6399 } | 7004 } |
6400 else | 7005 else |
6401 { | 7006 { |
6402 if (!slp_node) | 7007 if (!slp_node) |
6403 { | 7008 { |
6404 gcc_assert (reduc_index != -1 || ! single_defuse_cycle); | 7009 gcc_assert (reduc_index != -1 || ! single_defuse_cycle); |
6405 | 7010 |
6406 if (single_defuse_cycle && reduc_index == 0) | 7011 if (single_defuse_cycle && reduc_index == 0) |
6407 vec_oprnds0[0] = gimple_assign_lhs (new_stmt); | 7012 vec_oprnds0[0] = gimple_get_lhs (new_stmt_info->stmt); |
6408 else | 7013 else |
6409 vec_oprnds0[0] | 7014 vec_oprnds0[0] |
6410 = vect_get_vec_def_for_stmt_copy (dts[0], vec_oprnds0[0]); | 7015 = vect_get_vec_def_for_stmt_copy (loop_vinfo, |
7016 vec_oprnds0[0]); | |
6411 if (single_defuse_cycle && reduc_index == 1) | 7017 if (single_defuse_cycle && reduc_index == 1) |
6412 vec_oprnds1[0] = gimple_assign_lhs (new_stmt); | 7018 vec_oprnds1[0] = gimple_get_lhs (new_stmt_info->stmt); |
6413 else | 7019 else |
6414 vec_oprnds1[0] | 7020 vec_oprnds1[0] |
6415 = vect_get_vec_def_for_stmt_copy (dts[1], vec_oprnds1[0]); | 7021 = vect_get_vec_def_for_stmt_copy (loop_vinfo, |
7022 vec_oprnds1[0]); | |
6416 if (op_type == ternary_op) | 7023 if (op_type == ternary_op) |
6417 { | 7024 { |
6418 if (single_defuse_cycle && reduc_index == 2) | 7025 if (single_defuse_cycle && reduc_index == 2) |
6419 vec_oprnds2[0] = gimple_assign_lhs (new_stmt); | 7026 vec_oprnds2[0] = gimple_get_lhs (new_stmt_info->stmt); |
6420 else | 7027 else |
6421 vec_oprnds2[0] | 7028 vec_oprnds2[0] |
6422 = vect_get_vec_def_for_stmt_copy (dts[2], vec_oprnds2[0]); | 7029 = vect_get_vec_def_for_stmt_copy (loop_vinfo, |
7030 vec_oprnds2[0]); | |
6423 } | 7031 } |
6424 } | 7032 } |
6425 } | 7033 } |
6426 | 7034 |
6427 FOR_EACH_VEC_ELT (vec_oprnds0, i, def0) | 7035 FOR_EACH_VEC_ELT (vec_oprnds0, i, def0) |
6428 { | 7036 { |
6429 tree vop[3] = { def0, vec_oprnds1[i], NULL_TREE }; | 7037 tree vop[3] = { def0, vec_oprnds1[i], NULL_TREE }; |
6430 if (op_type == ternary_op) | 7038 if (masked_loop_p) |
6431 vop[2] = vec_oprnds2[i]; | 7039 { |
6432 | 7040 /* Make sure that the reduction accumulator is vop[0]. */ |
6433 new_temp = make_ssa_name (vec_dest, new_stmt); | 7041 if (reduc_index == 1) |
6434 new_stmt = gimple_build_assign (new_temp, code, | 7042 { |
6435 vop[0], vop[1], vop[2]); | 7043 gcc_assert (commutative_tree_code (code)); |
6436 vect_finish_stmt_generation (stmt, new_stmt, gsi); | 7044 std::swap (vop[0], vop[1]); |
7045 } | |
7046 tree mask = vect_get_loop_mask (gsi, masks, vec_num * ncopies, | |
7047 vectype_in, i * ncopies + j); | |
7048 gcall *call = gimple_build_call_internal (cond_fn, 4, mask, | |
7049 vop[0], vop[1], | |
7050 vop[0]); | |
7051 new_temp = make_ssa_name (vec_dest, call); | |
7052 gimple_call_set_lhs (call, new_temp); | |
7053 gimple_call_set_nothrow (call, true); | |
7054 new_stmt_info | |
7055 = vect_finish_stmt_generation (stmt_info, call, gsi); | |
7056 } | |
7057 else | |
7058 { | |
7059 if (op_type == ternary_op) | |
7060 vop[2] = vec_oprnds2[i]; | |
7061 | |
7062 gassign *new_stmt = gimple_build_assign (vec_dest, code, | |
7063 vop[0], vop[1], vop[2]); | |
7064 new_temp = make_ssa_name (vec_dest, new_stmt); | |
7065 gimple_assign_set_lhs (new_stmt, new_temp); | |
7066 new_stmt_info | |
7067 = vect_finish_stmt_generation (stmt_info, new_stmt, gsi); | |
7068 } | |
6437 | 7069 |
6438 if (slp_node) | 7070 if (slp_node) |
6439 { | 7071 { |
6440 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); | 7072 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt_info); |
6441 vect_defs.quick_push (new_temp); | 7073 vect_defs.quick_push (new_temp); |
6442 } | 7074 } |
6443 else | 7075 else |
6444 vect_defs[0] = new_temp; | 7076 vect_defs[0] = new_temp; |
6445 } | 7077 } |
6446 | 7078 |
6447 if (slp_node) | 7079 if (slp_node) |
6448 continue; | 7080 continue; |
6449 | 7081 |
6450 if (j == 0) | 7082 if (j == 0) |
6451 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt; | 7083 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = new_stmt_info; |
6452 else | 7084 else |
6453 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt; | 7085 STMT_VINFO_RELATED_STMT (prev_stmt_info) = new_stmt_info; |
6454 | 7086 |
6455 prev_stmt_info = vinfo_for_stmt (new_stmt); | 7087 prev_stmt_info = new_stmt_info; |
6456 } | 7088 } |
6457 | 7089 |
6458 /* Finalize the reduction-phi (set its arguments) and create the | 7090 /* Finalize the reduction-phi (set its arguments) and create the |
6459 epilog reduction code. */ | 7091 epilog reduction code. */ |
6460 if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node) | 7092 if ((!single_defuse_cycle || code == COND_EXPR) && !slp_node) |
6461 vect_defs[0] = gimple_assign_lhs (*vec_stmt); | 7093 vect_defs[0] = gimple_get_lhs ((*vec_stmt)->stmt); |
6462 | 7094 |
6463 vect_create_epilog_for_reduction (vect_defs, stmt, reduc_def_stmt, | 7095 vect_create_epilog_for_reduction (vect_defs, stmt_info, reduc_def_phi, |
6464 epilog_copies, | 7096 epilog_copies, reduc_fn, phis, |
6465 epilog_reduc_code, phis, | 7097 double_reduc, slp_node, slp_node_instance, |
6466 double_reduc, slp_node, slp_node_instance); | 7098 cond_reduc_val, cond_reduc_op_code, |
7099 neutral_op); | |
6467 | 7100 |
6468 return true; | 7101 return true; |
6469 } | 7102 } |
6470 | 7103 |
6471 /* Function vect_min_worthwhile_factor. | 7104 /* Function vect_min_worthwhile_factor. |
6472 | 7105 |
6473 For a loop where we could vectorize the operation indicated by CODE, | 7106 For a loop where we could vectorize the operation indicated by CODE, |
6474 return the minimum vectorization factor that makes it worthwhile | 7107 return the minimum vectorization factor that makes it worthwhile |
6475 to use generic vectors. */ | 7108 to use generic vectors. */ |
6476 int | 7109 static unsigned int |
6477 vect_min_worthwhile_factor (enum tree_code code) | 7110 vect_min_worthwhile_factor (enum tree_code code) |
6478 { | 7111 { |
6479 switch (code) | 7112 switch (code) |
6480 { | 7113 { |
6481 case PLUS_EXPR: | 7114 case PLUS_EXPR: |
6500 | 7133 |
/* Return true if VINFO describes a loop being vectorized and that loop's
   vectorization factor is at least vect_min_worthwhile_factor (CODE).
   The dyn_cast below yields a null LOOP_VINFO when VINFO is not a
   loop_vec_info, and the leading LOOP_VINFO test then makes the whole
   expression false.

   Each row of this listing pairs the older revision (left of the bar)
   with the newer revision (right of the bar).  The older revision
   compares LOOP_VINFO_VECT_FACTOR with the threshold directly.  The
   newer revision additionally requires is_constant (&value) to succeed
   on the factor and then compares VALUE with the threshold.  */
6501 bool | 7134 bool |
6502 vect_worthwhile_without_simd_p (vec_info *vinfo, tree_code code) | 7135 vect_worthwhile_without_simd_p (vec_info *vinfo, tree_code code) |
6503 { | 7136 { |
6504 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); | 7137 loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo); |
/* VALUE exists only in the newer revision; it is passed by address to
   is_constant and read in the comparison that follows, so presumably
   is_constant fills it in on success (confirm against poly-int.h).  */
7138 unsigned HOST_WIDE_INT value; | |
6505 return (loop_vinfo | 7139 return (loop_vinfo |
/* In the newer revision a factor for which is_constant fails
   short-circuits the result to false before the threshold is consulted.  */
6506 && (LOOP_VINFO_VECT_FACTOR (loop_vinfo) | 7140 && LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&value) |
6507 >= vect_min_worthwhile_factor (code))); | 7141 && value >= vect_min_worthwhile_factor (code)); |
6508 } | 7142 } |
6509 | 7143 |
6510 /* Function vectorizable_induction | 7144 /* Function vectorizable_induction |
6511 | 7145 |
6512 Check if PHI performs an induction computation that can be vectorized. | 7146 Check if STMT_INFO performs an induction computation that can be vectorized. |
6513 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized | 7147 If VEC_STMT is also passed, vectorize the induction PHI: create a vectorized |
6514 phi to replace it, put it in VEC_STMT, and add it to the same basic block. | 7148 phi to replace it, put it in VEC_STMT, and add it to the same basic block. |
6515 Return FALSE if not a vectorizable STMT, TRUE otherwise. */ | 7149 Return true if STMT_INFO is vectorizable in this way. */ |
6516 | 7150 |
6517 bool | 7151 bool |
6518 vectorizable_induction (gimple *phi, | 7152 vectorizable_induction (stmt_vec_info stmt_info, |
6519 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED, | 7153 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED, |
6520 gimple **vec_stmt, slp_tree slp_node) | 7154 stmt_vec_info *vec_stmt, slp_tree slp_node, |
7155 stmt_vector_for_cost *cost_vec) | |
6521 { | 7156 { |
6522 stmt_vec_info stmt_info = vinfo_for_stmt (phi); | |
6523 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | 7157 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); |
6524 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | 7158 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
6525 unsigned ncopies; | 7159 unsigned ncopies; |
6526 bool nested_in_vect_loop = false; | 7160 bool nested_in_vect_loop = false; |
6527 struct loop *iv_loop; | 7161 struct loop *iv_loop; |
6532 tree new_name; | 7166 tree new_name; |
6533 gimple *new_stmt; | 7167 gimple *new_stmt; |
6534 gphi *induction_phi; | 7168 gphi *induction_phi; |
6535 tree induc_def, vec_dest; | 7169 tree induc_def, vec_dest; |
6536 tree init_expr, step_expr; | 7170 tree init_expr, step_expr; |
6537 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); | 7171 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
6538 unsigned i; | 7172 unsigned i; |
6539 tree expr; | 7173 tree expr; |
6540 gimple_seq stmts; | 7174 gimple_seq stmts; |
6541 imm_use_iterator imm_iter; | 7175 imm_use_iterator imm_iter; |
6542 use_operand_p use_p; | 7176 use_operand_p use_p; |
6543 gimple *exit_phi; | 7177 gimple *exit_phi; |
6544 edge latch_e; | 7178 edge latch_e; |
6545 tree loop_arg; | 7179 tree loop_arg; |
6546 gimple_stmt_iterator si; | 7180 gimple_stmt_iterator si; |
6547 basic_block bb = gimple_bb (phi); | 7181 |
6548 | 7182 gphi *phi = dyn_cast <gphi *> (stmt_info->stmt); |
6549 if (gimple_code (phi) != GIMPLE_PHI) | 7183 if (!phi) |
6550 return false; | 7184 return false; |
6551 | 7185 |
6552 if (!STMT_VINFO_RELEVANT_P (stmt_info)) | 7186 if (!STMT_VINFO_RELEVANT_P (stmt_info)) |
6553 return false; | 7187 return false; |
6554 | 7188 |
6555 /* Make sure it was recognized as induction computation. */ | 7189 /* Make sure it was recognized as induction computation. */ |
6556 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def) | 7190 if (STMT_VINFO_DEF_TYPE (stmt_info) != vect_induction_def) |
6557 return false; | 7191 return false; |
6558 | 7192 |
6559 tree vectype = STMT_VINFO_VECTYPE (stmt_info); | 7193 tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
6560 unsigned nunits = TYPE_VECTOR_SUBPARTS (vectype); | 7194 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); |
6561 | 7195 |
6562 if (slp_node) | 7196 if (slp_node) |
6563 ncopies = 1; | 7197 ncopies = 1; |
6564 else | 7198 else |
6565 ncopies = vect_get_num_copies (loop_vinfo, vectype); | 7199 ncopies = vect_get_num_copies (loop_vinfo, vectype); |
6566 gcc_assert (ncopies >= 1); | 7200 gcc_assert (ncopies >= 1); |
6567 | 7201 |
6568 /* FORNOW. These restrictions should be relaxed. */ | 7202 /* FORNOW. These restrictions should be relaxed. */ |
6569 if (nested_in_vect_loop_p (loop, phi)) | 7203 if (nested_in_vect_loop_p (loop, stmt_info)) |
6570 { | 7204 { |
6571 imm_use_iterator imm_iter; | 7205 imm_use_iterator imm_iter; |
6572 use_operand_p use_p; | 7206 use_operand_p use_p; |
6573 gimple *exit_phi; | 7207 gimple *exit_phi; |
6574 edge latch_e; | 7208 edge latch_e; |
6601 break; | 7235 break; |
6602 } | 7236 } |
6603 } | 7237 } |
6604 if (exit_phi) | 7238 if (exit_phi) |
6605 { | 7239 { |
6606 stmt_vec_info exit_phi_vinfo = vinfo_for_stmt (exit_phi); | 7240 stmt_vec_info exit_phi_vinfo = loop_vinfo->lookup_stmt (exit_phi); |
6607 if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo) | 7241 if (!(STMT_VINFO_RELEVANT_P (exit_phi_vinfo) |
6608 && !STMT_VINFO_LIVE_P (exit_phi_vinfo))) | 7242 && !STMT_VINFO_LIVE_P (exit_phi_vinfo))) |
6609 { | 7243 { |
6610 if (dump_enabled_p ()) | 7244 if (dump_enabled_p ()) |
6611 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | 7245 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, |
6620 } | 7254 } |
6621 else | 7255 else |
6622 iv_loop = loop; | 7256 iv_loop = loop; |
6623 gcc_assert (iv_loop == (gimple_bb (phi))->loop_father); | 7257 gcc_assert (iv_loop == (gimple_bb (phi))->loop_father); |
6624 | 7258 |
7259 if (slp_node && !nunits.is_constant ()) | |
7260 { | |
7261 /* The current SLP code creates the initial value element-by-element. */ | |
7262 if (dump_enabled_p ()) | |
7263 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
7264 "SLP induction not supported for variable-length" | |
7265 " vectors.\n"); | |
7266 return false; | |
7267 } | |
7268 | |
6625 if (!vec_stmt) /* transformation not required. */ | 7269 if (!vec_stmt) /* transformation not required. */ |
6626 { | 7270 { |
6627 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type; | 7271 STMT_VINFO_TYPE (stmt_info) = induc_vec_info_type; |
6628 if (dump_enabled_p ()) | 7272 DUMP_VECT_SCOPE ("vectorizable_induction"); |
6629 dump_printf_loc (MSG_NOTE, vect_location, | 7273 vect_model_induction_cost (stmt_info, ncopies, cost_vec); |
6630 "=== vectorizable_induction ===\n"); | |
6631 vect_model_induction_cost (stmt_info, ncopies); | |
6632 return true; | 7274 return true; |
6633 } | 7275 } |
6634 | 7276 |
6635 /* Transform. */ | 7277 /* Transform. */ |
6636 | 7278 |
6650 | 7292 |
6651 pe = loop_preheader_edge (iv_loop); | 7293 pe = loop_preheader_edge (iv_loop); |
6652 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, | 7294 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, |
6653 loop_preheader_edge (iv_loop)); | 7295 loop_preheader_edge (iv_loop)); |
6654 | 7296 |
7297 stmts = NULL; | |
7298 if (!nested_in_vect_loop) | |
7299 { | |
7300 /* Convert the initial value to the desired type. */ | |
7301 tree new_type = TREE_TYPE (vectype); | |
7302 init_expr = gimple_convert (&stmts, new_type, init_expr); | |
7303 | |
7304 /* If we are using the loop mask to "peel" for alignment then we need | |
7305 to adjust the start value here. */ | |
7306 tree skip_niters = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo); | |
7307 if (skip_niters != NULL_TREE) | |
7308 { | |
7309 if (FLOAT_TYPE_P (vectype)) | |
7310 skip_niters = gimple_build (&stmts, FLOAT_EXPR, new_type, | |
7311 skip_niters); | |
7312 else | |
7313 skip_niters = gimple_convert (&stmts, new_type, skip_niters); | |
7314 tree skip_step = gimple_build (&stmts, MULT_EXPR, new_type, | |
7315 skip_niters, step_expr); | |
7316 init_expr = gimple_build (&stmts, MINUS_EXPR, new_type, | |
7317 init_expr, skip_step); | |
7318 } | |
7319 } | |
7320 | |
6655 /* Convert the step to the desired type. */ | 7321 /* Convert the step to the desired type. */ |
6656 stmts = NULL; | |
6657 step_expr = gimple_convert (&stmts, TREE_TYPE (vectype), step_expr); | 7322 step_expr = gimple_convert (&stmts, TREE_TYPE (vectype), step_expr); |
7323 | |
6658 if (stmts) | 7324 if (stmts) |
6659 { | 7325 { |
6660 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); | 7326 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); |
6661 gcc_assert (!new_bb); | 7327 gcc_assert (!new_bb); |
6662 } | 7328 } |
6663 | 7329 |
6664 /* Find the first insertion point in the BB. */ | 7330 /* Find the first insertion point in the BB. */ |
7331 basic_block bb = gimple_bb (phi); | |
6665 si = gsi_after_labels (bb); | 7332 si = gsi_after_labels (bb); |
6666 | 7333 |
6667 /* For SLP induction we have to generate several IVs as for example | 7334 /* For SLP induction we have to generate several IVs as for example |
6668 with group size 3 we need [i, i, i, i + S] [i + S, i + S, i + 2*S, i + 2*S] | 7335 with group size 3 we need [i, i, i, i + S] [i + S, i + S, i + 2*S, i + 2*S] |
6669 [i + 2*S, i + 3*S, i + 3*S, i + 3*S]. The step is the same uniform | 7336 [i + 2*S, i + 3*S, i + 3*S, i + 3*S]. The step is the same uniform |
6670 [VF*S, VF*S, VF*S, VF*S] for all. */ | 7337 [VF*S, VF*S, VF*S, VF*S] for all. */ |
6671 if (slp_node) | 7338 if (slp_node) |
6672 { | 7339 { |
6673 /* Convert the init to the desired type. */ | 7340 /* Enforced above. */ |
6674 stmts = NULL; | 7341 unsigned int const_nunits = nunits.to_constant (); |
6675 init_expr = gimple_convert (&stmts, TREE_TYPE (vectype), init_expr); | |
6676 if (stmts) | |
6677 { | |
6678 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); | |
6679 gcc_assert (!new_bb); | |
6680 } | |
6681 | 7342 |
6682 /* Generate [VF*S, VF*S, ... ]. */ | 7343 /* Generate [VF*S, VF*S, ... ]. */ |
6683 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) | 7344 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) |
6684 { | 7345 { |
6685 expr = build_int_cst (integer_type_node, vf); | 7346 expr = build_int_cst (integer_type_node, vf); |
6688 else | 7349 else |
6689 expr = build_int_cst (TREE_TYPE (step_expr), vf); | 7350 expr = build_int_cst (TREE_TYPE (step_expr), vf); |
6690 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr), | 7351 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr), |
6691 expr, step_expr); | 7352 expr, step_expr); |
6692 if (! CONSTANT_CLASS_P (new_name)) | 7353 if (! CONSTANT_CLASS_P (new_name)) |
6693 new_name = vect_init_vector (phi, new_name, | 7354 new_name = vect_init_vector (stmt_info, new_name, |
6694 TREE_TYPE (step_expr), NULL); | 7355 TREE_TYPE (step_expr), NULL); |
6695 new_vec = build_vector_from_val (vectype, new_name); | 7356 new_vec = build_vector_from_val (vectype, new_name); |
6696 vec_step = vect_init_vector (phi, new_vec, vectype, NULL); | 7357 vec_step = vect_init_vector (stmt_info, new_vec, vectype, NULL); |
6697 | 7358 |
6698 /* Now generate the IVs. */ | 7359 /* Now generate the IVs. */ |
6699 unsigned group_size = SLP_TREE_SCALAR_STMTS (slp_node).length (); | 7360 unsigned group_size = SLP_TREE_SCALAR_STMTS (slp_node).length (); |
6700 unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); | 7361 unsigned nvects = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); |
6701 unsigned elts = nunits * nvects; | 7362 unsigned elts = const_nunits * nvects; |
6702 unsigned nivs = least_common_multiple (group_size, nunits) / nunits; | 7363 unsigned nivs = least_common_multiple (group_size, |
7364 const_nunits) / const_nunits; | |
6703 gcc_assert (elts % group_size == 0); | 7365 gcc_assert (elts % group_size == 0); |
6704 tree elt = init_expr; | 7366 tree elt = init_expr; |
6705 unsigned ivn; | 7367 unsigned ivn; |
6706 for (ivn = 0; ivn < nivs; ++ivn) | 7368 for (ivn = 0; ivn < nivs; ++ivn) |
6707 { | 7369 { |
6708 auto_vec<tree, 32> elts (nunits); | 7370 tree_vector_builder elts (vectype, const_nunits, 1); |
6709 stmts = NULL; | 7371 stmts = NULL; |
6710 for (unsigned eltn = 0; eltn < nunits; ++eltn) | 7372 for (unsigned eltn = 0; eltn < const_nunits; ++eltn) |
6711 { | 7373 { |
6712 if (ivn*nunits + eltn >= group_size | 7374 if (ivn*const_nunits + eltn >= group_size |
6713 && (ivn*nunits + eltn) % group_size == 0) | 7375 && (ivn * const_nunits + eltn) % group_size == 0) |
6714 elt = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (elt), | 7376 elt = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (elt), |
6715 elt, step_expr); | 7377 elt, step_expr); |
6716 elts.quick_push (elt); | 7378 elts.quick_push (elt); |
6717 } | 7379 } |
6718 vec_init = gimple_build_vector (&stmts, vectype, elts); | 7380 vec_init = gimple_build_vector (&stmts, &elts); |
6719 if (stmts) | 7381 if (stmts) |
6720 { | 7382 { |
6721 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); | 7383 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); |
6722 gcc_assert (!new_bb); | 7384 gcc_assert (!new_bb); |
6723 } | 7385 } |
6724 | 7386 |
6725 /* Create the induction-phi that defines the induction-operand. */ | 7387 /* Create the induction-phi that defines the induction-operand. */ |
6726 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_"); | 7388 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_"); |
6727 induction_phi = create_phi_node (vec_dest, iv_loop->header); | 7389 induction_phi = create_phi_node (vec_dest, iv_loop->header); |
6728 set_vinfo_for_stmt (induction_phi, | 7390 stmt_vec_info induction_phi_info |
6729 new_stmt_vec_info (induction_phi, loop_vinfo)); | 7391 = loop_vinfo->add_stmt (induction_phi); |
6730 induc_def = PHI_RESULT (induction_phi); | 7392 induc_def = PHI_RESULT (induction_phi); |
6731 | 7393 |
6732 /* Create the iv update inside the loop */ | 7394 /* Create the iv update inside the loop */ |
6733 vec_def = make_ssa_name (vec_dest); | 7395 vec_def = make_ssa_name (vec_dest); |
6734 new_stmt = gimple_build_assign (vec_def, PLUS_EXPR, induc_def, vec_step); | 7396 new_stmt = gimple_build_assign (vec_def, PLUS_EXPR, induc_def, vec_step); |
6735 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT); | 7397 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT); |
6736 set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo)); | 7398 loop_vinfo->add_stmt (new_stmt); |
6737 | 7399 |
6738 /* Set the arguments of the phi node: */ | 7400 /* Set the arguments of the phi node: */ |
6739 add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION); | 7401 add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION); |
6740 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), | 7402 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), |
6741 UNKNOWN_LOCATION); | 7403 UNKNOWN_LOCATION); |
6742 | 7404 |
6743 SLP_TREE_VEC_STMTS (slp_node).quick_push (induction_phi); | 7405 SLP_TREE_VEC_STMTS (slp_node).quick_push (induction_phi_info); |
6744 } | 7406 } |
6745 | 7407 |
6746 /* Re-use IVs when we can. */ | 7408 /* Re-use IVs when we can. */ |
6747 if (ivn < nvects) | 7409 if (ivn < nvects) |
6748 { | 7410 { |
6749 unsigned vfp | 7411 unsigned vfp |
6750 = least_common_multiple (group_size, nunits) / group_size; | 7412 = least_common_multiple (group_size, const_nunits) / group_size; |
6751 /* Generate [VF'*S, VF'*S, ... ]. */ | 7413 /* Generate [VF'*S, VF'*S, ... ]. */ |
6752 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) | 7414 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))) |
6753 { | 7415 { |
6754 expr = build_int_cst (integer_type_node, vfp); | 7416 expr = build_int_cst (integer_type_node, vfp); |
6755 expr = fold_convert (TREE_TYPE (step_expr), expr); | 7417 expr = fold_convert (TREE_TYPE (step_expr), expr); |
6757 else | 7419 else |
6758 expr = build_int_cst (TREE_TYPE (step_expr), vfp); | 7420 expr = build_int_cst (TREE_TYPE (step_expr), vfp); |
6759 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr), | 7421 new_name = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr), |
6760 expr, step_expr); | 7422 expr, step_expr); |
6761 if (! CONSTANT_CLASS_P (new_name)) | 7423 if (! CONSTANT_CLASS_P (new_name)) |
6762 new_name = vect_init_vector (phi, new_name, | 7424 new_name = vect_init_vector (stmt_info, new_name, |
6763 TREE_TYPE (step_expr), NULL); | 7425 TREE_TYPE (step_expr), NULL); |
6764 new_vec = build_vector_from_val (vectype, new_name); | 7426 new_vec = build_vector_from_val (vectype, new_name); |
6765 vec_step = vect_init_vector (phi, new_vec, vectype, NULL); | 7427 vec_step = vect_init_vector (stmt_info, new_vec, vectype, NULL); |
6766 for (; ivn < nvects; ++ivn) | 7428 for (; ivn < nvects; ++ivn) |
6767 { | 7429 { |
6768 gimple *iv = SLP_TREE_VEC_STMTS (slp_node)[ivn - nivs]; | 7430 gimple *iv = SLP_TREE_VEC_STMTS (slp_node)[ivn - nivs]->stmt; |
6769 tree def; | 7431 tree def; |
6770 if (gimple_code (iv) == GIMPLE_PHI) | 7432 if (gimple_code (iv) == GIMPLE_PHI) |
6771 def = gimple_phi_result (iv); | 7433 def = gimple_phi_result (iv); |
6772 else | 7434 else |
6773 def = gimple_assign_lhs (iv); | 7435 def = gimple_assign_lhs (iv); |
6779 else | 7441 else |
6780 { | 7442 { |
6781 gimple_stmt_iterator tgsi = gsi_for_stmt (iv); | 7443 gimple_stmt_iterator tgsi = gsi_for_stmt (iv); |
6782 gsi_insert_after (&tgsi, new_stmt, GSI_CONTINUE_LINKING); | 7444 gsi_insert_after (&tgsi, new_stmt, GSI_CONTINUE_LINKING); |
6783 } | 7445 } |
6784 set_vinfo_for_stmt (new_stmt, | 7446 SLP_TREE_VEC_STMTS (slp_node).quick_push |
6785 new_stmt_vec_info (new_stmt, loop_vinfo)); | 7447 (loop_vinfo->add_stmt (new_stmt)); |
6786 SLP_TREE_VEC_STMTS (slp_node).quick_push (new_stmt); | |
6787 } | 7448 } |
6788 } | 7449 } |
6789 | 7450 |
6790 return true; | 7451 return true; |
6791 } | 7452 } |
6794 if (nested_in_vect_loop) | 7455 if (nested_in_vect_loop) |
6795 { | 7456 { |
6796 /* iv_loop is nested in the loop to be vectorized. init_expr had already | 7457 /* iv_loop is nested in the loop to be vectorized. init_expr had already |
6797 been created during vectorization of previous stmts. We obtain it | 7458 been created during vectorization of previous stmts. We obtain it |
6798 from the STMT_VINFO_VEC_STMT of the defining stmt. */ | 7459 from the STMT_VINFO_VEC_STMT of the defining stmt. */ |
6799 vec_init = vect_get_vec_def_for_operand (init_expr, phi); | 7460 vec_init = vect_get_vec_def_for_operand (init_expr, stmt_info); |
6800 /* If the initial value is not of proper type, convert it. */ | 7461 /* If the initial value is not of proper type, convert it. */ |
6801 if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init))) | 7462 if (!useless_type_conversion_p (vectype, TREE_TYPE (vec_init))) |
6802 { | 7463 { |
6803 new_stmt | 7464 new_stmt |
6804 = gimple_build_assign (vect_get_new_ssa_name (vectype, | 7465 = gimple_build_assign (vect_get_new_ssa_name (vectype, |
6809 vec_init)); | 7470 vec_init)); |
6810 vec_init = gimple_assign_lhs (new_stmt); | 7471 vec_init = gimple_assign_lhs (new_stmt); |
6811 new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop), | 7472 new_bb = gsi_insert_on_edge_immediate (loop_preheader_edge (iv_loop), |
6812 new_stmt); | 7473 new_stmt); |
6813 gcc_assert (!new_bb); | 7474 gcc_assert (!new_bb); |
6814 set_vinfo_for_stmt (new_stmt, | 7475 loop_vinfo->add_stmt (new_stmt); |
6815 new_stmt_vec_info (new_stmt, loop_vinfo)); | |
6816 } | 7476 } |
6817 } | 7477 } |
6818 else | 7478 else |
6819 { | 7479 { |
6820 /* iv_loop is the loop to be vectorized. Create: | 7480 /* iv_loop is the loop to be vectorized. Create: |
6821 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */ | 7481 vec_init = [X, X+S, X+2*S, X+3*S] (S = step_expr, X = init_expr) */ |
6822 stmts = NULL; | 7482 stmts = NULL; |
6823 new_name = gimple_convert (&stmts, TREE_TYPE (vectype), init_expr); | 7483 new_name = gimple_convert (&stmts, TREE_TYPE (vectype), init_expr); |
6824 | 7484 |
6825 auto_vec<tree, 32> elts (nunits); | 7485 unsigned HOST_WIDE_INT const_nunits; |
6826 elts.quick_push (new_name); | 7486 if (nunits.is_constant (&const_nunits)) |
6827 for (i = 1; i < nunits; i++) | 7487 { |
6828 { | 7488 tree_vector_builder elts (vectype, const_nunits, 1); |
6829 /* Create: new_name_i = new_name + step_expr */ | |
6830 new_name = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (new_name), | |
6831 new_name, step_expr); | |
6832 elts.quick_push (new_name); | 7489 elts.quick_push (new_name); |
6833 } | 7490 for (i = 1; i < const_nunits; i++) |
6834 /* Create a vector from [new_name_0, new_name_1, ..., | 7491 { |
6835 new_name_nunits-1] */ | 7492 /* Create: new_name_i = new_name + step_expr */ |
6836 vec_init = gimple_build_vector (&stmts, vectype, elts); | 7493 new_name = gimple_build (&stmts, PLUS_EXPR, TREE_TYPE (new_name), |
7494 new_name, step_expr); | |
7495 elts.quick_push (new_name); | |
7496 } | |
7497 /* Create a vector from [new_name_0, new_name_1, ..., | |
7498 new_name_nunits-1] */ | |
7499 vec_init = gimple_build_vector (&stmts, &elts); | |
7500 } | |
7501 else if (INTEGRAL_TYPE_P (TREE_TYPE (step_expr))) | |
7502 /* Build the initial value directly from a VEC_SERIES_EXPR. */ | |
7503 vec_init = gimple_build (&stmts, VEC_SERIES_EXPR, vectype, | |
7504 new_name, step_expr); | |
7505 else | |
7506 { | |
7507 /* Build: | |
7508 [base, base, base, ...] | |
7509 + (vectype) [0, 1, 2, ...] * [step, step, step, ...]. */ | |
7510 gcc_assert (SCALAR_FLOAT_TYPE_P (TREE_TYPE (step_expr))); | |
7511 gcc_assert (flag_associative_math); | |
7512 tree index = build_index_vector (vectype, 0, 1); | |
7513 tree base_vec = gimple_build_vector_from_val (&stmts, vectype, | |
7514 new_name); | |
7515 tree step_vec = gimple_build_vector_from_val (&stmts, vectype, | |
7516 step_expr); | |
7517 vec_init = gimple_build (&stmts, FLOAT_EXPR, vectype, index); | |
7518 vec_init = gimple_build (&stmts, MULT_EXPR, vectype, | |
7519 vec_init, step_vec); | |
7520 vec_init = gimple_build (&stmts, PLUS_EXPR, vectype, | |
7521 vec_init, base_vec); | |
7522 } | |
7523 | |
6837 if (stmts) | 7524 if (stmts) |
6838 { | 7525 { |
6839 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); | 7526 new_bb = gsi_insert_seq_on_edge_immediate (pe, stmts); |
6840 gcc_assert (!new_bb); | 7527 gcc_assert (!new_bb); |
6841 } | 7528 } |
6870 | 7557 |
6871 t = unshare_expr (new_name); | 7558 t = unshare_expr (new_name); |
6872 gcc_assert (CONSTANT_CLASS_P (new_name) | 7559 gcc_assert (CONSTANT_CLASS_P (new_name) |
6873 || TREE_CODE (new_name) == SSA_NAME); | 7560 || TREE_CODE (new_name) == SSA_NAME); |
6874 new_vec = build_vector_from_val (vectype, t); | 7561 new_vec = build_vector_from_val (vectype, t); |
6875 vec_step = vect_init_vector (phi, new_vec, vectype, NULL); | 7562 vec_step = vect_init_vector (stmt_info, new_vec, vectype, NULL); |
6876 | 7563 |
6877 | 7564 |
6878 /* Create the following def-use cycle: | 7565 /* Create the following def-use cycle: |
6879 loop prolog: | 7566 loop prolog: |
6880 vec_init = ... | 7567 vec_init = ... |
6887 vec_loop = vec_iv + vec_step; */ | 7574 vec_loop = vec_iv + vec_step; */ |
6888 | 7575 |
6889 /* Create the induction-phi that defines the induction-operand. */ | 7576 /* Create the induction-phi that defines the induction-operand. */ |
6890 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_"); | 7577 vec_dest = vect_get_new_vect_var (vectype, vect_simple_var, "vec_iv_"); |
6891 induction_phi = create_phi_node (vec_dest, iv_loop->header); | 7578 induction_phi = create_phi_node (vec_dest, iv_loop->header); |
6892 set_vinfo_for_stmt (induction_phi, | 7579 stmt_vec_info induction_phi_info = loop_vinfo->add_stmt (induction_phi); |
6893 new_stmt_vec_info (induction_phi, loop_vinfo)); | |
6894 induc_def = PHI_RESULT (induction_phi); | 7580 induc_def = PHI_RESULT (induction_phi); |
6895 | 7581 |
6896 /* Create the iv update inside the loop */ | 7582 /* Create the iv update inside the loop */ |
6897 vec_def = make_ssa_name (vec_dest); | 7583 vec_def = make_ssa_name (vec_dest); |
6898 new_stmt = gimple_build_assign (vec_def, PLUS_EXPR, induc_def, vec_step); | 7584 new_stmt = gimple_build_assign (vec_def, PLUS_EXPR, induc_def, vec_step); |
6899 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT); | 7585 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT); |
6900 set_vinfo_for_stmt (new_stmt, new_stmt_vec_info (new_stmt, loop_vinfo)); | 7586 stmt_vec_info new_stmt_info = loop_vinfo->add_stmt (new_stmt); |
6901 | 7587 |
6902 /* Set the arguments of the phi node: */ | 7588 /* Set the arguments of the phi node: */ |
6903 add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION); | 7589 add_phi_arg (induction_phi, vec_init, pe, UNKNOWN_LOCATION); |
6904 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), | 7590 add_phi_arg (induction_phi, vec_def, loop_latch_edge (iv_loop), |
6905 UNKNOWN_LOCATION); | 7591 UNKNOWN_LOCATION); |
6906 | 7592 |
6907 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = induction_phi; | 7593 STMT_VINFO_VEC_STMT (stmt_info) = *vec_stmt = induction_phi_info; |
6908 | 7594 |
6909 /* In case that vectorization factor (VF) is bigger than the number | 7595 /* In case that vectorization factor (VF) is bigger than the number |
6910 of elements that we can fit in a vectype (nunits), we have to generate | 7596 of elements that we can fit in a vectype (nunits), we have to generate |
6911 more than one vector stmt - i.e - we need to "unroll" the | 7597 more than one vector stmt - i.e - we need to "unroll" the |
6912 vector stmt by a factor VF/nunits. For more details see documentation | 7598 vector stmt by a factor VF/nunits. For more details see documentation |
6937 | 7623 |
6938 t = unshare_expr (new_name); | 7624 t = unshare_expr (new_name); |
6939 gcc_assert (CONSTANT_CLASS_P (new_name) | 7625 gcc_assert (CONSTANT_CLASS_P (new_name) |
6940 || TREE_CODE (new_name) == SSA_NAME); | 7626 || TREE_CODE (new_name) == SSA_NAME); |
6941 new_vec = build_vector_from_val (vectype, t); | 7627 new_vec = build_vector_from_val (vectype, t); |
6942 vec_step = vect_init_vector (phi, new_vec, vectype, NULL); | 7628 vec_step = vect_init_vector (stmt_info, new_vec, vectype, NULL); |
6943 | 7629 |
6944 vec_def = induc_def; | 7630 vec_def = induc_def; |
6945 prev_stmt_vinfo = vinfo_for_stmt (induction_phi); | 7631 prev_stmt_vinfo = induction_phi_info; |
6946 for (i = 1; i < ncopies; i++) | 7632 for (i = 1; i < ncopies; i++) |
6947 { | 7633 { |
6948 /* vec_i = vec_prev + vec_step */ | 7634 /* vec_i = vec_prev + vec_step */ |
6949 new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR, | 7635 new_stmt = gimple_build_assign (vec_dest, PLUS_EXPR, |
6950 vec_def, vec_step); | 7636 vec_def, vec_step); |
6951 vec_def = make_ssa_name (vec_dest, new_stmt); | 7637 vec_def = make_ssa_name (vec_dest, new_stmt); |
6952 gimple_assign_set_lhs (new_stmt, vec_def); | 7638 gimple_assign_set_lhs (new_stmt, vec_def); |
6953 | 7639 |
6954 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT); | 7640 gsi_insert_before (&si, new_stmt, GSI_SAME_STMT); |
6955 set_vinfo_for_stmt (new_stmt, | 7641 new_stmt_info = loop_vinfo->add_stmt (new_stmt); |
6956 new_stmt_vec_info (new_stmt, loop_vinfo)); | 7642 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt_info; |
6957 STMT_VINFO_RELATED_STMT (prev_stmt_vinfo) = new_stmt; | 7643 prev_stmt_vinfo = new_stmt_info; |
6958 prev_stmt_vinfo = vinfo_for_stmt (new_stmt); | |
6959 } | 7644 } |
6960 } | 7645 } |
6961 | 7646 |
6962 if (nested_in_vect_loop) | 7647 if (nested_in_vect_loop) |
6963 { | 7648 { |
6976 break; | 7661 break; |
6977 } | 7662 } |
6978 } | 7663 } |
6979 if (exit_phi) | 7664 if (exit_phi) |
6980 { | 7665 { |
6981 stmt_vec_info stmt_vinfo = vinfo_for_stmt (exit_phi); | 7666 stmt_vec_info stmt_vinfo = loop_vinfo->lookup_stmt (exit_phi); |
6982 /* FORNOW. Currently not supporting the case that an inner-loop induction | 7667 /* FORNOW. Currently not supporting the case that an inner-loop induction |
6983 is not used in the outer-loop (i.e. only outside the outer-loop). */ | 7668 is not used in the outer-loop (i.e. only outside the outer-loop). */ |
6984 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo) | 7669 gcc_assert (STMT_VINFO_RELEVANT_P (stmt_vinfo) |
6985 && !STMT_VINFO_LIVE_P (stmt_vinfo)); | 7670 && !STMT_VINFO_LIVE_P (stmt_vinfo)); |
6986 | 7671 |
6987 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt; | 7672 STMT_VINFO_VEC_STMT (stmt_vinfo) = new_stmt_info; |
6988 if (dump_enabled_p ()) | 7673 if (dump_enabled_p ()) |
6989 { | 7674 dump_printf_loc (MSG_NOTE, vect_location, |
6990 dump_printf_loc (MSG_NOTE, vect_location, | 7675 "vector of inductions after inner-loop:%G", |
6991 "vector of inductions after inner-loop:"); | 7676 new_stmt); |
6992 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, new_stmt, 0); | |
6993 } | |
6994 } | 7677 } |
6995 } | 7678 } |
6996 | 7679 |
6997 | 7680 |
6998 if (dump_enabled_p ()) | 7681 if (dump_enabled_p ()) |
6999 { | 7682 dump_printf_loc (MSG_NOTE, vect_location, |
7000 dump_printf_loc (MSG_NOTE, vect_location, | 7683 "transform induction: created def-use cycle: %G%G", |
7001 "transform induction: created def-use cycle: "); | 7684 induction_phi, SSA_NAME_DEF_STMT (vec_def)); |
7002 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, induction_phi, 0); | |
7003 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, | |
7004 SSA_NAME_DEF_STMT (vec_def), 0); | |
7005 } | |
7006 | 7685 |
7007 return true; | 7686 return true; |
7008 } | 7687 } |
7009 | 7688 |
7010 /* Function vectorizable_live_operation. | 7689 /* Function vectorizable_live_operation. |
7011 | 7690 |
7012 STMT computes a value that is used outside the loop. Check if | 7691 STMT_INFO computes a value that is used outside the loop. Check if |
7013 it can be supported. */ | 7692 it can be supported. */ |
7014 | 7693 |
7015 bool | 7694 bool |
7016 vectorizable_live_operation (gimple *stmt, | 7695 vectorizable_live_operation (stmt_vec_info stmt_info, |
7017 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED, | 7696 gimple_stmt_iterator *gsi ATTRIBUTE_UNUSED, |
7018 slp_tree slp_node, int slp_index, | 7697 slp_tree slp_node, int slp_index, |
7019 gimple **vec_stmt) | 7698 stmt_vec_info *vec_stmt, |
7699 stmt_vector_for_cost *) | |
7020 { | 7700 { |
7021 stmt_vec_info stmt_info = vinfo_for_stmt (stmt); | |
7022 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); | 7701 loop_vec_info loop_vinfo = STMT_VINFO_LOOP_VINFO (stmt_info); |
7023 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | 7702 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
7024 imm_use_iterator imm_iter; | 7703 imm_use_iterator imm_iter; |
7025 tree lhs, lhs_type, bitsize, vec_bitsize; | 7704 tree lhs, lhs_type, bitsize, vec_bitsize; |
7026 tree vectype = STMT_VINFO_VECTYPE (stmt_info); | 7705 tree vectype = STMT_VINFO_VECTYPE (stmt_info); |
7027 int nunits = TYPE_VECTOR_SUBPARTS (vectype); | 7706 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (vectype); |
7028 int ncopies; | 7707 int ncopies; |
7029 gimple *use_stmt; | 7708 gimple *use_stmt; |
7030 auto_vec<tree> vec_oprnds; | 7709 auto_vec<tree> vec_oprnds; |
7710 int vec_entry = 0; | |
7711 poly_uint64 vec_index = 0; | |
7031 | 7712 |
7032 gcc_assert (STMT_VINFO_LIVE_P (stmt_info)); | 7713 gcc_assert (STMT_VINFO_LIVE_P (stmt_info)); |
7033 | 7714 |
7034 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def) | 7715 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def) |
7035 return false; | 7716 return false; |
7036 | 7717 |
7037 /* FORNOW. CHECKME. */ | 7718 /* FORNOW. CHECKME. */ |
7038 if (nested_in_vect_loop_p (loop, stmt)) | 7719 if (nested_in_vect_loop_p (loop, stmt_info)) |
7039 return false; | 7720 return false; |
7040 | 7721 |
7041 /* If STMT is not relevant and it is a simple assignment and its inputs are | 7722 /* If STMT is not relevant and it is a simple assignment and its inputs are |
7042 invariant then it can remain in place, unvectorized. The original last | 7723 invariant then it can remain in place, unvectorized. The original last |
7043 scalar value that it computes will be used. */ | 7724 scalar value that it computes will be used. */ |
7044 if (!STMT_VINFO_RELEVANT_P (stmt_info)) | 7725 if (!STMT_VINFO_RELEVANT_P (stmt_info)) |
7045 { | 7726 { |
7046 gcc_assert (is_simple_and_all_uses_invariant (stmt, loop_vinfo)); | 7727 gcc_assert (is_simple_and_all_uses_invariant (stmt_info, loop_vinfo)); |
7047 if (dump_enabled_p ()) | 7728 if (dump_enabled_p ()) |
7048 dump_printf_loc (MSG_NOTE, vect_location, | 7729 dump_printf_loc (MSG_NOTE, vect_location, |
7049 "statement is simple and uses invariant. Leaving in " | 7730 "statement is simple and uses invariant. Leaving in " |
7050 "place.\n"); | 7731 "place.\n"); |
7051 return true; | 7732 return true; |
7054 if (slp_node) | 7735 if (slp_node) |
7055 ncopies = 1; | 7736 ncopies = 1; |
7056 else | 7737 else |
7057 ncopies = vect_get_num_copies (loop_vinfo, vectype); | 7738 ncopies = vect_get_num_copies (loop_vinfo, vectype); |
7058 | 7739 |
7740 if (slp_node) | |
7741 { | |
7742 gcc_assert (slp_index >= 0); | |
7743 | |
7744 int num_scalar = SLP_TREE_SCALAR_STMTS (slp_node).length (); | |
7745 int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); | |
7746 | |
7747 /* Get the last occurrence of the scalar index from the concatenation of | |
7748 all the slp vectors. Calculate which slp vector it is and the index | |
7749 within. */ | |
7750 poly_uint64 pos = (num_vec * nunits) - num_scalar + slp_index; | |
7751 | |
7752 /* Calculate which vector contains the result, and which lane of | |
7753 that vector we need. */ | |
7754 if (!can_div_trunc_p (pos, nunits, &vec_entry, &vec_index)) | |
7755 { | |
7756 if (dump_enabled_p ()) | |
7757 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
7758 "Cannot determine which vector holds the" | |
7759 " final result.\n"); | |
7760 return false; | |
7761 } | |
7762 } | |
7763 | |
7059 if (!vec_stmt) | 7764 if (!vec_stmt) |
7060 /* No transformation required. */ | 7765 { |
7061 return true; | 7766 /* No transformation required. */ |
7062 | 7767 if (LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo)) |
7063 /* If stmt has a related stmt, then use that for getting the lhs. */ | 7768 { |
7064 if (is_pattern_stmt_p (stmt_info)) | 7769 if (!direct_internal_fn_supported_p (IFN_EXTRACT_LAST, vectype, |
7065 stmt = STMT_VINFO_RELATED_STMT (stmt_info); | 7770 OPTIMIZE_FOR_SPEED)) |
7771 { | |
7772 if (dump_enabled_p ()) | |
7773 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
7774 "can't use a fully-masked loop because " | |
7775 "the target doesn't support extract last " | |
7776 "reduction.\n"); | |
7777 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; | |
7778 } | |
7779 else if (slp_node) | |
7780 { | |
7781 if (dump_enabled_p ()) | |
7782 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
7783 "can't use a fully-masked loop because an " | |
7784 "SLP statement is live after the loop.\n"); | |
7785 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; | |
7786 } | |
7787 else if (ncopies > 1) | |
7788 { | |
7789 if (dump_enabled_p ()) | |
7790 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
7791 "can't use a fully-masked loop because" | |
7792 " ncopies is greater than 1.\n"); | |
7793 LOOP_VINFO_CAN_FULLY_MASK_P (loop_vinfo) = false; | |
7794 } | |
7795 else | |
7796 { | |
7797 gcc_assert (ncopies == 1 && !slp_node); | |
7798 vect_record_loop_mask (loop_vinfo, | |
7799 &LOOP_VINFO_MASKS (loop_vinfo), | |
7800 1, vectype); | |
7801 } | |
7802 } | |
7803 return true; | |
7804 } | |
7805 | |
7806 /* Use the lhs of the original scalar statement. */ | |
7807 gimple *stmt = vect_orig_stmt (stmt_info)->stmt; | |
7066 | 7808 |
7067 lhs = (is_a <gphi *> (stmt)) ? gimple_phi_result (stmt) | 7809 lhs = (is_a <gphi *> (stmt)) ? gimple_phi_result (stmt) |
7068 : gimple_get_lhs (stmt); | 7810 : gimple_get_lhs (stmt); |
7069 lhs_type = TREE_TYPE (lhs); | 7811 lhs_type = TREE_TYPE (lhs); |
7070 | 7812 |
7075 | 7817 |
7076 /* Get the vectorized lhs of STMT and the lane to use (counted in bits). */ | 7818 /* Get the vectorized lhs of STMT and the lane to use (counted in bits). */ |
7077 tree vec_lhs, bitstart; | 7819 tree vec_lhs, bitstart; |
7078 if (slp_node) | 7820 if (slp_node) |
7079 { | 7821 { |
7080 gcc_assert (slp_index >= 0); | 7822 gcc_assert (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)); |
7081 | |
7082 int num_scalar = SLP_TREE_SCALAR_STMTS (slp_node).length (); | |
7083 int num_vec = SLP_TREE_NUMBER_OF_VEC_STMTS (slp_node); | |
7084 | |
7085 /* Get the last occurrence of the scalar index from the concatenation of | |
7086 all the slp vectors. Calculate which slp vector it is and the index | |
7087 within. */ | |
7088 int pos = (num_vec * nunits) - num_scalar + slp_index; | |
7089 int vec_entry = pos / nunits; | |
7090 int vec_index = pos % nunits; | |
7091 | 7823 |
7092 /* Get the correct slp vectorized stmt. */ | 7824 /* Get the correct slp vectorized stmt. */ |
7093 vec_lhs = gimple_get_lhs (SLP_TREE_VEC_STMTS (slp_node)[vec_entry]); | 7825 gimple *vec_stmt = SLP_TREE_VEC_STMTS (slp_node)[vec_entry]->stmt; |
7826 if (gphi *phi = dyn_cast <gphi *> (vec_stmt)) | |
7827 vec_lhs = gimple_phi_result (phi); | |
7828 else | |
7829 vec_lhs = gimple_get_lhs (vec_stmt); | |
7094 | 7830 |
7095 /* Get entry to use. */ | 7831 /* Get entry to use. */ |
7096 bitstart = bitsize_int (vec_index); | 7832 bitstart = bitsize_int (vec_index); |
7097 bitstart = int_const_binop (MULT_EXPR, bitsize, bitstart); | 7833 bitstart = int_const_binop (MULT_EXPR, bitsize, bitstart); |
7098 } | 7834 } |
7099 else | 7835 else |
7100 { | 7836 { |
7101 enum vect_def_type dt = STMT_VINFO_DEF_TYPE (stmt_info); | 7837 enum vect_def_type dt = STMT_VINFO_DEF_TYPE (stmt_info); |
7102 vec_lhs = vect_get_vec_def_for_operand_1 (stmt, dt); | 7838 vec_lhs = vect_get_vec_def_for_operand_1 (stmt_info, dt); |
7839 gcc_checking_assert (ncopies == 1 | |
7840 || !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)); | |
7103 | 7841 |
7104 /* For multiple copies, get the last copy. */ | 7842 /* For multiple copies, get the last copy. */ |
7105 for (int i = 1; i < ncopies; ++i) | 7843 for (int i = 1; i < ncopies; ++i) |
7106 vec_lhs = vect_get_vec_def_for_stmt_copy (vect_unknown_def_type, | 7844 vec_lhs = vect_get_vec_def_for_stmt_copy (loop_vinfo, vec_lhs); |
7107 vec_lhs); | |
7108 | 7845 |
7109 /* Get the last lane in the vector. */ | 7846 /* Get the last lane in the vector. */ |
7110 bitstart = int_const_binop (MINUS_EXPR, vec_bitsize, bitsize); | 7847 bitstart = int_const_binop (MINUS_EXPR, vec_bitsize, bitsize); |
7111 } | 7848 } |
7112 | 7849 |
7113 /* Create a new vectorized stmt for the uses of STMT and insert outside the | |
7114 loop. */ | |
7115 gimple_seq stmts = NULL; | 7850 gimple_seq stmts = NULL; |
7116 tree bftype = TREE_TYPE (vectype); | 7851 tree new_tree; |
7117 if (VECTOR_BOOLEAN_TYPE_P (vectype)) | 7852 if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) |
7118 bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1); | 7853 { |
7119 tree new_tree = build3 (BIT_FIELD_REF, bftype, vec_lhs, bitsize, bitstart); | 7854 /* Emit: |
7120 new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree), &stmts, | 7855 |
7121 true, NULL_TREE); | 7856 SCALAR_RES = EXTRACT_LAST <VEC_LHS, MASK> |
7857 | |
7858 where VEC_LHS is the vectorized live-out result and MASK is | |
7859 the loop mask for the final iteration. */ | |
7860 gcc_assert (ncopies == 1 && !slp_node); | |
7861 tree scalar_type = TREE_TYPE (STMT_VINFO_VECTYPE (stmt_info)); | |
7862 tree mask = vect_get_loop_mask (gsi, &LOOP_VINFO_MASKS (loop_vinfo), | |
7863 1, vectype, 0); | |
7864 tree scalar_res = gimple_build (&stmts, CFN_EXTRACT_LAST, | |
7865 scalar_type, mask, vec_lhs); | |
7866 | |
7867 /* Convert the extracted vector element to the required scalar type. */ | |
7868 new_tree = gimple_convert (&stmts, lhs_type, scalar_res); | |
7869 } | |
7870 else | |
7871 { | |
7872 tree bftype = TREE_TYPE (vectype); | |
7873 if (VECTOR_BOOLEAN_TYPE_P (vectype)) | |
7874 bftype = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 1); | |
7875 new_tree = build3 (BIT_FIELD_REF, bftype, vec_lhs, bitsize, bitstart); | |
7876 new_tree = force_gimple_operand (fold_convert (lhs_type, new_tree), | |
7877 &stmts, true, NULL_TREE); | |
7878 } | |
7879 | |
7122 if (stmts) | 7880 if (stmts) |
7123 gsi_insert_seq_on_edge_immediate (single_exit (loop), stmts); | 7881 gsi_insert_seq_on_edge_immediate (single_exit (loop), stmts); |
7124 | 7882 |
7125 /* Replace use of lhs with newly computed result. If the use stmt is a | 7883 /* Replace use of lhs with newly computed result. If the use stmt is a |
7126 single arg PHI, just replace all uses of PHI result. It's necessary | 7884 single arg PHI, just replace all uses of PHI result. It's necessary |
7144 } | 7902 } |
7145 | 7903 |
7146 return true; | 7904 return true; |
7147 } | 7905 } |
7148 | 7906 |
7149 /* Kill any debug uses outside LOOP of SSA names defined in STMT. */ | 7907 /* Kill any debug uses outside LOOP of SSA names defined in STMT_INFO. */ |
7150 | 7908 |
7151 static void | 7909 static void |
7152 vect_loop_kill_debug_uses (struct loop *loop, gimple *stmt) | 7910 vect_loop_kill_debug_uses (struct loop *loop, stmt_vec_info stmt_info) |
7153 { | 7911 { |
7154 ssa_op_iter op_iter; | 7912 ssa_op_iter op_iter; |
7155 imm_use_iterator imm_iter; | 7913 imm_use_iterator imm_iter; |
7156 def_operand_p def_p; | 7914 def_operand_p def_p; |
7157 gimple *ustmt; | 7915 gimple *ustmt; |
7158 | 7916 |
7159 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, op_iter, SSA_OP_DEF) | 7917 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt_info->stmt, op_iter, SSA_OP_DEF) |
7160 { | 7918 { |
7161 FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p)) | 7919 FOR_EACH_IMM_USE_STMT (ustmt, imm_iter, DEF_FROM_PTR (def_p)) |
7162 { | 7920 { |
7163 basic_block bb; | 7921 basic_block bb; |
7164 | 7922 |
7216 return true; | 7974 return true; |
7217 } | 7975 } |
7218 return false; | 7976 return false; |
7219 } | 7977 } |
7220 | 7978 |
7979 /* Return a mask type with half the number of elements as TYPE. */ | |
7980 | |
7981 tree | |
7982 vect_halve_mask_nunits (tree type) | |
7983 { | |
7984 poly_uint64 nunits = exact_div (TYPE_VECTOR_SUBPARTS (type), 2); | |
7985 return build_truth_vector_type (nunits, current_vector_size); | |
7986 } | |
7987 | |
7988 /* Return a mask type with twice as many elements as TYPE. */ | |
7989 | |
7990 tree | |
7991 vect_double_mask_nunits (tree type) | |
7992 { | |
7993 poly_uint64 nunits = TYPE_VECTOR_SUBPARTS (type) * 2; | |
7994 return build_truth_vector_type (nunits, current_vector_size); | |
7995 } | |
7996 | |
7997 /* Record that a fully-masked version of LOOP_VINFO would need MASKS to | |
7998 contain a sequence of NVECTORS masks that each control a vector of type | |
7999 VECTYPE. */ | |
8000 | |
8001 void | |
8002 vect_record_loop_mask (loop_vec_info loop_vinfo, vec_loop_masks *masks, | |
8003 unsigned int nvectors, tree vectype) | |
8004 { | |
8005 gcc_assert (nvectors != 0); | |
8006 if (masks->length () < nvectors) | |
8007 masks->safe_grow_cleared (nvectors); | |
8008 rgroup_masks *rgm = &(*masks)[nvectors - 1]; | |
8009 /* The number of scalars per iteration and the number of vectors are | |
8010 both compile-time constants. */ | |
8011 unsigned int nscalars_per_iter | |
8012 = exact_div (nvectors * TYPE_VECTOR_SUBPARTS (vectype), | |
8013 LOOP_VINFO_VECT_FACTOR (loop_vinfo)).to_constant (); | |
8014 if (rgm->max_nscalars_per_iter < nscalars_per_iter) | |
8015 { | |
8016 rgm->max_nscalars_per_iter = nscalars_per_iter; | |
8017 rgm->mask_type = build_same_sized_truth_vector_type (vectype); | |
8018 } | |
8019 } | |
8020 | |
8021 /* Given a complete set of masks MASKS, extract mask number INDEX | |
8022 for an rgroup that operates on NVECTORS vectors of type VECTYPE, | |
8023 where 0 <= INDEX < NVECTORS. Insert any set-up statements before GSI. | |
8024 | |
8025 See the comment above vec_loop_masks for more details about the mask | |
8026 arrangement. */ | |
8027 | |
8028 tree | |
8029 vect_get_loop_mask (gimple_stmt_iterator *gsi, vec_loop_masks *masks, | |
8030 unsigned int nvectors, tree vectype, unsigned int index) | |
8031 { | |
8032 rgroup_masks *rgm = &(*masks)[nvectors - 1]; | |
8033 tree mask_type = rgm->mask_type; | |
8034 | |
8035 /* Populate the rgroup's mask array, if this is the first time we've | |
8036 used it. */ | |
8037 if (rgm->masks.is_empty ()) | |
8038 { | |
8039 rgm->masks.safe_grow_cleared (nvectors); | |
8040 for (unsigned int i = 0; i < nvectors; ++i) | |
8041 { | |
8042 tree mask = make_temp_ssa_name (mask_type, NULL, "loop_mask"); | |
8043 /* Provide a dummy definition until the real one is available. */ | |
8044 SSA_NAME_DEF_STMT (mask) = gimple_build_nop (); | |
8045 rgm->masks[i] = mask; | |
8046 } | |
8047 } | |
8048 | |
8049 tree mask = rgm->masks[index]; | |
8050 if (maybe_ne (TYPE_VECTOR_SUBPARTS (mask_type), | |
8051 TYPE_VECTOR_SUBPARTS (vectype))) | |
8052 { | |
8053 /* A loop mask for data type X can be reused for data type Y | |
8054 if X has N times more elements than Y and if Y's elements | |
8055 are N times bigger than X's. In this case each sequence | |
8056 of N elements in the loop mask will be all-zero or all-one. | |
8057 We can then view-convert the mask so that each sequence of | |
8058 N elements is replaced by a single element. */ | |
8059 gcc_assert (multiple_p (TYPE_VECTOR_SUBPARTS (mask_type), | |
8060 TYPE_VECTOR_SUBPARTS (vectype))); | |
8061 gimple_seq seq = NULL; | |
8062 mask_type = build_same_sized_truth_vector_type (vectype); | |
8063 mask = gimple_build (&seq, VIEW_CONVERT_EXPR, mask_type, mask); | |
8064 if (seq) | |
8065 gsi_insert_seq_before (gsi, seq, GSI_SAME_STMT); | |
8066 } | |
8067 return mask; | |
8068 } | |
8069 | |
7221 /* Scale profiling counters by estimation for LOOP which is vectorized | 8070 /* Scale profiling counters by estimation for LOOP which is vectorized |
7222 by factor VF. */ | 8071 by factor VF. */ |
7223 | 8072 |
7224 static void | 8073 static void |
7225 scale_profile_for_vect_loop (struct loop *loop, unsigned vf) | 8074 scale_profile_for_vect_loop (struct loop *loop, unsigned vf) |
7227 edge preheader = loop_preheader_edge (loop); | 8076 edge preheader = loop_preheader_edge (loop); |
7228 /* Reduce loop iterations by the vectorization factor. */ | 8077 /* Reduce loop iterations by the vectorization factor. */ |
7229 gcov_type new_est_niter = niter_for_unrolled_loop (loop, vf); | 8078 gcov_type new_est_niter = niter_for_unrolled_loop (loop, vf); |
7230 profile_count freq_h = loop->header->count, freq_e = preheader->count (); | 8079 profile_count freq_h = loop->header->count, freq_e = preheader->count (); |
7231 | 8080 |
7232 /* Use frequency only if counts are zero. */ | 8081 if (freq_h.nonzero_p ()) |
7233 if (!(freq_h > 0) && !(freq_e > 0)) | |
7234 { | |
7235 freq_h = profile_count::from_gcov_type (loop->header->frequency); | |
7236 freq_e = profile_count::from_gcov_type (EDGE_FREQUENCY (preheader)); | |
7237 } | |
7238 if (freq_h > 0) | |
7239 { | 8082 { |
7240 profile_probability p; | 8083 profile_probability p; |
7241 | 8084 |
7242 /* Avoid dropping loop body profile counter to 0 because of zero count | 8085 /* Avoid dropping loop body profile counter to 0 because of zero count |
7243 in loop's preheader. */ | 8086 in loop's preheader. */ |
7244 if (!(freq_e > profile_count::from_gcov_type (1))) | 8087 if (!(freq_e == profile_count::zero ())) |
7245 freq_e = profile_count::from_gcov_type (1); | 8088 freq_e = freq_e.force_nonzero (); |
7246 p = freq_e.apply_scale (new_est_niter + 1, 1).probability_in (freq_h); | 8089 p = freq_e.apply_scale (new_est_niter + 1, 1).probability_in (freq_h); |
7247 scale_loop_frequencies (loop, p); | 8090 scale_loop_frequencies (loop, p); |
7248 } | 8091 } |
7249 | 8092 |
7250 edge exit_e = single_exit (loop); | 8093 edge exit_e = single_exit (loop); |
7254 edge exit_l = single_pred_edge (loop->latch); | 8097 edge exit_l = single_pred_edge (loop->latch); |
7255 profile_probability prob = exit_l->probability; | 8098 profile_probability prob = exit_l->probability; |
7256 exit_l->probability = exit_e->probability.invert (); | 8099 exit_l->probability = exit_e->probability.invert (); |
7257 if (prob.initialized_p () && exit_l->probability.initialized_p ()) | 8100 if (prob.initialized_p () && exit_l->probability.initialized_p ()) |
7258 scale_bbs_frequencies (&loop->latch, 1, exit_l->probability / prob); | 8101 scale_bbs_frequencies (&loop->latch, 1, exit_l->probability / prob); |
8102 } | |
8103 | |
8104 /* Vectorize STMT_INFO if relevant, inserting any new instructions before GSI. | |
8105 When vectorizing STMT_INFO as a store, set *SEEN_STORE to its | |
8106 stmt_vec_info. */ | |
8107 | |
8108 static void | |
8109 vect_transform_loop_stmt (loop_vec_info loop_vinfo, stmt_vec_info stmt_info, | |
8110 gimple_stmt_iterator *gsi, stmt_vec_info *seen_store) | |
8111 { | |
8112 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
8113 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); | |
8114 | |
8115 if (dump_enabled_p ()) | |
8116 dump_printf_loc (MSG_NOTE, vect_location, | |
8117 "------>vectorizing statement: %G", stmt_info->stmt); | |
8118 | |
8119 if (MAY_HAVE_DEBUG_BIND_STMTS && !STMT_VINFO_LIVE_P (stmt_info)) | |
8120 vect_loop_kill_debug_uses (loop, stmt_info); | |
8121 | |
8122 if (!STMT_VINFO_RELEVANT_P (stmt_info) | |
8123 && !STMT_VINFO_LIVE_P (stmt_info)) | |
8124 return; | |
8125 | |
8126 if (STMT_VINFO_VECTYPE (stmt_info)) | |
8127 { | |
8128 poly_uint64 nunits | |
8129 = TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); | |
8130 if (!STMT_SLP_TYPE (stmt_info) | |
8131 && maybe_ne (nunits, vf) | |
8132 && dump_enabled_p ()) | |
8133 /* For SLP VF is set according to unrolling factor, and not | |
8134 to vector size, hence for SLP this print is not valid. */ | |
8135 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n"); | |
8136 } | |
8137 | |
8138 /* Pure SLP statements have already been vectorized. We still need | |
8139 to apply loop vectorization to hybrid SLP statements. */ | |
8140 if (PURE_SLP_STMT (stmt_info)) | |
8141 return; | |
8142 | |
8143 if (dump_enabled_p ()) | |
8144 dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n"); | |
8145 | |
8146 if (vect_transform_stmt (stmt_info, gsi, NULL, NULL)) | |
8147 *seen_store = stmt_info; | |
7259 } | 8148 } |
7260 | 8149 |
7261 /* Function vect_transform_loop. | 8150 /* Function vect_transform_loop. |
7262 | 8151 |
7263 The analysis phase has determined that the loop is vectorizable. | 8152 The analysis phase has determined that the loop is vectorizable. |
7271 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | 8160 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); |
7272 struct loop *epilogue = NULL; | 8161 struct loop *epilogue = NULL; |
7273 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); | 8162 basic_block *bbs = LOOP_VINFO_BBS (loop_vinfo); |
7274 int nbbs = loop->num_nodes; | 8163 int nbbs = loop->num_nodes; |
7275 int i; | 8164 int i; |
7276 tree niters_vector = NULL; | 8165 tree niters_vector = NULL_TREE; |
7277 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); | 8166 tree step_vector = NULL_TREE; |
7278 bool grouped_store; | 8167 tree niters_vector_mult_vf = NULL_TREE; |
7279 bool slp_scheduled = false; | 8168 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo); |
7280 gimple *stmt, *pattern_stmt; | 8169 unsigned int lowest_vf = constant_lower_bound (vf); |
7281 gimple_seq pattern_def_seq = NULL; | 8170 gimple *stmt; |
7282 gimple_stmt_iterator pattern_def_si = gsi_none (); | |
7283 bool transform_pattern_stmt = false; | |
7284 bool check_profitability = false; | 8171 bool check_profitability = false; |
7285 int th; | 8172 unsigned int th; |
7286 | 8173 |
7287 if (dump_enabled_p ()) | 8174 DUMP_VECT_SCOPE ("vec_transform_loop"); |
7288 dump_printf_loc (MSG_NOTE, vect_location, "=== vec_transform_loop ===\n"); | 8175 |
8176 loop_vinfo->shared->check_datarefs (); | |
7289 | 8177 |
7290 /* Use the more conservative vectorization threshold. If the number | 8178 /* Use the more conservative vectorization threshold. If the number |
7291 of iterations is constant assume the cost check has been performed | 8179 of iterations is constant assume the cost check has been performed |
7292 by our caller. If the threshold makes all loops profitable that | 8180 by our caller. If the threshold makes all loops profitable that |
7293 run at least the vectorization factor number of times checking | 8181 run at least the (estimated) vectorization factor number of times |
7294 is pointless, too. */ | 8182 checking is pointless, too. */ |
7295 th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); | 8183 th = LOOP_VINFO_COST_MODEL_THRESHOLD (loop_vinfo); |
7296 if (th >= LOOP_VINFO_VECT_FACTOR (loop_vinfo) | 8184 if (th >= vect_vf_for_cost (loop_vinfo) |
7297 && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) | 8185 && !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) |
7298 { | 8186 { |
7299 if (dump_enabled_p ()) | 8187 if (dump_enabled_p ()) |
7300 dump_printf_loc (MSG_NOTE, vect_location, | 8188 dump_printf_loc (MSG_NOTE, vect_location, |
7301 "Profitability threshold is %d loop iterations.\n", | 8189 "Profitability threshold is %d loop iterations.\n", |
7316 /* Version the loop first, if required, so the profitability check | 8204 /* Version the loop first, if required, so the profitability check |
7317 comes first. */ | 8205 comes first. */ |
7318 | 8206 |
7319 if (LOOP_REQUIRES_VERSIONING (loop_vinfo)) | 8207 if (LOOP_REQUIRES_VERSIONING (loop_vinfo)) |
7320 { | 8208 { |
7321 vect_loop_versioning (loop_vinfo, th, check_profitability); | 8209 poly_uint64 versioning_threshold |
8210 = LOOP_VINFO_VERSIONING_THRESHOLD (loop_vinfo); | |
8211 if (check_profitability | |
8212 && ordered_p (poly_uint64 (th), versioning_threshold)) | |
8213 { | |
8214 versioning_threshold = ordered_max (poly_uint64 (th), | |
8215 versioning_threshold); | |
8216 check_profitability = false; | |
8217 } | |
8218 vect_loop_versioning (loop_vinfo, th, check_profitability, | |
8219 versioning_threshold); | |
7322 check_profitability = false; | 8220 check_profitability = false; |
7323 } | 8221 } |
7324 | 8222 |
7325 /* Make sure there exists a single-predecessor exit bb also on the | 8223 /* Make sure there exists a single-predecessor exit bb also on the |
7326 scalar loop copy. Do this after versioning but before peeling | 8224 scalar loop copy. Do this after versioning but before peeling |
7340 | 8238 |
7341 tree niters = vect_build_loop_niters (loop_vinfo); | 8239 tree niters = vect_build_loop_niters (loop_vinfo); |
7342 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters; | 8240 LOOP_VINFO_NITERS_UNCHANGED (loop_vinfo) = niters; |
7343 tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo)); | 8241 tree nitersm1 = unshare_expr (LOOP_VINFO_NITERSM1 (loop_vinfo)); |
7344 bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo); | 8242 bool niters_no_overflow = loop_niters_no_overflow (loop_vinfo); |
7345 epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, th, | 8243 epilogue = vect_do_peeling (loop_vinfo, niters, nitersm1, &niters_vector, |
8244 &step_vector, &niters_vector_mult_vf, th, | |
7346 check_profitability, niters_no_overflow); | 8245 check_profitability, niters_no_overflow); |
8246 | |
7347 if (niters_vector == NULL_TREE) | 8247 if (niters_vector == NULL_TREE) |
7348 { | 8248 { |
7349 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)) | 8249 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
7350 niters_vector | 8250 && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) |
7351 = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)), | 8251 && known_eq (lowest_vf, vf)) |
7352 LOOP_VINFO_INT_NITERS (loop_vinfo) / vf); | 8252 { |
8253 niters_vector | |
8254 = build_int_cst (TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo)), | |
8255 LOOP_VINFO_INT_NITERS (loop_vinfo) / lowest_vf); | |
8256 step_vector = build_one_cst (TREE_TYPE (niters)); | |
8257 } | |
7353 else | 8258 else |
7354 vect_gen_vector_loop_niters (loop_vinfo, niters, &niters_vector, | 8259 vect_gen_vector_loop_niters (loop_vinfo, niters, &niters_vector, |
7355 niters_no_overflow); | 8260 &step_vector, niters_no_overflow); |
7356 } | 8261 } |
7357 | 8262 |
7358 /* 1) Make sure the loop header has exactly two entries | 8263 /* 1) Make sure the loop header has exactly two entries |
7359 2) Make sure we have a preheader basic block. */ | 8264 2) Make sure we have a preheader basic block. */ |
7360 | 8265 |
7361 gcc_assert (EDGE_COUNT (loop->header->preds) == 2); | 8266 gcc_assert (EDGE_COUNT (loop->header->preds) == 2); |
7362 | 8267 |
7363 split_edge (loop_preheader_edge (loop)); | 8268 split_edge (loop_preheader_edge (loop)); |
8269 | |
8270 if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo) | |
8271 && vect_use_loop_mask_for_alignment_p (loop_vinfo)) | |
8272 /* This will deal with any possible peeling. */ | |
8273 vect_prepare_for_masked_peels (loop_vinfo); | |
8274 | |
8275 /* Schedule the SLP instances first, then handle loop vectorization | |
8276 below. */ | |
8277 if (!loop_vinfo->slp_instances.is_empty ()) | |
8278 { | |
8279 DUMP_VECT_SCOPE ("scheduling SLP instances"); | |
8280 vect_schedule_slp (loop_vinfo); | |
8281 } | |
7364 | 8282 |
7365 /* FORNOW: the vectorizer supports only loops whose body consists | 8283 /* FORNOW: the vectorizer supports only loops whose body consists |
7366 of one basic block (header + empty latch). When the vectorizer will | 8284 of one basic block (header + empty latch). When the vectorizer will |
7367 support more involved loop forms, the order by which the BBs are | 8285 support more involved loop forms, the order by which the BBs are |
7368 traversed needs to be reconsidered. */ | 8286 traversed needs to be reconsidered. */ |
7375 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); | 8293 for (gphi_iterator si = gsi_start_phis (bb); !gsi_end_p (si); |
7376 gsi_next (&si)) | 8294 gsi_next (&si)) |
7377 { | 8295 { |
7378 gphi *phi = si.phi (); | 8296 gphi *phi = si.phi (); |
7379 if (dump_enabled_p ()) | 8297 if (dump_enabled_p ()) |
7380 { | 8298 dump_printf_loc (MSG_NOTE, vect_location, |
7381 dump_printf_loc (MSG_NOTE, vect_location, | 8299 "------>vectorizing phi: %G", phi); |
7382 "------>vectorizing phi: "); | 8300 stmt_info = loop_vinfo->lookup_stmt (phi); |
7383 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, phi, 0); | |
7384 } | |
7385 stmt_info = vinfo_for_stmt (phi); | |
7386 if (!stmt_info) | 8301 if (!stmt_info) |
7387 continue; | 8302 continue; |
7388 | 8303 |
7389 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info)) | 8304 if (MAY_HAVE_DEBUG_BIND_STMTS && !STMT_VINFO_LIVE_P (stmt_info)) |
7390 vect_loop_kill_debug_uses (loop, phi); | 8305 vect_loop_kill_debug_uses (loop, stmt_info); |
7391 | 8306 |
7392 if (!STMT_VINFO_RELEVANT_P (stmt_info) | 8307 if (!STMT_VINFO_RELEVANT_P (stmt_info) |
7393 && !STMT_VINFO_LIVE_P (stmt_info)) | 8308 && !STMT_VINFO_LIVE_P (stmt_info)) |
7394 continue; | 8309 continue; |
7395 | 8310 |
7396 if (STMT_VINFO_VECTYPE (stmt_info) | 8311 if (STMT_VINFO_VECTYPE (stmt_info) |
7397 && (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)) | 8312 && (maybe_ne |
7398 != (unsigned HOST_WIDE_INT) vf) | 8313 (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)), vf)) |
7399 && dump_enabled_p ()) | 8314 && dump_enabled_p ()) |
7400 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n"); | 8315 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n"); |
7401 | 8316 |
7402 if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def | 8317 if ((STMT_VINFO_DEF_TYPE (stmt_info) == vect_induction_def |
7403 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def | 8318 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def |
7404 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) | 8319 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_nested_cycle) |
7405 && ! PURE_SLP_STMT (stmt_info)) | 8320 && ! PURE_SLP_STMT (stmt_info)) |
7406 { | 8321 { |
7407 if (dump_enabled_p ()) | 8322 if (dump_enabled_p ()) |
7408 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n"); | 8323 dump_printf_loc (MSG_NOTE, vect_location, "transform phi.\n"); |
7409 vect_transform_stmt (phi, NULL, NULL, NULL, NULL); | 8324 vect_transform_stmt (stmt_info, NULL, NULL, NULL); |
7410 } | 8325 } |
7411 } | 8326 } |
7412 | 8327 |
7413 pattern_stmt = NULL; | |
7414 for (gimple_stmt_iterator si = gsi_start_bb (bb); | 8328 for (gimple_stmt_iterator si = gsi_start_bb (bb); |
7415 !gsi_end_p (si) || transform_pattern_stmt;) | 8329 !gsi_end_p (si);) |
7416 { | 8330 { |
7417 bool is_store; | 8331 stmt = gsi_stmt (si); |
7418 | 8332 /* During vectorization remove existing clobber stmts. */ |
7419 if (transform_pattern_stmt) | 8333 if (gimple_clobber_p (stmt)) |
7420 stmt = pattern_stmt; | |
7421 else | |
7422 { | 8334 { |
7423 stmt = gsi_stmt (si); | 8335 unlink_stmt_vdef (stmt); |
7424 /* During vectorization remove existing clobber stmts. */ | 8336 gsi_remove (&si, true); |
7425 if (gimple_clobber_p (stmt)) | 8337 release_defs (stmt); |
8338 } | |
8339 else | |
8340 { | |
8341 stmt_info = loop_vinfo->lookup_stmt (stmt); | |
8342 | |
8343 /* vector stmts created in the outer-loop during vectorization of | |
8344 stmts in an inner-loop may not have a stmt_info, and do not | |
8345 need to be vectorized. */ | |
8346 stmt_vec_info seen_store = NULL; | |
8347 if (stmt_info) | |
7426 { | 8348 { |
7427 unlink_stmt_vdef (stmt); | 8349 if (STMT_VINFO_IN_PATTERN_P (stmt_info)) |
7428 gsi_remove (&si, true); | 8350 { |
7429 release_defs (stmt); | 8351 gimple *def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); |
7430 continue; | 8352 for (gimple_stmt_iterator subsi = gsi_start (def_seq); |
8353 !gsi_end_p (subsi); gsi_next (&subsi)) | |
8354 { | |
8355 stmt_vec_info pat_stmt_info | |
8356 = loop_vinfo->lookup_stmt (gsi_stmt (subsi)); | |
8357 vect_transform_loop_stmt (loop_vinfo, pat_stmt_info, | |
8358 &si, &seen_store); | |
8359 } | |
8360 stmt_vec_info pat_stmt_info | |
8361 = STMT_VINFO_RELATED_STMT (stmt_info); | |
8362 vect_transform_loop_stmt (loop_vinfo, pat_stmt_info, &si, | |
8363 &seen_store); | |
8364 } | |
8365 vect_transform_loop_stmt (loop_vinfo, stmt_info, &si, | |
8366 &seen_store); | |
8367 } | |
8368 gsi_next (&si); | |
8369 if (seen_store) | |
8370 { | |
8371 if (STMT_VINFO_GROUPED_ACCESS (seen_store)) | |
8372 /* Interleaving. If SEEN_STORE is nonnull, the | |
8373 vectorization of the interleaving chain was | |
8374 completed - free all the stores in the chain. */ | |
8375 vect_remove_stores (DR_GROUP_FIRST_ELEMENT (seen_store)); | |
8376 else | |
8377 /* Free the attached stmt_vec_info and remove the stmt. */ | |
8378 loop_vinfo->remove_stmt (stmt_info); | |
7431 } | 8379 } |
7432 } | 8380 } |
7433 | 8381 } |
7434 if (dump_enabled_p ()) | 8382 |
8383 /* Stub out scalar statements that must not survive vectorization. | |
8384 Doing this here helps with grouped statements, or statements that | |
8385 are involved in patterns. */ | |
8386 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); | |
8387 !gsi_end_p (gsi); gsi_next (&gsi)) | |
8388 { | |
8389 gcall *call = dyn_cast <gcall *> (gsi_stmt (gsi)); | |
8390 if (call && gimple_call_internal_p (call, IFN_MASK_LOAD)) | |
7435 { | 8391 { |
7436 dump_printf_loc (MSG_NOTE, vect_location, | 8392 tree lhs = gimple_get_lhs (call); |
7437 "------>vectorizing statement: "); | 8393 if (!VECTOR_TYPE_P (TREE_TYPE (lhs))) |
7438 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt, 0); | |
7439 } | |
7440 | |
7441 stmt_info = vinfo_for_stmt (stmt); | |
7442 | |
7443 /* vector stmts created in the outer-loop during vectorization of | |
7444 stmts in an inner-loop may not have a stmt_info, and do not | |
7445 need to be vectorized. */ | |
7446 if (!stmt_info) | |
7447 { | |
7448 gsi_next (&si); | |
7449 continue; | |
7450 } | |
7451 | |
7452 if (MAY_HAVE_DEBUG_STMTS && !STMT_VINFO_LIVE_P (stmt_info)) | |
7453 vect_loop_kill_debug_uses (loop, stmt); | |
7454 | |
7455 if (!STMT_VINFO_RELEVANT_P (stmt_info) | |
7456 && !STMT_VINFO_LIVE_P (stmt_info)) | |
7457 { | |
7458 if (STMT_VINFO_IN_PATTERN_P (stmt_info) | |
7459 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) | |
7460 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) | |
7461 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) | |
7462 { | |
7463 stmt = pattern_stmt; | |
7464 stmt_info = vinfo_for_stmt (stmt); | |
7465 } | |
7466 else | |
7467 { | |
7468 gsi_next (&si); | |
7469 continue; | |
7470 } | |
7471 } | |
7472 else if (STMT_VINFO_IN_PATTERN_P (stmt_info) | |
7473 && (pattern_stmt = STMT_VINFO_RELATED_STMT (stmt_info)) | |
7474 && (STMT_VINFO_RELEVANT_P (vinfo_for_stmt (pattern_stmt)) | |
7475 || STMT_VINFO_LIVE_P (vinfo_for_stmt (pattern_stmt)))) | |
7476 transform_pattern_stmt = true; | |
7477 | |
7478 /* If pattern statement has def stmts, vectorize them too. */ | |
7479 if (is_pattern_stmt_p (stmt_info)) | |
7480 { | |
7481 if (pattern_def_seq == NULL) | |
7482 { | 8394 { |
7483 pattern_def_seq = STMT_VINFO_PATTERN_DEF_SEQ (stmt_info); | 8395 tree zero = build_zero_cst (TREE_TYPE (lhs)); |
7484 pattern_def_si = gsi_start (pattern_def_seq); | 8396 gimple *new_stmt = gimple_build_assign (lhs, zero); |
7485 } | 8397 gsi_replace (&gsi, new_stmt, true); |
7486 else if (!gsi_end_p (pattern_def_si)) | |
7487 gsi_next (&pattern_def_si); | |
7488 if (pattern_def_seq != NULL) | |
7489 { | |
7490 gimple *pattern_def_stmt = NULL; | |
7491 stmt_vec_info pattern_def_stmt_info = NULL; | |
7492 | |
7493 while (!gsi_end_p (pattern_def_si)) | |
7494 { | |
7495 pattern_def_stmt = gsi_stmt (pattern_def_si); | |
7496 pattern_def_stmt_info | |
7497 = vinfo_for_stmt (pattern_def_stmt); | |
7498 if (STMT_VINFO_RELEVANT_P (pattern_def_stmt_info) | |
7499 || STMT_VINFO_LIVE_P (pattern_def_stmt_info)) | |
7500 break; | |
7501 gsi_next (&pattern_def_si); | |
7502 } | |
7503 | |
7504 if (!gsi_end_p (pattern_def_si)) | |
7505 { | |
7506 if (dump_enabled_p ()) | |
7507 { | |
7508 dump_printf_loc (MSG_NOTE, vect_location, | |
7509 "==> vectorizing pattern def " | |
7510 "stmt: "); | |
7511 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, | |
7512 pattern_def_stmt, 0); | |
7513 } | |
7514 | |
7515 stmt = pattern_def_stmt; | |
7516 stmt_info = pattern_def_stmt_info; | |
7517 } | |
7518 else | |
7519 { | |
7520 pattern_def_si = gsi_none (); | |
7521 transform_pattern_stmt = false; | |
7522 } | |
7523 } | |
7524 else | |
7525 transform_pattern_stmt = false; | |
7526 } | |
7527 | |
7528 if (STMT_VINFO_VECTYPE (stmt_info)) | |
7529 { | |
7530 unsigned int nunits | |
7531 = (unsigned int) | |
7532 TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (stmt_info)); | |
7533 if (!STMT_SLP_TYPE (stmt_info) | |
7534 && nunits != (unsigned int) vf | |
7535 && dump_enabled_p ()) | |
7536 /* For SLP VF is set according to unrolling factor, and not | |
7537 to vector size, hence for SLP this print is not valid. */ | |
7538 dump_printf_loc (MSG_NOTE, vect_location, "multiple-types.\n"); | |
7539 } | |
7540 | |
7541 /* SLP. Schedule all the SLP instances when the first SLP stmt is | |
7542 reached. */ | |
7543 if (STMT_SLP_TYPE (stmt_info)) | |
7544 { | |
7545 if (!slp_scheduled) | |
7546 { | |
7547 slp_scheduled = true; | |
7548 | |
7549 if (dump_enabled_p ()) | |
7550 dump_printf_loc (MSG_NOTE, vect_location, | |
7551 "=== scheduling SLP instances ===\n"); | |
7552 | |
7553 vect_schedule_slp (loop_vinfo); | |
7554 } | |
7555 | |
7556 /* Hybrid SLP stmts must be vectorized in addition to SLP. */ | |
7557 if (!vinfo_for_stmt (stmt) || PURE_SLP_STMT (stmt_info)) | |
7558 { | |
7559 if (!transform_pattern_stmt && gsi_end_p (pattern_def_si)) | |
7560 { | |
7561 pattern_def_seq = NULL; | |
7562 gsi_next (&si); | |
7563 } | |
7564 continue; | |
7565 } | 8398 } |
7566 } | 8399 } |
7567 | 8400 } |
7568 /* -------- vectorize statement ------------ */ | |
7569 if (dump_enabled_p ()) | |
7570 dump_printf_loc (MSG_NOTE, vect_location, "transform statement.\n"); | |
7571 | |
7572 grouped_store = false; | |
7573 is_store = vect_transform_stmt (stmt, &si, &grouped_store, NULL, NULL); | |
7574 if (is_store) | |
7575 { | |
7576 if (STMT_VINFO_GROUPED_ACCESS (stmt_info)) | |
7577 { | |
7578 /* Interleaving. If IS_STORE is TRUE, the vectorization of the | |
7579 interleaving chain was completed - free all the stores in | |
7580 the chain. */ | |
7581 gsi_next (&si); | |
7582 vect_remove_stores (GROUP_FIRST_ELEMENT (stmt_info)); | |
7583 } | |
7584 else | |
7585 { | |
7586 /* Free the attached stmt_vec_info and remove the stmt. */ | |
7587 gimple *store = gsi_stmt (si); | |
7588 free_stmt_vec_info (store); | |
7589 unlink_stmt_vdef (store); | |
7590 gsi_remove (&si, true); | |
7591 release_defs (store); | |
7592 } | |
7593 | |
7594 /* Stores can only appear at the end of pattern statements. */ | |
7595 gcc_assert (!transform_pattern_stmt); | |
7596 pattern_def_seq = NULL; | |
7597 } | |
7598 else if (!transform_pattern_stmt && gsi_end_p (pattern_def_si)) | |
7599 { | |
7600 pattern_def_seq = NULL; | |
7601 gsi_next (&si); | |
7602 } | |
7603 } /* stmts in BB */ | |
7604 } /* BBs in loop */ | 8401 } /* BBs in loop */ |
7605 | 8402 |
7606 slpeel_make_loop_iterate_ntimes (loop, niters_vector); | 8403 /* The vectorization factor is always > 1, so if we use an IV increment of 1, |
7607 | 8404 a zero NITERS becomes a nonzero NITERS_VECTOR. */ |
7608 scale_profile_for_vect_loop (loop, vf); | 8405 if (integer_onep (step_vector)) |
7609 | 8406 niters_no_overflow = true; |
8407 vect_set_loop_condition (loop, loop_vinfo, niters_vector, step_vector, | |
8408 niters_vector_mult_vf, !niters_no_overflow); | |
8409 | |
8410 unsigned int assumed_vf = vect_vf_for_cost (loop_vinfo); | |
8411 scale_profile_for_vect_loop (loop, assumed_vf); | |
8412 | |
8413 /* True if the final iteration might not handle a full vector's | |
8414 worth of scalar iterations. */ | |
8415 bool final_iter_may_be_partial = LOOP_VINFO_FULLY_MASKED_P (loop_vinfo); | |
7610 /* The minimum number of iterations performed by the epilogue. This | 8416 /* The minimum number of iterations performed by the epilogue. This |
7611 is 1 when peeling for gaps because we always need a final scalar | 8417 is 1 when peeling for gaps because we always need a final scalar |
7612 iteration. */ | 8418 iteration. */ |
7613 int min_epilogue_iters = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 0; | 8419 int min_epilogue_iters = LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) ? 1 : 0; |
7614 /* +1 to convert latch counts to loop iteration counts, | 8420 /* +1 to convert latch counts to loop iteration counts, |
7615 -min_epilogue_iters to remove iterations that cannot be performed | 8421 -min_epilogue_iters to remove iterations that cannot be performed |
7616 by the vector code. */ | 8422 by the vector code. */ |
7617 int bias = 1 - min_epilogue_iters; | 8423 int bias_for_lowest = 1 - min_epilogue_iters; |
8424 int bias_for_assumed = bias_for_lowest; | |
8425 int alignment_npeels = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); | |
8426 if (alignment_npeels && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)) | |
8427 { | |
8428 /* When the amount of peeling is known at compile time, the first | |
8429 iteration will have exactly alignment_npeels active elements. | |
8430 In the worst case it will have at least one. */ | |
8431 int min_first_active = (alignment_npeels > 0 ? alignment_npeels : 1); | |
8432 bias_for_lowest += lowest_vf - min_first_active; | |
8433 bias_for_assumed += assumed_vf - min_first_active; | |
8434 } | |
7618 /* In these calculations the "- 1" converts loop iteration counts | 8435 /* In these calculations the "- 1" converts loop iteration counts |
7619 back to latch counts. */ | 8436 back to latch counts. */ |
7620 if (loop->any_upper_bound) | 8437 if (loop->any_upper_bound) |
7621 loop->nb_iterations_upper_bound | 8438 loop->nb_iterations_upper_bound |
7622 = wi::udiv_floor (loop->nb_iterations_upper_bound + bias, vf) - 1; | 8439 = (final_iter_may_be_partial |
8440 ? wi::udiv_ceil (loop->nb_iterations_upper_bound + bias_for_lowest, | |
8441 lowest_vf) - 1 | |
8442 : wi::udiv_floor (loop->nb_iterations_upper_bound + bias_for_lowest, | |
8443 lowest_vf) - 1); | |
7623 if (loop->any_likely_upper_bound) | 8444 if (loop->any_likely_upper_bound) |
7624 loop->nb_iterations_likely_upper_bound | 8445 loop->nb_iterations_likely_upper_bound |
7625 = wi::udiv_floor (loop->nb_iterations_likely_upper_bound + bias, vf) - 1; | 8446 = (final_iter_may_be_partial |
8447 ? wi::udiv_ceil (loop->nb_iterations_likely_upper_bound | |
8448 + bias_for_lowest, lowest_vf) - 1 | |
8449 : wi::udiv_floor (loop->nb_iterations_likely_upper_bound | |
8450 + bias_for_lowest, lowest_vf) - 1); | |
7626 if (loop->any_estimate) | 8451 if (loop->any_estimate) |
7627 loop->nb_iterations_estimate | 8452 loop->nb_iterations_estimate |
7628 = wi::udiv_floor (loop->nb_iterations_estimate + bias, vf) - 1; | 8453 = (final_iter_may_be_partial |
8454 ? wi::udiv_ceil (loop->nb_iterations_estimate + bias_for_assumed, | |
8455 assumed_vf) - 1 | |
8456 : wi::udiv_floor (loop->nb_iterations_estimate + bias_for_assumed, | |
8457 assumed_vf) - 1); | |
7629 | 8458 |
7630 if (dump_enabled_p ()) | 8459 if (dump_enabled_p ()) |
7631 { | 8460 { |
7632 if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)) | 8461 if (!LOOP_VINFO_EPILOGUE_P (loop_vinfo)) |
7633 { | 8462 { |
7637 dump_printf_loc (MSG_NOTE, vect_location, | 8466 dump_printf_loc (MSG_NOTE, vect_location, |
7638 "OUTER LOOP VECTORIZED\n"); | 8467 "OUTER LOOP VECTORIZED\n"); |
7639 dump_printf (MSG_NOTE, "\n"); | 8468 dump_printf (MSG_NOTE, "\n"); |
7640 } | 8469 } |
7641 else | 8470 else |
7642 dump_printf_loc (MSG_NOTE, vect_location, | 8471 { |
7643 "LOOP EPILOGUE VECTORIZED (VS=%d)\n", | 8472 dump_printf_loc (MSG_NOTE, vect_location, |
7644 current_vector_size); | 8473 "LOOP EPILOGUE VECTORIZED (VS="); |
8474 dump_dec (MSG_NOTE, current_vector_size); | |
8475 dump_printf (MSG_NOTE, ")\n"); | |
8476 } | |
7645 } | 8477 } |
7646 | 8478 |
7647 /* Free SLP instances here because otherwise stmt reference counting | 8479 /* Free SLP instances here because otherwise stmt reference counting |
7648 won't work. */ | 8480 won't work. */ |
7649 slp_instance instance; | 8481 slp_instance instance; |
7650 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance) | 8482 FOR_EACH_VEC_ELT (LOOP_VINFO_SLP_INSTANCES (loop_vinfo), i, instance) |
7651 vect_free_slp_instance (instance); | 8483 vect_free_slp_instance (instance, true); |
7652 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release (); | 8484 LOOP_VINFO_SLP_INSTANCES (loop_vinfo).release (); |
7653 /* Clear the safelen field: its value is invalid after vectorization | 8485 /* Clear the safelen field: its value is invalid after vectorization |
7654 because the vectorized loop can have loop-carried dependencies. */ | 8486 because the vectorized loop can have loop-carried dependencies. */ |
7655 loop->safelen = 0; | 8487 loop->safelen = 0; |
7656 | 8488 |
7657 /* Don't vectorize epilogue for epilogue. */ | 8489 /* Don't vectorize epilogue for epilogue. */ |
7658 if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)) | 8490 if (LOOP_VINFO_EPILOGUE_P (loop_vinfo)) |
7659 epilogue = NULL; | 8491 epilogue = NULL; |
7660 | 8492 |
8493 if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK)) | |
8494 epilogue = NULL; | |
8495 | |
7661 if (epilogue) | 8496 if (epilogue) |
7662 { | 8497 { |
7663 unsigned int vector_sizes | 8498 auto_vector_sizes vector_sizes; |
7664 = targetm.vectorize.autovectorize_vector_sizes (); | 8499 targetm.vectorize.autovectorize_vector_sizes (&vector_sizes); |
7665 vector_sizes &= current_vector_size - 1; | 8500 unsigned int next_size = 0; |
7666 | 8501 |
7667 if (!PARAM_VALUE (PARAM_VECT_EPILOGUES_NOMASK)) | 8502 if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) |
7668 epilogue = NULL; | 8503 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0 |
7669 else if (!vector_sizes) | 8504 && known_eq (vf, lowest_vf)) |
7670 epilogue = NULL; | 8505 { |
7671 else if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo) | 8506 unsigned int eiters |
7672 && LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) >= 0) | 8507 = (LOOP_VINFO_INT_NITERS (loop_vinfo) |
7673 { | 8508 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo)); |
7674 int smallest_vec_size = 1 << ctz_hwi (vector_sizes); | 8509 eiters = eiters % lowest_vf; |
7675 int ratio = current_vector_size / smallest_vec_size; | 8510 epilogue->nb_iterations_upper_bound = eiters - 1; |
7676 int eiters = LOOP_VINFO_INT_NITERS (loop_vinfo) | 8511 |
7677 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo); | 8512 unsigned int ratio; |
7678 eiters = eiters % vf; | 8513 while (next_size < vector_sizes.length () |
7679 | 8514 && !(constant_multiple_p (current_vector_size, |
7680 epilogue->nb_iterations_upper_bound = eiters - 1; | 8515 vector_sizes[next_size], &ratio) |
7681 | 8516 && eiters >= lowest_vf / ratio)) |
7682 if (eiters < vf / ratio) | 8517 next_size += 1; |
7683 epilogue = NULL; | 8518 } |
7684 } | 8519 else |
8520 while (next_size < vector_sizes.length () | |
8521 && maybe_lt (current_vector_size, vector_sizes[next_size])) | |
8522 next_size += 1; | |
8523 | |
8524 if (next_size == vector_sizes.length ()) | |
8525 epilogue = NULL; | |
7685 } | 8526 } |
7686 | 8527 |
7687 if (epilogue) | 8528 if (epilogue) |
7688 { | 8529 { |
7689 epilogue->force_vectorize = loop->force_vectorize; | 8530 epilogue->force_vectorize = loop->force_vectorize; |
7779 add_bb_to_loop (store_bb, bb_loop); | 8620 add_bb_to_loop (store_bb, bb_loop); |
7780 e->flags = EDGE_TRUE_VALUE; | 8621 e->flags = EDGE_TRUE_VALUE; |
7781 efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE); | 8622 efalse = make_edge (bb, store_bb, EDGE_FALSE_VALUE); |
7782 /* Put STORE_BB to likely part. */ | 8623 /* Put STORE_BB to likely part. */ |
7783 efalse->probability = profile_probability::unlikely (); | 8624 efalse->probability = profile_probability::unlikely (); |
7784 store_bb->frequency = PROB_ALWAYS - EDGE_FREQUENCY (efalse); | 8625 store_bb->count = efalse->count (); |
7785 make_single_succ_edge (store_bb, join_bb, EDGE_FALLTHRU); | 8626 make_single_succ_edge (store_bb, join_bb, EDGE_FALLTHRU); |
7786 if (dom_info_available_p (CDI_DOMINATORS)) | 8627 if (dom_info_available_p (CDI_DOMINATORS)) |
7787 set_immediate_dominator (CDI_DOMINATORS, store_bb, bb); | 8628 set_immediate_dominator (CDI_DOMINATORS, store_bb, bb); |
7788 if (dump_enabled_p ()) | 8629 if (dump_enabled_p ()) |
7789 dump_printf_loc (MSG_NOTE, vect_location, | 8630 dump_printf_loc (MSG_NOTE, vect_location, |
7823 gsi_to = gsi_start_bb (store_bb); | 8664 gsi_to = gsi_start_bb (store_bb); |
7824 gsi_move_before (&gsi_from, &gsi_to); | 8665 gsi_move_before (&gsi_from, &gsi_to); |
7825 /* Setup GSI_TO to the non-empty block start. */ | 8666 /* Setup GSI_TO to the non-empty block start. */ |
7826 gsi_to = gsi_start_bb (store_bb); | 8667 gsi_to = gsi_start_bb (store_bb); |
7827 if (dump_enabled_p ()) | 8668 if (dump_enabled_p ()) |
7828 { | 8669 dump_printf_loc (MSG_NOTE, vect_location, |
7829 dump_printf_loc (MSG_NOTE, vect_location, | 8670 "Move stmt to created bb\n%G", last); |
7830 "Move stmt to created bb\n"); | |
7831 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, last, 0); | |
7832 } | |
7833 /* Move all stored value producers if possible. */ | 8671 /* Move all stored value producers if possible. */ |
7834 while (!gsi_end_p (gsi)) | 8672 while (!gsi_end_p (gsi)) |
7835 { | 8673 { |
7836 tree lhs; | 8674 tree lhs; |
7837 imm_use_iterator imm_iter; | 8675 imm_use_iterator imm_iter; |
7891 && gimple_vuse (stmt1) != gimple_vuse (last_store)) | 8729 && gimple_vuse (stmt1) != gimple_vuse (last_store)) |
7892 break; | 8730 break; |
7893 | 8731 |
7894 /* Can move STMT1 to STORE_BB. */ | 8732 /* Can move STMT1 to STORE_BB. */ |
7895 if (dump_enabled_p ()) | 8733 if (dump_enabled_p ()) |
7896 { | 8734 dump_printf_loc (MSG_NOTE, vect_location, |
7897 dump_printf_loc (MSG_NOTE, vect_location, | 8735 "Move stmt to created bb\n%G", stmt1); |
7898 "Move stmt to created bb\n"); | |
7899 dump_gimple_stmt (MSG_NOTE, TDF_SLIM, stmt1, 0); | |
7900 } | |
7901 gsi_move_before (&gsi_from, &gsi_to); | 8736 gsi_move_before (&gsi_from, &gsi_to); |
7902 /* Shift GSI_TO for further insertion. */ | 8737 /* Shift GSI_TO for further insertion. */ |
7903 gsi_prev (&gsi_to); | 8738 gsi_prev (&gsi_to); |
7904 } | 8739 } |
7905 /* Put other masked stores with the same mask to STORE_BB. */ | 8740 /* Put other masked stores with the same mask to STORE_BB. */ |