Mercurial > hg > CbC > CbC_gcc
comparison gcc/tree-vectorizer.c @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
comparison
equal
deleted
inserted
replaced
111:04ced10e8804 | 131:84e7813d76e9 |
---|---|
1 /* Vectorizer | 1 /* Vectorizer |
2 Copyright (C) 2003-2017 Free Software Foundation, Inc. | 2 Copyright (C) 2003-2018 Free Software Foundation, Inc. |
3 Contributed by Dorit Naishlos <dorit@il.ibm.com> | 3 Contributed by Dorit Naishlos <dorit@il.ibm.com> |
4 | 4 |
5 This file is part of GCC. | 5 This file is part of GCC. |
6 | 6 |
7 GCC is free software; you can redistribute it and/or modify it under | 7 GCC is free software; you can redistribute it and/or modify it under |
76 #include "tree-ssa-propagate.h" | 76 #include "tree-ssa-propagate.h" |
77 #include "dbgcnt.h" | 77 #include "dbgcnt.h" |
78 #include "tree-scalar-evolution.h" | 78 #include "tree-scalar-evolution.h" |
79 #include "stringpool.h" | 79 #include "stringpool.h" |
80 #include "attribs.h" | 80 #include "attribs.h" |
81 | 81 #include "gimple-pretty-print.h" |
82 | 82 #include "opt-problem.h" |
83 /* Loop or bb location. */ | 83 #include "internal-fn.h" |
84 source_location vect_location; | 84 |
85 | 85 |
86 /* Vector mapping GIMPLE stmt to stmt_vec_info. */ | 86 /* Loop or bb location, with hotness information. */ |
87 vec<stmt_vec_info> stmt_vec_info_vec; | 87 dump_user_location_t vect_location; |
88 | |
89 /* Dump a cost entry according to args to F. */ | |
90 | |
91 void | |
92 dump_stmt_cost (FILE *f, void *data, int count, enum vect_cost_for_stmt kind, | |
93 stmt_vec_info stmt_info, int misalign, unsigned cost, | |
94 enum vect_cost_model_location where) | |
95 { | |
96 fprintf (f, "%p ", data); | |
97 if (stmt_info) | |
98 { | |
99 print_gimple_expr (f, STMT_VINFO_STMT (stmt_info), 0, TDF_SLIM); | |
100 fprintf (f, " "); | |
101 } | |
102 else | |
103 fprintf (f, "<unknown> "); | |
104 fprintf (f, "%d times ", count); | |
105 const char *ks = "unknown"; | |
106 switch (kind) | |
107 { | |
108 case scalar_stmt: | |
109 ks = "scalar_stmt"; | |
110 break; | |
111 case scalar_load: | |
112 ks = "scalar_load"; | |
113 break; | |
114 case scalar_store: | |
115 ks = "scalar_store"; | |
116 break; | |
117 case vector_stmt: | |
118 ks = "vector_stmt"; | |
119 break; | |
120 case vector_load: | |
121 ks = "vector_load"; | |
122 break; | |
123 case vector_gather_load: | |
124 ks = "vector_gather_load"; | |
125 break; | |
126 case unaligned_load: | |
127 ks = "unaligned_load"; | |
128 break; | |
129 case unaligned_store: | |
130 ks = "unaligned_store"; | |
131 break; | |
132 case vector_store: | |
133 ks = "vector_store"; | |
134 break; | |
135 case vector_scatter_store: | |
136 ks = "vector_scatter_store"; | |
137 break; | |
138 case vec_to_scalar: | |
139 ks = "vec_to_scalar"; | |
140 break; | |
141 case scalar_to_vec: | |
142 ks = "scalar_to_vec"; | |
143 break; | |
144 case cond_branch_not_taken: | |
145 ks = "cond_branch_not_taken"; | |
146 break; | |
147 case cond_branch_taken: | |
148 ks = "cond_branch_taken"; | |
149 break; | |
150 case vec_perm: | |
151 ks = "vec_perm"; | |
152 break; | |
153 case vec_promote_demote: | |
154 ks = "vec_promote_demote"; | |
155 break; | |
156 case vec_construct: | |
157 ks = "vec_construct"; | |
158 break; | |
159 } | |
160 fprintf (f, "%s ", ks); | |
161 if (kind == unaligned_load || kind == unaligned_store) | |
162 fprintf (f, "(misalign %d) ", misalign); | |
163 fprintf (f, "costs %u ", cost); | |
164 const char *ws = "unknown"; | |
165 switch (where) | |
166 { | |
167 case vect_prologue: | |
168 ws = "prologue"; | |
169 break; | |
170 case vect_body: | |
171 ws = "body"; | |
172 break; | |
173 case vect_epilogue: | |
174 ws = "epilogue"; | |
175 break; | |
176 } | |
177 fprintf (f, "in %s\n", ws); | |
178 } | |
88 | 179 |
89 /* For mapping simduid to vectorization factor. */ | 180 /* For mapping simduid to vectorization factor. */ |
90 | 181 |
91 struct simduid_to_vf : free_ptr_hash<simduid_to_vf> | 182 struct simduid_to_vf : free_ptr_hash<simduid_to_vf> |
92 { | 183 { |
93 unsigned int simduid; | 184 unsigned int simduid; |
94 int vf; | 185 poly_uint64 vf; |
95 | 186 |
96 /* hash_table support. */ | 187 /* hash_table support. */ |
97 static inline hashval_t hash (const simduid_to_vf *); | 188 static inline hashval_t hash (const simduid_to_vf *); |
98 static inline int equal (const simduid_to_vf *, const simduid_to_vf *); | 189 static inline int equal (const simduid_to_vf *, const simduid_to_vf *); |
99 }; | 190 }; |
159 { | 250 { |
160 gimple_stmt_iterator i; | 251 gimple_stmt_iterator i; |
161 | 252 |
162 for (i = gsi_start_bb (bb); !gsi_end_p (i); ) | 253 for (i = gsi_start_bb (bb); !gsi_end_p (i); ) |
163 { | 254 { |
164 unsigned int vf = 1; | 255 poly_uint64 vf = 1; |
165 enum internal_fn ifn; | 256 enum internal_fn ifn; |
166 gimple *stmt = gsi_stmt (i); | 257 gimple *stmt = gsi_stmt (i); |
167 tree t; | 258 tree t; |
168 if (!is_gimple_call (stmt) | 259 if (!is_gimple_call (stmt) |
169 || !gimple_call_internal_p (stmt)) | 260 || !gimple_call_internal_p (stmt)) |
336 = simd_array_to_simduid_htab->begin (); | 427 = simd_array_to_simduid_htab->begin (); |
337 iter != simd_array_to_simduid_htab->end (); ++iter) | 428 iter != simd_array_to_simduid_htab->end (); ++iter) |
338 if ((*iter)->simduid != -1U) | 429 if ((*iter)->simduid != -1U) |
339 { | 430 { |
340 tree decl = (*iter)->decl; | 431 tree decl = (*iter)->decl; |
341 int vf = 1; | 432 poly_uint64 vf = 1; |
342 if (simduid_to_vf_htab) | 433 if (simduid_to_vf_htab) |
343 { | 434 { |
344 simduid_to_vf *p = NULL, data; | 435 simduid_to_vf *p = NULL, data; |
345 data.simduid = (*iter)->simduid; | 436 data.simduid = (*iter)->simduid; |
346 p = simduid_to_vf_htab->find (&data); | 437 p = simduid_to_vf_htab->find (&data); |
357 } | 448 } |
358 | 449 |
359 /* Initialize the vec_info with kind KIND_IN and target cost data | 450 /* Initialize the vec_info with kind KIND_IN and target cost data |
360 TARGET_COST_DATA_IN. */ | 451 TARGET_COST_DATA_IN. */ |
361 | 452 |
362 vec_info::vec_info (vec_info::vec_kind kind_in, void *target_cost_data_in) | 453 vec_info::vec_info (vec_info::vec_kind kind_in, void *target_cost_data_in, |
454 vec_info_shared *shared_) | |
363 : kind (kind_in), | 455 : kind (kind_in), |
364 datarefs (vNULL), | 456 shared (shared_), |
365 ddrs (vNULL), | |
366 target_cost_data (target_cost_data_in) | 457 target_cost_data (target_cost_data_in) |
367 { | 458 { |
459 stmt_vec_infos.create (50); | |
368 } | 460 } |
369 | 461 |
370 vec_info::~vec_info () | 462 vec_info::~vec_info () |
371 { | 463 { |
372 slp_instance instance; | 464 slp_instance instance; |
373 struct data_reference *dr; | |
374 unsigned int i; | 465 unsigned int i; |
375 | 466 |
376 FOR_EACH_VEC_ELT (datarefs, i, dr) | |
377 if (dr->aux) | |
378 { | |
379 free (dr->aux); | |
380 dr->aux = NULL; | |
381 } | |
382 | |
383 FOR_EACH_VEC_ELT (slp_instances, i, instance) | 467 FOR_EACH_VEC_ELT (slp_instances, i, instance) |
384 vect_free_slp_instance (instance); | 468 vect_free_slp_instance (instance, true); |
385 | 469 |
470 destroy_cost_data (target_cost_data); | |
471 free_stmt_vec_infos (); | |
472 } | |
473 | |
474 vec_info_shared::vec_info_shared () | |
475 : datarefs (vNULL), | |
476 datarefs_copy (vNULL), | |
477 ddrs (vNULL) | |
478 { | |
479 } | |
480 | |
481 vec_info_shared::~vec_info_shared () | |
482 { | |
386 free_data_refs (datarefs); | 483 free_data_refs (datarefs); |
387 free_dependence_relations (ddrs); | 484 free_dependence_relations (ddrs); |
388 destroy_cost_data (target_cost_data); | 485 datarefs_copy.release (); |
486 } | |
487 | |
488 void | |
489 vec_info_shared::save_datarefs () | |
490 { | |
491 if (!flag_checking) | |
492 return; | |
493 datarefs_copy.reserve_exact (datarefs.length ()); | |
494 for (unsigned i = 0; i < datarefs.length (); ++i) | |
495 datarefs_copy.quick_push (*datarefs[i]); | |
496 } | |
497 | |
498 void | |
499 vec_info_shared::check_datarefs () | |
500 { | |
501 if (!flag_checking) | |
502 return; | |
503 gcc_assert (datarefs.length () == datarefs_copy.length ()); | |
504 for (unsigned i = 0; i < datarefs.length (); ++i) | |
505 if (memcmp (&datarefs_copy[i], datarefs[i], sizeof (data_reference)) != 0) | |
506 gcc_unreachable (); | |
507 } | |
508 | |
509 /* Record that STMT belongs to the vectorizable region. Create and return | |
510 an associated stmt_vec_info. */ | |
511 | |
512 stmt_vec_info | |
513 vec_info::add_stmt (gimple *stmt) | |
514 { | |
515 stmt_vec_info res = new_stmt_vec_info (stmt); | |
516 set_vinfo_for_stmt (stmt, res); | |
517 return res; | |
518 } | |
519 | |
520 /* If STMT has an associated stmt_vec_info, return that vec_info, otherwise | |
521 return null. It is safe to call this function on any statement, even if | |
522 it might not be part of the vectorizable region. */ | |
523 | |
524 stmt_vec_info | |
525 vec_info::lookup_stmt (gimple *stmt) | |
526 { | |
527 unsigned int uid = gimple_uid (stmt); | |
528 if (uid > 0 && uid - 1 < stmt_vec_infos.length ()) | |
529 { | |
530 stmt_vec_info res = stmt_vec_infos[uid - 1]; | |
531 if (res && res->stmt == stmt) | |
532 return res; | |
533 } | |
534 return NULL; | |
535 } | |
536 | |
537 /* If NAME is an SSA_NAME and its definition has an associated stmt_vec_info, | |
538 return that stmt_vec_info, otherwise return null. It is safe to call | |
539 this on arbitrary operands. */ | |
540 | |
541 stmt_vec_info | |
542 vec_info::lookup_def (tree name) | |
543 { | |
544 if (TREE_CODE (name) == SSA_NAME | |
545 && !SSA_NAME_IS_DEFAULT_DEF (name)) | |
546 return lookup_stmt (SSA_NAME_DEF_STMT (name)); | |
547 return NULL; | |
548 } | |
549 | |
550 /* See whether there is a single non-debug statement that uses LHS and | |
551 whether that statement has an associated stmt_vec_info. Return the | |
552 stmt_vec_info if so, otherwise return null. */ | |
553 | |
554 stmt_vec_info | |
555 vec_info::lookup_single_use (tree lhs) | |
556 { | |
557 use_operand_p dummy; | |
558 gimple *use_stmt; | |
559 if (single_imm_use (lhs, &dummy, &use_stmt)) | |
560 return lookup_stmt (use_stmt); | |
561 return NULL; | |
562 } | |
563 | |
564 /* Return vectorization information about DR. */ | |
565 | |
566 dr_vec_info * | |
567 vec_info::lookup_dr (data_reference *dr) | |
568 { | |
569 stmt_vec_info stmt_info = lookup_stmt (DR_STMT (dr)); | |
570 /* DR_STMT should never refer to a stmt in a pattern replacement. */ | |
571 gcc_checking_assert (!is_pattern_stmt_p (stmt_info)); | |
572 return STMT_VINFO_DR_INFO (stmt_info->dr_aux.stmt); | |
573 } | |
574 | |
575 /* Record that NEW_STMT_INFO now implements the same data reference | |
576 as OLD_STMT_INFO. */ | |
577 | |
578 void | |
579 vec_info::move_dr (stmt_vec_info new_stmt_info, stmt_vec_info old_stmt_info) | |
580 { | |
581 gcc_assert (!is_pattern_stmt_p (old_stmt_info)); | |
582 STMT_VINFO_DR_INFO (old_stmt_info)->stmt = new_stmt_info; | |
583 new_stmt_info->dr_aux = old_stmt_info->dr_aux; | |
584 STMT_VINFO_DR_WRT_VEC_LOOP (new_stmt_info) | |
585 = STMT_VINFO_DR_WRT_VEC_LOOP (old_stmt_info); | |
586 STMT_VINFO_GATHER_SCATTER_P (new_stmt_info) | |
587 = STMT_VINFO_GATHER_SCATTER_P (old_stmt_info); | |
588 } | |
589 | |
590 /* Permanently remove the statement described by STMT_INFO from the | |
591 function. */ | |
592 | |
593 void | |
594 vec_info::remove_stmt (stmt_vec_info stmt_info) | |
595 { | |
596 gcc_assert (!stmt_info->pattern_stmt_p); | |
597 set_vinfo_for_stmt (stmt_info->stmt, NULL); | |
598 gimple_stmt_iterator si = gsi_for_stmt (stmt_info->stmt); | |
599 unlink_stmt_vdef (stmt_info->stmt); | |
600 gsi_remove (&si, true); | |
601 release_defs (stmt_info->stmt); | |
602 free_stmt_vec_info (stmt_info); | |
603 } | |
604 | |
605 /* Replace the statement at GSI by NEW_STMT, both the vectorization | |
606 information and the function itself. STMT_INFO describes the statement | |
607 at GSI. */ | |
608 | |
609 void | |
610 vec_info::replace_stmt (gimple_stmt_iterator *gsi, stmt_vec_info stmt_info, | |
611 gimple *new_stmt) | |
612 { | |
613 gimple *old_stmt = stmt_info->stmt; | |
614 gcc_assert (!stmt_info->pattern_stmt_p && old_stmt == gsi_stmt (*gsi)); | |
615 set_vinfo_for_stmt (old_stmt, NULL); | |
616 set_vinfo_for_stmt (new_stmt, stmt_info); | |
617 stmt_info->stmt = new_stmt; | |
618 gsi_replace (gsi, new_stmt, true); | |
619 } | |
620 | |
621 /* Create and initialize a new stmt_vec_info struct for STMT. */ | |
622 | |
623 stmt_vec_info | |
624 vec_info::new_stmt_vec_info (gimple *stmt) | |
625 { | |
626 stmt_vec_info res = XCNEW (struct _stmt_vec_info); | |
627 res->vinfo = this; | |
628 res->stmt = stmt; | |
629 | |
630 STMT_VINFO_TYPE (res) = undef_vec_info_type; | |
631 STMT_VINFO_RELEVANT (res) = vect_unused_in_scope; | |
632 STMT_VINFO_VECTORIZABLE (res) = true; | |
633 STMT_VINFO_VEC_REDUCTION_TYPE (res) = TREE_CODE_REDUCTION; | |
634 STMT_VINFO_VEC_CONST_COND_REDUC_CODE (res) = ERROR_MARK; | |
635 | |
636 if (gimple_code (stmt) == GIMPLE_PHI | |
637 && is_loop_header_bb_p (gimple_bb (stmt))) | |
638 STMT_VINFO_DEF_TYPE (res) = vect_unknown_def_type; | |
639 else | |
640 STMT_VINFO_DEF_TYPE (res) = vect_internal_def; | |
641 | |
642 STMT_VINFO_SAME_ALIGN_REFS (res).create (0); | |
643 STMT_SLP_TYPE (res) = loop_vect; | |
644 | |
645 /* This is really "uninitialized" until vect_compute_data_ref_alignment. */ | |
646 res->dr_aux.misalignment = DR_MISALIGNMENT_UNINITIALIZED; | |
647 | |
648 return res; | |
649 } | |
650 | |
651 /* Associate STMT with INFO. */ | |
652 | |
653 void | |
654 vec_info::set_vinfo_for_stmt (gimple *stmt, stmt_vec_info info) | |
655 { | |
656 unsigned int uid = gimple_uid (stmt); | |
657 if (uid == 0) | |
658 { | |
659 gcc_checking_assert (info); | |
660 uid = stmt_vec_infos.length () + 1; | |
661 gimple_set_uid (stmt, uid); | |
662 stmt_vec_infos.safe_push (info); | |
663 } | |
664 else | |
665 { | |
666 gcc_checking_assert (info == NULL); | |
667 stmt_vec_infos[uid - 1] = info; | |
668 } | |
669 } | |
670 | |
671 /* Free the contents of stmt_vec_infos. */ | |
672 | |
673 void | |
674 vec_info::free_stmt_vec_infos (void) | |
675 { | |
676 unsigned int i; | |
677 stmt_vec_info info; | |
678 FOR_EACH_VEC_ELT (stmt_vec_infos, i, info) | |
679 if (info != NULL) | |
680 free_stmt_vec_info (info); | |
681 stmt_vec_infos.release (); | |
682 } | |
683 | |
684 /* Free STMT_INFO. */ | |
685 | |
686 void | |
687 vec_info::free_stmt_vec_info (stmt_vec_info stmt_info) | |
688 { | |
689 if (stmt_info->pattern_stmt_p) | |
690 { | |
691 gimple_set_bb (stmt_info->stmt, NULL); | |
692 tree lhs = gimple_get_lhs (stmt_info->stmt); | |
693 if (lhs && TREE_CODE (lhs) == SSA_NAME) | |
694 release_ssa_name (lhs); | |
695 } | |
696 | |
697 STMT_VINFO_SAME_ALIGN_REFS (stmt_info).release (); | |
698 STMT_VINFO_SIMD_CLONE_INFO (stmt_info).release (); | |
699 free (stmt_info); | |
389 } | 700 } |
390 | 701 |
391 /* A helper function to free scev and LOOP niter information, as well as | 702 /* A helper function to free scev and LOOP niter information, as well as |
392 clear loop constraint LOOP_C_FINITE. */ | 703 clear loop constraint LOOP_C_FINITE. */ |
393 | 704 |
400 loop->any_upper_bound = false; | 711 loop->any_upper_bound = false; |
401 loop->any_likely_upper_bound = false; | 712 loop->any_likely_upper_bound = false; |
402 free_numbers_of_iterations_estimates (loop); | 713 free_numbers_of_iterations_estimates (loop); |
403 loop_constraint_clear (loop, LOOP_C_FINITE); | 714 loop_constraint_clear (loop, LOOP_C_FINITE); |
404 } | 715 } |
405 | |
406 /* Return whether STMT is inside the region we try to vectorize. */ | |
407 | |
408 bool | |
409 vect_stmt_in_region_p (vec_info *vinfo, gimple *stmt) | |
410 { | |
411 if (!gimple_bb (stmt)) | |
412 return false; | |
413 | |
414 if (loop_vec_info loop_vinfo = dyn_cast <loop_vec_info> (vinfo)) | |
415 { | |
416 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo); | |
417 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt))) | |
418 return false; | |
419 } | |
420 else | |
421 { | |
422 bb_vec_info bb_vinfo = as_a <bb_vec_info> (vinfo); | |
423 if (gimple_bb (stmt) != BB_VINFO_BB (bb_vinfo) | |
424 || gimple_uid (stmt) == -1U | |
425 || gimple_code (stmt) == GIMPLE_PHI) | |
426 return false; | |
427 } | |
428 | |
429 return true; | |
430 } | |
431 | |
432 | 716 |
433 /* If LOOP has been versioned during ifcvt, return the internal call | 717 /* If LOOP has been versioned during ifcvt, return the internal call |
434 guarding it. */ | 718 guarding it. */ |
435 | 719 |
436 static gimple * | 720 static gimple * |
460 || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num)) | 744 || tree_to_shwi (gimple_call_arg (g, 1)) == loop->num)) |
461 return g; | 745 return g; |
462 } | 746 } |
463 } | 747 } |
464 return NULL; | 748 return NULL; |
465 } | |
466 | |
467 /* Fold loop internal call G like IFN_LOOP_VECTORIZED/IFN_LOOP_DIST_ALIAS | |
468 to VALUE and update any immediate uses of its LHS. */ | |
469 | |
470 static void | |
471 fold_loop_internal_call (gimple *g, tree value) | |
472 { | |
473 tree lhs = gimple_call_lhs (g); | |
474 use_operand_p use_p; | |
475 imm_use_iterator iter; | |
476 gimple *use_stmt; | |
477 gimple_stmt_iterator gsi = gsi_for_stmt (g); | |
478 | |
479 update_call_from_tree (&gsi, value); | |
480 FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) | |
481 { | |
482 FOR_EACH_IMM_USE_ON_STMT (use_p, iter) | |
483 SET_USE (use_p, value); | |
484 update_stmt (use_stmt); | |
485 } | |
486 } | 749 } |
487 | 750 |
488 /* If LOOP has been versioned during loop distribution, return the guarding | 751 /* If LOOP has been versioned during loop distribution, return the guarding |
489 internal call. */ | 752 internal call. */ |
490 | 753 |
584 } | 847 } |
585 } | 848 } |
586 free (bbs); | 849 free (bbs); |
587 } | 850 } |
588 | 851 |
852 /* Try to vectorize LOOP. */ | |
853 | |
854 static unsigned | |
855 try_vectorize_loop_1 (hash_table<simduid_to_vf> *&simduid_to_vf_htab, | |
856 unsigned *num_vectorized_loops, | |
857 loop_p loop, loop_vec_info orig_loop_vinfo, | |
858 gimple *loop_vectorized_call, | |
859 gimple *loop_dist_alias_call) | |
860 { | |
861 unsigned ret = 0; | |
862 vec_info_shared shared; | |
863 vect_location = find_loop_location (loop); | |
864 if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION | |
865 && dump_enabled_p ()) | |
866 dump_printf (MSG_NOTE | MSG_PRIORITY_INTERNALS, | |
867 "\nAnalyzing loop at %s:%d\n", | |
868 LOCATION_FILE (vect_location.get_location_t ()), | |
869 LOCATION_LINE (vect_location.get_location_t ())); | |
870 | |
871 /* Try to analyze the loop, retaining an opt_problem if dump_enabled_p. */ | |
872 opt_loop_vec_info loop_vinfo | |
873 = vect_analyze_loop (loop, orig_loop_vinfo, &shared); | |
874 loop->aux = loop_vinfo; | |
875 | |
876 if (!loop_vinfo) | |
877 if (dump_enabled_p ()) | |
878 if (opt_problem *problem = loop_vinfo.get_problem ()) | |
879 { | |
880 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location, | |
881 "couldn't vectorize loop\n"); | |
882 problem->emit_and_clear (); | |
883 } | |
884 | |
885 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) | |
886 { | |
887 /* Free existing information if loop is analyzed with some | |
888 assumptions. */ | |
889 if (loop_constraint_set_p (loop, LOOP_C_FINITE)) | |
890 vect_free_loop_info_assumptions (loop); | |
891 | |
892 /* If we applied if-conversion then try to vectorize the | |
893 BB of innermost loops. | |
894 ??? Ideally BB vectorization would learn to vectorize | |
895 control flow by applying if-conversion on-the-fly, the | |
896 following retains the if-converted loop body even when | |
897 only non-if-converted parts took part in BB vectorization. */ | |
898 if (flag_tree_slp_vectorize != 0 | |
899 && loop_vectorized_call | |
900 && ! loop->inner) | |
901 { | |
902 basic_block bb = loop->header; | |
903 bool require_loop_vectorize = false; | |
904 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); | |
905 !gsi_end_p (gsi); gsi_next (&gsi)) | |
906 { | |
907 gimple *stmt = gsi_stmt (gsi); | |
908 gcall *call = dyn_cast <gcall *> (stmt); | |
909 if (call && gimple_call_internal_p (call)) | |
910 { | |
911 internal_fn ifn = gimple_call_internal_fn (call); | |
912 if (ifn == IFN_MASK_LOAD || ifn == IFN_MASK_STORE | |
913 /* Don't keep the if-converted parts when the ifn with | |
914 specific type is not supported by the backend. */ | |
915 || (direct_internal_fn_p (ifn) | |
916 && !direct_internal_fn_supported_p | |
917 (call, OPTIMIZE_FOR_SPEED))) | |
918 { | |
919 require_loop_vectorize = true; | |
920 break; | |
921 } | |
922 } | |
923 gimple_set_uid (stmt, -1); | |
924 gimple_set_visited (stmt, false); | |
925 } | |
926 if (!require_loop_vectorize && vect_slp_bb (bb)) | |
927 { | |
928 dump_printf_loc (MSG_NOTE, vect_location, | |
929 "basic block vectorized\n"); | |
930 fold_loop_internal_call (loop_vectorized_call, | |
931 boolean_true_node); | |
932 loop_vectorized_call = NULL; | |
933 ret |= TODO_cleanup_cfg; | |
934 } | |
935 } | |
936 /* If outer loop vectorization fails for LOOP_VECTORIZED guarded | |
937 loop, don't vectorize its inner loop; we'll attempt to | |
938 vectorize LOOP_VECTORIZED guarded inner loop of the scalar | |
939 loop version. */ | |
940 if (loop_vectorized_call && loop->inner) | |
941 loop->inner->dont_vectorize = true; | |
942 return ret; | |
943 } | |
944 | |
945 if (!dbg_cnt (vect_loop)) | |
946 { | |
947 /* Free existing information if loop is analyzed with some | |
948 assumptions. */ | |
949 if (loop_constraint_set_p (loop, LOOP_C_FINITE)) | |
950 vect_free_loop_info_assumptions (loop); | |
951 return ret; | |
952 } | |
953 | |
954 if (loop_vectorized_call) | |
955 set_uid_loop_bbs (loop_vinfo, loop_vectorized_call); | |
956 | |
957 unsigned HOST_WIDE_INT bytes; | |
958 if (current_vector_size.is_constant (&bytes)) | |
959 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, | |
960 "loop vectorized using %wu byte vectors\n", bytes); | |
961 else | |
962 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, | |
963 "loop vectorized using variable length vectors\n"); | |
964 | |
965 loop_p new_loop = vect_transform_loop (loop_vinfo); | |
966 (*num_vectorized_loops)++; | |
967 /* Now that the loop has been vectorized, allow it to be unrolled | |
968 etc. */ | |
969 loop->force_vectorize = false; | |
970 | |
971 if (loop->simduid) | |
972 { | |
973 simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf); | |
974 if (!simduid_to_vf_htab) | |
975 simduid_to_vf_htab = new hash_table<simduid_to_vf> (15); | |
976 simduid_to_vf_data->simduid = DECL_UID (loop->simduid); | |
977 simduid_to_vf_data->vf = loop_vinfo->vectorization_factor; | |
978 *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT) | |
979 = simduid_to_vf_data; | |
980 } | |
981 | |
982 if (loop_vectorized_call) | |
983 { | |
984 fold_loop_internal_call (loop_vectorized_call, boolean_true_node); | |
985 loop_vectorized_call = NULL; | |
986 ret |= TODO_cleanup_cfg; | |
987 } | |
988 if (loop_dist_alias_call) | |
989 { | |
990 tree value = gimple_call_arg (loop_dist_alias_call, 1); | |
991 fold_loop_internal_call (loop_dist_alias_call, value); | |
992 loop_dist_alias_call = NULL; | |
993 ret |= TODO_cleanup_cfg; | |
994 } | |
995 | |
996 /* Epilogue of vectorized loop must be vectorized too. */ | |
997 if (new_loop) | |
998 ret |= try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, | |
999 new_loop, loop_vinfo, NULL, NULL); | |
1000 | |
1001 return ret; | |
1002 } | |
1003 | |
1004 /* Try to vectorize LOOP. */ | |
1005 | |
1006 static unsigned | |
1007 try_vectorize_loop (hash_table<simduid_to_vf> *&simduid_to_vf_htab, | |
1008 unsigned *num_vectorized_loops, loop_p loop) | |
1009 { | |
1010 if (!((flag_tree_loop_vectorize | |
1011 && optimize_loop_nest_for_speed_p (loop)) | |
1012 || loop->force_vectorize)) | |
1013 return 0; | |
1014 | |
1015 return try_vectorize_loop_1 (simduid_to_vf_htab, num_vectorized_loops, | |
1016 loop, NULL, | |
1017 vect_loop_vectorized_call (loop), | |
1018 vect_loop_dist_alias_call (loop)); | |
1019 } | |
1020 | |
1021 | |
589 /* Function vectorize_loops. | 1022 /* Function vectorize_loops. |
590 | 1023 |
591 Entry point to loop vectorization phase. */ | 1024 Entry point to loop vectorization phase. */ |
592 | 1025 |
593 unsigned | 1026 unsigned |
599 struct loop *loop; | 1032 struct loop *loop; |
600 hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL; | 1033 hash_table<simduid_to_vf> *simduid_to_vf_htab = NULL; |
601 hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL; | 1034 hash_table<simd_array_to_simduid> *simd_array_to_simduid_htab = NULL; |
602 bool any_ifcvt_loops = false; | 1035 bool any_ifcvt_loops = false; |
603 unsigned ret = 0; | 1036 unsigned ret = 0; |
604 struct loop *new_loop; | |
605 | 1037 |
606 vect_loops_num = number_of_loops (cfun); | 1038 vect_loops_num = number_of_loops (cfun); |
607 | 1039 |
608 /* Bail out if there are no loops. */ | 1040 /* Bail out if there are no loops. */ |
609 if (vect_loops_num <= 1) | 1041 if (vect_loops_num <= 1) |
610 return 0; | 1042 return 0; |
611 | 1043 |
612 if (cfun->has_simduid_loops) | 1044 if (cfun->has_simduid_loops) |
613 note_simd_array_uses (&simd_array_to_simduid_htab); | 1045 note_simd_array_uses (&simd_array_to_simduid_htab); |
614 | |
615 init_stmt_vec_info_vec (); | |
616 | 1046 |
617 /* ----------- Analyze loops. ----------- */ | 1047 /* ----------- Analyze loops. ----------- */ |
618 | 1048 |
619 /* If some loop was duplicated, it gets a bigger number | 1049 /* If some loop was duplicated, it gets a bigger number |
620 than all previously defined loops. This fact allows us to run | 1050 than all previously defined loops. This fact allows us to run |
652 tree arg = gimple_call_arg (loop_vectorized_call, 0); | 1082 tree arg = gimple_call_arg (loop_vectorized_call, 0); |
653 struct loop *vector_loop | 1083 struct loop *vector_loop |
654 = get_loop (cfun, tree_to_shwi (arg)); | 1084 = get_loop (cfun, tree_to_shwi (arg)); |
655 if (vector_loop && vector_loop != loop) | 1085 if (vector_loop && vector_loop != loop) |
656 { | 1086 { |
657 loop = vector_loop; | |
658 /* Make sure we don't vectorize it twice. */ | 1087 /* Make sure we don't vectorize it twice. */ |
659 loop->dont_vectorize = true; | 1088 vector_loop->dont_vectorize = true; |
660 goto try_vectorize; | 1089 ret |= try_vectorize_loop (simduid_to_vf_htab, |
1090 &num_vectorized_loops, | |
1091 vector_loop); | |
661 } | 1092 } |
662 } | 1093 } |
663 } | 1094 } |
664 } | 1095 } |
665 else | 1096 else |
666 { | 1097 ret |= try_vectorize_loop (simduid_to_vf_htab, &num_vectorized_loops, |
667 loop_vec_info loop_vinfo, orig_loop_vinfo; | 1098 loop); |
668 gimple *loop_vectorized_call, *loop_dist_alias_call; | 1099 |
669 try_vectorize: | 1100 vect_location = dump_user_location_t (); |
670 if (!((flag_tree_loop_vectorize | |
671 && optimize_loop_nest_for_speed_p (loop)) | |
672 || loop->force_vectorize)) | |
673 continue; | |
674 orig_loop_vinfo = NULL; | |
675 loop_vectorized_call = vect_loop_vectorized_call (loop); | |
676 loop_dist_alias_call = vect_loop_dist_alias_call (loop); | |
677 vectorize_epilogue: | |
678 vect_location = find_loop_location (loop); | |
679 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION | |
680 && dump_enabled_p ()) | |
681 dump_printf (MSG_NOTE, "\nAnalyzing loop at %s:%d\n", | |
682 LOCATION_FILE (vect_location), | |
683 LOCATION_LINE (vect_location)); | |
684 | |
685 loop_vinfo = vect_analyze_loop (loop, orig_loop_vinfo); | |
686 loop->aux = loop_vinfo; | |
687 | |
688 if (!loop_vinfo || !LOOP_VINFO_VECTORIZABLE_P (loop_vinfo)) | |
689 { | |
690 /* Free existing information if loop is analyzed with some | |
691 assumptions. */ | |
692 if (loop_constraint_set_p (loop, LOOP_C_FINITE)) | |
693 vect_free_loop_info_assumptions (loop); | |
694 | |
695 /* If we applied if-conversion then try to vectorize the | |
696 BB of innermost loops. | |
697 ??? Ideally BB vectorization would learn to vectorize | |
698 control flow by applying if-conversion on-the-fly, the | |
699 following retains the if-converted loop body even when | |
700 only non-if-converted parts took part in BB vectorization. */ | |
701 if (flag_tree_slp_vectorize != 0 | |
702 && loop_vectorized_call | |
703 && ! loop->inner) | |
704 { | |
705 basic_block bb = loop->header; | |
706 bool has_mask_load_store = false; | |
707 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); | |
708 !gsi_end_p (gsi); gsi_next (&gsi)) | |
709 { | |
710 gimple *stmt = gsi_stmt (gsi); | |
711 if (is_gimple_call (stmt) | |
712 && gimple_call_internal_p (stmt) | |
713 && (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD | |
714 || gimple_call_internal_fn (stmt) == IFN_MASK_STORE)) | |
715 { | |
716 has_mask_load_store = true; | |
717 break; | |
718 } | |
719 gimple_set_uid (stmt, -1); | |
720 gimple_set_visited (stmt, false); | |
721 } | |
722 if (! has_mask_load_store && vect_slp_bb (bb)) | |
723 { | |
724 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, | |
725 "basic block vectorized\n"); | |
726 fold_loop_internal_call (loop_vectorized_call, | |
727 boolean_true_node); | |
728 loop_vectorized_call = NULL; | |
729 ret |= TODO_cleanup_cfg; | |
730 } | |
731 } | |
732 /* If outer loop vectorization fails for LOOP_VECTORIZED guarded | |
733 loop, don't vectorize its inner loop; we'll attempt to | |
734 vectorize LOOP_VECTORIZED guarded inner loop of the scalar | |
735 loop version. */ | |
736 if (loop_vectorized_call && loop->inner) | |
737 loop->inner->dont_vectorize = true; | |
738 continue; | |
739 } | |
740 | |
741 if (!dbg_cnt (vect_loop)) | |
742 { | |
743 /* We may miss some if-converted loops due to | |
744 debug counter. Set any_ifcvt_loops to visit | |
745 them at finalization. */ | |
746 any_ifcvt_loops = true; | |
747 /* Free existing information if loop is analyzed with some | |
748 assumptions. */ | |
749 if (loop_constraint_set_p (loop, LOOP_C_FINITE)) | |
750 vect_free_loop_info_assumptions (loop); | |
751 | |
752 break; | |
753 } | |
754 | |
755 if (loop_vectorized_call) | |
756 set_uid_loop_bbs (loop_vinfo, loop_vectorized_call); | |
757 if (LOCATION_LOCUS (vect_location) != UNKNOWN_LOCATION | |
758 && dump_enabled_p ()) | |
759 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, | |
760 "loop vectorized\n"); | |
761 new_loop = vect_transform_loop (loop_vinfo); | |
762 num_vectorized_loops++; | |
763 /* Now that the loop has been vectorized, allow it to be unrolled | |
764 etc. */ | |
765 loop->force_vectorize = false; | |
766 | |
767 if (loop->simduid) | |
768 { | |
769 simduid_to_vf *simduid_to_vf_data = XNEW (simduid_to_vf); | |
770 if (!simduid_to_vf_htab) | |
771 simduid_to_vf_htab = new hash_table<simduid_to_vf> (15); | |
772 simduid_to_vf_data->simduid = DECL_UID (loop->simduid); | |
773 simduid_to_vf_data->vf = loop_vinfo->vectorization_factor; | |
774 *simduid_to_vf_htab->find_slot (simduid_to_vf_data, INSERT) | |
775 = simduid_to_vf_data; | |
776 } | |
777 | |
778 if (loop_vectorized_call) | |
779 { | |
780 fold_loop_internal_call (loop_vectorized_call, boolean_true_node); | |
781 loop_vectorized_call = NULL; | |
782 ret |= TODO_cleanup_cfg; | |
783 } | |
784 if (loop_dist_alias_call) | |
785 { | |
786 tree value = gimple_call_arg (loop_dist_alias_call, 1); | |
787 fold_loop_internal_call (loop_dist_alias_call, value); | |
788 loop_dist_alias_call = NULL; | |
789 ret |= TODO_cleanup_cfg; | |
790 } | |
791 | |
792 if (new_loop) | |
793 { | |
794 /* Epilogue of vectorized loop must be vectorized too. */ | |
795 vect_loops_num = number_of_loops (cfun); | |
796 loop = new_loop; | |
797 orig_loop_vinfo = loop_vinfo; /* To pass vect_analyze_loop. */ | |
798 goto vectorize_epilogue; | |
799 } | |
800 } | |
801 | |
802 vect_location = UNKNOWN_LOCATION; | |
803 | 1101 |
804 statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops); | 1102 statistics_counter_event (cfun, "Vectorized loops", num_vectorized_loops); |
805 if (dump_enabled_p () | 1103 if (dump_enabled_p () |
806 || (num_vectorized_loops > 0 && dump_enabled_p ())) | 1104 || (num_vectorized_loops > 0 && dump_enabled_p ())) |
807 dump_printf_loc (MSG_NOTE, vect_location, | 1105 dump_printf_loc (MSG_NOTE, vect_location, |
809 num_vectorized_loops); | 1107 num_vectorized_loops); |
810 | 1108 |
811 /* ----------- Finalize. ----------- */ | 1109 /* ----------- Finalize. ----------- */ |
812 | 1110 |
813 if (any_ifcvt_loops) | 1111 if (any_ifcvt_loops) |
814 for (i = 1; i < vect_loops_num; i++) | 1112 for (i = 1; i < number_of_loops (cfun); i++) |
815 { | 1113 { |
816 loop = get_loop (cfun, i); | 1114 loop = get_loop (cfun, i); |
817 if (loop && loop->dont_vectorize) | 1115 if (loop && loop->dont_vectorize) |
818 { | 1116 { |
819 gimple *g = vect_loop_vectorized_call (loop); | 1117 gimple *g = vect_loop_vectorized_call (loop); |
832 ret |= TODO_cleanup_cfg; | 1130 ret |= TODO_cleanup_cfg; |
833 } | 1131 } |
834 } | 1132 } |
835 } | 1133 } |
836 | 1134 |
837 for (i = 1; i < vect_loops_num; i++) | 1135 for (i = 1; i < number_of_loops (cfun); i++) |
838 { | 1136 { |
839 loop_vec_info loop_vinfo; | 1137 loop_vec_info loop_vinfo; |
840 bool has_mask_store; | 1138 bool has_mask_store; |
841 | 1139 |
842 loop = get_loop (cfun, i); | 1140 loop = get_loop (cfun, i); |
843 if (!loop) | 1141 if (!loop || !loop->aux) |
844 continue; | 1142 continue; |
845 loop_vinfo = (loop_vec_info) loop->aux; | 1143 loop_vinfo = (loop_vec_info) loop->aux; |
846 has_mask_store = false; | 1144 has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo); |
847 if (loop_vinfo) | |
848 has_mask_store = LOOP_VINFO_HAS_MASK_STORE (loop_vinfo); | |
849 delete loop_vinfo; | 1145 delete loop_vinfo; |
850 if (has_mask_store) | 1146 if (has_mask_store |
1147 && targetm.vectorize.empty_mask_is_expensive (IFN_MASK_STORE)) | |
851 optimize_mask_stores (loop); | 1148 optimize_mask_stores (loop); |
852 loop->aux = NULL; | 1149 loop->aux = NULL; |
853 } | 1150 } |
854 | |
855 free_stmt_vec_info_vec (); | |
856 | 1151 |
857 /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */ | 1152 /* Fold IFN_GOMP_SIMD_{VF,LANE,LAST_LANE,ORDERED_{START,END}} builtins. */ |
858 if (cfun->has_simduid_loops) | 1153 if (cfun->has_simduid_loops) |
859 adjust_simduid_builtins (simduid_to_vf_htab); | 1154 adjust_simduid_builtins (simduid_to_vf_htab); |
860 | 1155 |
989 gimple_set_uid (stmt, -1); | 1284 gimple_set_uid (stmt, -1); |
990 gimple_set_visited (stmt, false); | 1285 gimple_set_visited (stmt, false); |
991 } | 1286 } |
992 } | 1287 } |
993 | 1288 |
994 init_stmt_vec_info_vec (); | |
995 | |
996 FOR_EACH_BB_FN (bb, fun) | 1289 FOR_EACH_BB_FN (bb, fun) |
997 { | 1290 { |
998 if (vect_slp_bb (bb)) | 1291 if (vect_slp_bb (bb)) |
999 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, vect_location, | 1292 dump_printf_loc (MSG_NOTE, vect_location, "basic block vectorized\n"); |
1000 "basic block vectorized\n"); | 1293 } |
1001 } | |
1002 | |
1003 free_stmt_vec_info_vec (); | |
1004 | 1294 |
1005 if (!in_loop_pipeline) | 1295 if (!in_loop_pipeline) |
1006 { | 1296 { |
1007 scev_finalize (); | 1297 scev_finalize (); |
1008 loop_optimizer_finalize (); | 1298 loop_optimizer_finalize (); |
1034 0 if no vector type exists. */ | 1324 0 if no vector type exists. */ |
1035 static unsigned | 1325 static unsigned |
1036 get_vec_alignment_for_array_type (tree type) | 1326 get_vec_alignment_for_array_type (tree type) |
1037 { | 1327 { |
1038 gcc_assert (TREE_CODE (type) == ARRAY_TYPE); | 1328 gcc_assert (TREE_CODE (type) == ARRAY_TYPE); |
1329 poly_uint64 array_size, vector_size; | |
1039 | 1330 |
1040 tree vectype = get_vectype_for_scalar_type (strip_array_types (type)); | 1331 tree vectype = get_vectype_for_scalar_type (strip_array_types (type)); |
1041 if (!vectype | 1332 if (!vectype |
1042 || !TYPE_SIZE (type) | 1333 || !poly_int_tree_p (TYPE_SIZE (type), &array_size) |
1043 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST | 1334 || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size) |
1044 || tree_int_cst_lt (TYPE_SIZE (type), TYPE_SIZE (vectype))) | 1335 || maybe_lt (array_size, vector_size)) |
1045 return 0; | 1336 return 0; |
1046 | 1337 |
1047 return TYPE_ALIGN (vectype); | 1338 return TYPE_ALIGN (vectype); |
1048 } | 1339 } |
1049 | 1340 |
1136 static unsigned int | 1427 static unsigned int |
1137 increase_alignment (void) | 1428 increase_alignment (void) |
1138 { | 1429 { |
1139 varpool_node *vnode; | 1430 varpool_node *vnode; |
1140 | 1431 |
1141 vect_location = UNKNOWN_LOCATION; | 1432 vect_location = dump_user_location_t (); |
1142 type_align_map = new hash_map<tree, unsigned>; | 1433 type_align_map = new hash_map<tree, unsigned>; |
1143 | 1434 |
1144 /* Increase the alignment of all global arrays for vectorization. */ | 1435 /* Increase the alignment of all global arrays for vectorization. */ |
1145 FOR_EACH_DEFINED_VARIABLE (vnode) | 1436 FOR_EACH_DEFINED_VARIABLE (vnode) |
1146 { | 1437 { |
1154 | 1445 |
1155 alignment = get_vec_alignment_for_type (TREE_TYPE (decl)); | 1446 alignment = get_vec_alignment_for_type (TREE_TYPE (decl)); |
1156 if (alignment && vect_can_force_dr_alignment_p (decl, alignment)) | 1447 if (alignment && vect_can_force_dr_alignment_p (decl, alignment)) |
1157 { | 1448 { |
1158 vnode->increase_alignment (alignment); | 1449 vnode->increase_alignment (alignment); |
1159 dump_printf (MSG_NOTE, "Increasing alignment of decl: "); | 1450 dump_printf (MSG_NOTE, "Increasing alignment of decl: %T\n", decl); |
1160 dump_generic_expr (MSG_NOTE, TDF_SLIM, decl); | |
1161 dump_printf (MSG_NOTE, "\n"); | |
1162 } | 1451 } |
1163 } | 1452 } |
1164 | 1453 |
1165 delete type_align_map; | 1454 delete type_align_map; |
1166 return 0; | 1455 return 0; |