annotate gcc/tree-vect-loop-manip.c @ 136:4627f235cf2a

fix c-next example
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Thu, 08 Nov 2018 14:11:56 +0900
parents 84e7813d76e9
children 1830386684a0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1 /* Vectorizer Specific Loop Manipulations
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3 Contributed by Dorit Naishlos <dorit@il.ibm.com>
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
4 and Ira Rosen <irar@il.ibm.com>
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
5
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
6 This file is part of GCC.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
7
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
8 GCC is free software; you can redistribute it and/or modify it under
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
9 the terms of the GNU General Public License as published by the Free
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
10 Software Foundation; either version 3, or (at your option) any later
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
11 version.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
12
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
16 for more details.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
17
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
18 You should have received a copy of the GNU General Public License
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
19 along with GCC; see the file COPYING3. If not see
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
20 <http://www.gnu.org/licenses/>. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
21
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
22 #include "config.h"
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
23 #include "system.h"
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
24 #include "coretypes.h"
111
kono
parents: 67
diff changeset
25 #include "backend.h"
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
26 #include "tree.h"
111
kono
parents: 67
diff changeset
27 #include "gimple.h"
kono
parents: 67
diff changeset
28 #include "cfghooks.h"
kono
parents: 67
diff changeset
29 #include "tree-pass.h"
kono
parents: 67
diff changeset
30 #include "ssa.h"
kono
parents: 67
diff changeset
31 #include "fold-const.h"
kono
parents: 67
diff changeset
32 #include "cfganal.h"
kono
parents: 67
diff changeset
33 #include "gimplify.h"
kono
parents: 67
diff changeset
34 #include "gimple-iterator.h"
kono
parents: 67
diff changeset
35 #include "gimplify-me.h"
kono
parents: 67
diff changeset
36 #include "tree-cfg.h"
kono
parents: 67
diff changeset
37 #include "tree-ssa-loop-manip.h"
kono
parents: 67
diff changeset
38 #include "tree-into-ssa.h"
kono
parents: 67
diff changeset
39 #include "tree-ssa.h"
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
40 #include "cfgloop.h"
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
41 #include "tree-scalar-evolution.h"
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
42 #include "tree-vectorizer.h"
111
kono
parents: 67
diff changeset
43 #include "tree-ssa-loop-ivopts.h"
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
44 #include "gimple-fold.h"
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
45 #include "tree-ssa-loop-niter.h"
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
46 #include "internal-fn.h"
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
47 #include "stor-layout.h"
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
48 #include "optabs-query.h"
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
49 #include "vec-perm-indices.h"
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
50
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
51 /*************************************************************************
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
52 Simple Loop Peeling Utilities
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
53
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
54 Utilities to support loop peeling for vectorization purposes.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
55 *************************************************************************/
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
56
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
57
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
58 /* Renames the use *OP_P. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
59
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
60 static void
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
61 rename_use_op (use_operand_p op_p)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
62 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
63 tree new_name;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
64
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
65 if (TREE_CODE (USE_FROM_PTR (op_p)) != SSA_NAME)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
66 return;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
67
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
68 new_name = get_current_def (USE_FROM_PTR (op_p));
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
69
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
70 /* Something defined outside of the loop. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
71 if (!new_name)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
72 return;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
73
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
74 /* An ordinary ssa name defined in the loop. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
75
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
76 SET_USE (op_p, new_name);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
77 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
78
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
79
111
kono
parents: 67
diff changeset
80 /* Renames the variables in basic block BB. Allow renaming of PHI arguments
kono
parents: 67
diff changeset
81 on edges incoming from outer-block header if RENAME_FROM_OUTER_LOOP is
kono
parents: 67
diff changeset
82 true. */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
83
111
kono
parents: 67
diff changeset
84 static void
kono
parents: 67
diff changeset
85 rename_variables_in_bb (basic_block bb, bool rename_from_outer_loop)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
86 {
111
kono
parents: 67
diff changeset
87 gimple *stmt;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
88 use_operand_p use_p;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
89 ssa_op_iter iter;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
90 edge e;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
91 edge_iterator ei;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
92 struct loop *loop = bb->loop_father;
111
kono
parents: 67
diff changeset
93 struct loop *outer_loop = NULL;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
94
111
kono
parents: 67
diff changeset
95 if (rename_from_outer_loop)
kono
parents: 67
diff changeset
96 {
kono
parents: 67
diff changeset
97 gcc_assert (loop);
kono
parents: 67
diff changeset
98 outer_loop = loop_outer (loop);
kono
parents: 67
diff changeset
99 }
kono
parents: 67
diff changeset
100
kono
parents: 67
diff changeset
101 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
kono
parents: 67
diff changeset
102 gsi_next (&gsi))
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
103 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
104 stmt = gsi_stmt (gsi);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
105 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
106 rename_use_op (use_p);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
107 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
108
111
kono
parents: 67
diff changeset
109 FOR_EACH_EDGE (e, ei, bb->preds)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
110 {
111
kono
parents: 67
diff changeset
111 if (!flow_bb_inside_loop_p (loop, e->src))
kono
parents: 67
diff changeset
112 {
kono
parents: 67
diff changeset
113 if (!rename_from_outer_loop)
kono
parents: 67
diff changeset
114 continue;
kono
parents: 67
diff changeset
115 if (e->src != outer_loop->header)
kono
parents: 67
diff changeset
116 {
kono
parents: 67
diff changeset
117 if (outer_loop->inner->next)
kono
parents: 67
diff changeset
118 {
kono
parents: 67
diff changeset
119 /* If outer_loop has 2 inner loops, allow there to
kono
parents: 67
diff changeset
120 be an extra basic block which decides which of the
kono
parents: 67
diff changeset
121 two loops to use using LOOP_VECTORIZED. */
kono
parents: 67
diff changeset
122 if (!single_pred_p (e->src)
kono
parents: 67
diff changeset
123 || single_pred (e->src) != outer_loop->header)
kono
parents: 67
diff changeset
124 continue;
kono
parents: 67
diff changeset
125 }
kono
parents: 67
diff changeset
126 }
kono
parents: 67
diff changeset
127 }
kono
parents: 67
diff changeset
128 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
kono
parents: 67
diff changeset
129 gsi_next (&gsi))
kono
parents: 67
diff changeset
130 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
131 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
132 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
133
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
134
111
kono
parents: 67
diff changeset
135 struct adjust_info
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
136 {
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
137 tree from, to;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
138 basic_block bb;
111
kono
parents: 67
diff changeset
139 };
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
140
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
141 /* A stack of values to be adjusted in debug stmts. We have to
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
142 process them LIFO, so that the closest substitution applies. If we
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
143 processed them FIFO, without the stack, we might substitute uses
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
144 with a PHI DEF that would soon become non-dominant, and when we got
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
145 to the suitable one, it wouldn't have anything to substitute any
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
146 more. */
111
kono
parents: 67
diff changeset
147 static vec<adjust_info, va_heap> adjust_vec;
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
148
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
149 /* Adjust any debug stmts that referenced AI->from values to use the
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
150 loop-closed AI->to, if the references are dominated by AI->bb and
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
151 not by the definition of AI->from. */
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
152
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
153 static void
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
154 adjust_debug_stmts_now (adjust_info *ai)
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
155 {
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
156 basic_block bbphi = ai->bb;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
157 tree orig_def = ai->from;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
158 tree new_def = ai->to;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
159 imm_use_iterator imm_iter;
111
kono
parents: 67
diff changeset
160 gimple *stmt;
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
161 basic_block bbdef = gimple_bb (SSA_NAME_DEF_STMT (orig_def));
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
162
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
163 gcc_assert (dom_info_available_p (CDI_DOMINATORS));
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
164
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
165 /* Adjust any debug stmts that held onto non-loop-closed
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
166 references. */
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
167 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, orig_def)
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
168 {
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
169 use_operand_p use_p;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
170 basic_block bbuse;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
171
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
172 if (!is_gimple_debug (stmt))
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
173 continue;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
174
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
175 gcc_assert (gimple_debug_bind_p (stmt));
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
176
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
177 bbuse = gimple_bb (stmt);
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
178
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
179 if ((bbuse == bbphi
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
180 || dominated_by_p (CDI_DOMINATORS, bbuse, bbphi))
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
181 && !(bbuse == bbdef
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
182 || dominated_by_p (CDI_DOMINATORS, bbuse, bbdef)))
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
183 {
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
184 if (new_def)
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
185 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
186 SET_USE (use_p, new_def);
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
187 else
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
188 {
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
189 gimple_debug_bind_reset_value (stmt);
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
190 update_stmt (stmt);
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
191 }
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
192 }
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
193 }
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
194 }
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
195
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
196 /* Adjust debug stmts as scheduled before. */
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
197
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
198 static void
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
199 adjust_vec_debug_stmts (void)
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
200 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
201 if (!MAY_HAVE_DEBUG_BIND_STMTS)
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
202 return;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
203
111
kono
parents: 67
diff changeset
204 gcc_assert (adjust_vec.exists ());
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
205
111
kono
parents: 67
diff changeset
206 while (!adjust_vec.is_empty ())
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
207 {
111
kono
parents: 67
diff changeset
208 adjust_debug_stmts_now (&adjust_vec.last ());
kono
parents: 67
diff changeset
209 adjust_vec.pop ();
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
210 }
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
211 }
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
212
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
213 /* Adjust any debug stmts that referenced FROM values to use the
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
214 loop-closed TO, if the references are dominated by BB and not by
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
215 the definition of FROM. If adjust_vec is non-NULL, adjustments
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
216 will be postponed until adjust_vec_debug_stmts is called. */
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
217
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
218 static void
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
219 adjust_debug_stmts (tree from, tree to, basic_block bb)
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
220 {
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
221 adjust_info ai;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
222
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
223 if (MAY_HAVE_DEBUG_BIND_STMTS
111
kono
parents: 67
diff changeset
224 && TREE_CODE (from) == SSA_NAME
kono
parents: 67
diff changeset
225 && ! SSA_NAME_IS_DEFAULT_DEF (from)
kono
parents: 67
diff changeset
226 && ! virtual_operand_p (from))
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
227 {
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
228 ai.from = from;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
229 ai.to = to;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
230 ai.bb = bb;
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
231
111
kono
parents: 67
diff changeset
232 if (adjust_vec.exists ())
kono
parents: 67
diff changeset
233 adjust_vec.safe_push (ai);
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
234 else
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
235 adjust_debug_stmts_now (&ai);
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
236 }
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
237 }
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
238
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
239 /* Change E's phi arg in UPDATE_PHI to NEW_DEF, and record information
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
240 to adjust any debug stmts that referenced the old phi arg,
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
241 presumably non-loop-closed references left over from other
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
242 transformations. */
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
243
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
244 static void
111
kono
parents: 67
diff changeset
245 adjust_phi_and_debug_stmts (gimple *update_phi, edge e, tree new_def)
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
246 {
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
247 tree orig_def = PHI_ARG_DEF_FROM_EDGE (update_phi, e);
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
248
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
249 SET_PHI_ARG_DEF (update_phi, e->dest_idx, new_def);
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
250
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
251 if (MAY_HAVE_DEBUG_BIND_STMTS)
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
252 adjust_debug_stmts (orig_def, PHI_RESULT (update_phi),
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
253 gimple_bb (update_phi));
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
254 }
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
255
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
256 /* Define one loop mask MASK from loop LOOP. INIT_MASK is the value that
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
257 the mask should have during the first iteration and NEXT_MASK is the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
258 value that it should have on subsequent iterations. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
259
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
260 static void
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
261 vect_set_loop_mask (struct loop *loop, tree mask, tree init_mask,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
262 tree next_mask)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
263 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
264 gphi *phi = create_phi_node (mask, loop->header);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
265 add_phi_arg (phi, init_mask, loop_preheader_edge (loop), UNKNOWN_LOCATION);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
266 add_phi_arg (phi, next_mask, loop_latch_edge (loop), UNKNOWN_LOCATION);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
267 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
268
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
269 /* Add SEQ to the end of LOOP's preheader block. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
270
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
271 static void
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
272 add_preheader_seq (struct loop *loop, gimple_seq seq)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
273 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
274 if (seq)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
275 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
276 edge pe = loop_preheader_edge (loop);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
277 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
278 gcc_assert (!new_bb);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
279 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
280 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
281
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
282 /* Add SEQ to the beginning of LOOP's header block. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
283
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
284 static void
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
285 add_header_seq (struct loop *loop, gimple_seq seq)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
286 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
287 if (seq)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
288 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
289 gimple_stmt_iterator gsi = gsi_after_labels (loop->header);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
290 gsi_insert_seq_before (&gsi, seq, GSI_SAME_STMT);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
291 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
292 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
293
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
294 /* Return true if the target can interleave elements of two vectors.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
295 OFFSET is 0 if the first half of the vectors should be interleaved
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
296 or 1 if the second half should. When returning true, store the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
297 associated permutation in INDICES. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
298
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
299 static bool
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
300 interleave_supported_p (vec_perm_indices *indices, tree vectype,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
301 unsigned int offset)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
302 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
303 poly_uint64 nelts = TYPE_VECTOR_SUBPARTS (vectype);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
304 poly_uint64 base = exact_div (nelts, 2) * offset;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
305 vec_perm_builder sel (nelts, 2, 3);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
306 for (unsigned int i = 0; i < 3; ++i)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
307 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
308 sel.quick_push (base + i);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
309 sel.quick_push (base + i + nelts);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
310 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
311 indices->new_vector (sel, 2, nelts);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
312 return can_vec_perm_const_p (TYPE_MODE (vectype), *indices);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
313 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
314
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
315 /* Try to use permutes to define the masks in DEST_RGM using the masks
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
316 in SRC_RGM, given that the former has twice as many masks as the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
317 latter. Return true on success, adding any new statements to SEQ. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
318
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
319 static bool
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
320 vect_maybe_permute_loop_masks (gimple_seq *seq, rgroup_masks *dest_rgm,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
321 rgroup_masks *src_rgm)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
322 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
323 tree src_masktype = src_rgm->mask_type;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
324 tree dest_masktype = dest_rgm->mask_type;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
325 machine_mode src_mode = TYPE_MODE (src_masktype);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
326 if (dest_rgm->max_nscalars_per_iter <= src_rgm->max_nscalars_per_iter
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
327 && optab_handler (vec_unpacku_hi_optab, src_mode) != CODE_FOR_nothing
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
328 && optab_handler (vec_unpacku_lo_optab, src_mode) != CODE_FOR_nothing)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
329 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
330 /* Unpacking the source masks gives at least as many mask bits as
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
331 we need. We can then VIEW_CONVERT any excess bits away. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
332 tree unpack_masktype = vect_halve_mask_nunits (src_masktype);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
333 for (unsigned int i = 0; i < dest_rgm->masks.length (); ++i)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
334 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
335 tree src = src_rgm->masks[i / 2];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
336 tree dest = dest_rgm->masks[i];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
337 tree_code code = ((i & 1) == (BYTES_BIG_ENDIAN ? 0 : 1)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
338 ? VEC_UNPACK_HI_EXPR
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
339 : VEC_UNPACK_LO_EXPR);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
340 gassign *stmt;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
341 if (dest_masktype == unpack_masktype)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
342 stmt = gimple_build_assign (dest, code, src);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
343 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
344 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
345 tree temp = make_ssa_name (unpack_masktype);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
346 stmt = gimple_build_assign (temp, code, src);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
347 gimple_seq_add_stmt (seq, stmt);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
348 stmt = gimple_build_assign (dest, VIEW_CONVERT_EXPR,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
349 build1 (VIEW_CONVERT_EXPR,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
350 dest_masktype, temp));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
351 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
352 gimple_seq_add_stmt (seq, stmt);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
353 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
354 return true;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
355 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
356 vec_perm_indices indices[2];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
357 if (dest_masktype == src_masktype
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
358 && interleave_supported_p (&indices[0], src_masktype, 0)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
359 && interleave_supported_p (&indices[1], src_masktype, 1))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
360 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
361 /* The destination requires twice as many mask bits as the source, so
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
362 we can use interleaving permutes to double up the number of bits. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
363 tree masks[2];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
364 for (unsigned int i = 0; i < 2; ++i)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
365 masks[i] = vect_gen_perm_mask_checked (src_masktype, indices[i]);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
366 for (unsigned int i = 0; i < dest_rgm->masks.length (); ++i)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
367 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
368 tree src = src_rgm->masks[i / 2];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
369 tree dest = dest_rgm->masks[i];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
370 gimple *stmt = gimple_build_assign (dest, VEC_PERM_EXPR,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
371 src, src, masks[i & 1]);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
372 gimple_seq_add_stmt (seq, stmt);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
373 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
374 return true;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
375 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
376 return false;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
377 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
378
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
379 /* Helper for vect_set_loop_condition_masked. Generate definitions for
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
380 all the masks in RGM and return a mask that is nonzero when the loop
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
381 needs to iterate. Add any new preheader statements to PREHEADER_SEQ.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
382 Use LOOP_COND_GSI to insert code before the exit gcond.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
383
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
384 RGM belongs to loop LOOP. The loop originally iterated NITERS
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
385 times and has been vectorized according to LOOP_VINFO. Each iteration
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
386 of the vectorized loop handles VF iterations of the scalar loop.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
387
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
388 If NITERS_SKIP is nonnull, the first iteration of the vectorized loop
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
389 starts with NITERS_SKIP dummy iterations of the scalar loop before
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
390 the real work starts. The mask elements for these dummy iterations
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
391 must be 0, to ensure that the extra iterations do not have an effect.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
392
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
393 It is known that:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
394
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
395 NITERS * RGM->max_nscalars_per_iter
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
396
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
397 does not overflow. However, MIGHT_WRAP_P says whether an induction
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
398 variable that starts at 0 and has step:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
399
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
400 VF * RGM->max_nscalars_per_iter
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
401
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
402 might overflow before hitting a value above:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
403
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
404 (NITERS + NITERS_SKIP) * RGM->max_nscalars_per_iter
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
405
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
406 This means that we cannot guarantee that such an induction variable
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
407 would ever hit a value that produces a set of all-false masks for RGM. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
408
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
409 static tree
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
410 vect_set_loop_masks_directly (struct loop *loop, loop_vec_info loop_vinfo,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
411 gimple_seq *preheader_seq,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
412 gimple_stmt_iterator loop_cond_gsi,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
413 rgroup_masks *rgm, tree vf,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
414 tree niters, tree niters_skip,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
415 bool might_wrap_p)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
416 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
417 tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
418 tree mask_type = rgm->mask_type;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
419 unsigned int nscalars_per_iter = rgm->max_nscalars_per_iter;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
420 poly_uint64 nscalars_per_mask = TYPE_VECTOR_SUBPARTS (mask_type);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
421
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
422 /* Calculate the maximum number of scalar values that the rgroup
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
423 handles in total, the number that it handles for each iteration
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
424 of the vector loop, and the number that it should skip during the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
425 first iteration of the vector loop. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
426 tree nscalars_total = niters;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
427 tree nscalars_step = vf;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
428 tree nscalars_skip = niters_skip;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
429 if (nscalars_per_iter != 1)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
430 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
431 /* We checked before choosing to use a fully-masked loop that these
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
432 multiplications don't overflow. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
433 tree factor = build_int_cst (compare_type, nscalars_per_iter);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
434 nscalars_total = gimple_build (preheader_seq, MULT_EXPR, compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
435 nscalars_total, factor);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
436 nscalars_step = gimple_build (preheader_seq, MULT_EXPR, compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
437 nscalars_step, factor);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
438 if (nscalars_skip)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
439 nscalars_skip = gimple_build (preheader_seq, MULT_EXPR, compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
440 nscalars_skip, factor);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
441 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
442
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
443 /* Create an induction variable that counts the number of scalars
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
444 processed. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
445 tree index_before_incr, index_after_incr;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
446 gimple_stmt_iterator incr_gsi;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
447 bool insert_after;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
448 tree zero_index = build_int_cst (compare_type, 0);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
449 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
450 create_iv (zero_index, nscalars_step, NULL_TREE, loop, &incr_gsi,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
451 insert_after, &index_before_incr, &index_after_incr);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
452
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
453 tree test_index, test_limit, first_limit;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
454 gimple_stmt_iterator *test_gsi;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
455 if (might_wrap_p)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
456 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
457 /* In principle the loop should stop iterating once the incremented
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
458 IV reaches a value greater than or equal to:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
459
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
460 NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
461
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
462 However, there's no guarantee that this addition doesn't overflow
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
463 the comparison type, or that the IV hits a value above it before
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
464 wrapping around. We therefore adjust the limit down by one
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
465 IV step:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
466
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
467 (NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
468 -[infinite-prec] NSCALARS_STEP
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
469
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
470 and compare the IV against this limit _before_ incrementing it.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
471 Since the comparison type is unsigned, we actually want the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
472 subtraction to saturate at zero:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
473
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
474 (NSCALARS_TOTAL +[infinite-prec] NSCALARS_SKIP)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
475 -[sat] NSCALARS_STEP
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
476
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
477 And since NSCALARS_SKIP < NSCALARS_STEP, we can reassociate this as:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
478
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
479 NSCALARS_TOTAL -[sat] (NSCALARS_STEP - NSCALARS_SKIP)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
480
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
481 where the rightmost subtraction can be done directly in
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
482 COMPARE_TYPE. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
483 test_index = index_before_incr;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
484 tree adjust = nscalars_step;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
485 if (nscalars_skip)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
486 adjust = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
487 adjust, nscalars_skip);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
488 test_limit = gimple_build (preheader_seq, MAX_EXPR, compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
489 nscalars_total, adjust);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
490 test_limit = gimple_build (preheader_seq, MINUS_EXPR, compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
491 test_limit, adjust);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
492 test_gsi = &incr_gsi;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
493
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
494 /* Get a safe limit for the first iteration. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
495 if (nscalars_skip)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
496 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
497 /* The first vector iteration can handle at most NSCALARS_STEP
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
498 scalars. NSCALARS_STEP <= CONST_LIMIT, and adding
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
499 NSCALARS_SKIP to that cannot overflow. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
500 tree const_limit = build_int_cst (compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
501 LOOP_VINFO_VECT_FACTOR (loop_vinfo)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
502 * nscalars_per_iter);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
503 first_limit = gimple_build (preheader_seq, MIN_EXPR, compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
504 nscalars_total, const_limit);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
505 first_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
506 first_limit, nscalars_skip);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
507 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
508 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
509 /* For the first iteration it doesn't matter whether the IV hits
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
510 a value above NSCALARS_TOTAL. That only matters for the latch
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
511 condition. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
512 first_limit = nscalars_total;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
513 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
514 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
515 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
516 /* Test the incremented IV, which will always hit a value above
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
517 the bound before wrapping. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
518 test_index = index_after_incr;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
519 test_limit = nscalars_total;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
520 if (nscalars_skip)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
521 test_limit = gimple_build (preheader_seq, PLUS_EXPR, compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
522 test_limit, nscalars_skip);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
523 test_gsi = &loop_cond_gsi;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
524
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
525 first_limit = test_limit;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
526 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
527
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
528 /* Provide a definition of each mask in the group. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
529 tree next_mask = NULL_TREE;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
530 tree mask;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
531 unsigned int i;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
532 FOR_EACH_VEC_ELT_REVERSE (rgm->masks, i, mask)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
533 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
534 /* Previous masks will cover BIAS scalars. This mask covers the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
535 next batch. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
536 poly_uint64 bias = nscalars_per_mask * i;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
537 tree bias_tree = build_int_cst (compare_type, bias);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
538 gimple *tmp_stmt;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
539
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
540 /* See whether the first iteration of the vector loop is known
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
541 to have a full mask. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
542 poly_uint64 const_limit;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
543 bool first_iteration_full
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
544 = (poly_int_tree_p (first_limit, &const_limit)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
545 && known_ge (const_limit, (i + 1) * nscalars_per_mask));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
546
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
547 /* Rather than have a new IV that starts at BIAS and goes up to
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
548 TEST_LIMIT, prefer to use the same 0-based IV for each mask
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
549 and adjust the bound down by BIAS. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
550 tree this_test_limit = test_limit;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
551 if (i != 0)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
552 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
553 this_test_limit = gimple_build (preheader_seq, MAX_EXPR,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
554 compare_type, this_test_limit,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
555 bias_tree);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
556 this_test_limit = gimple_build (preheader_seq, MINUS_EXPR,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
557 compare_type, this_test_limit,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
558 bias_tree);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
559 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
560
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
561 /* Create the initial mask. First include all scalars that
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
562 are within the loop limit. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
563 tree init_mask = NULL_TREE;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
564 if (!first_iteration_full)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
565 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
566 tree start, end;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
567 if (first_limit == test_limit)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
568 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
569 /* Use a natural test between zero (the initial IV value)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
570 and the loop limit. The "else" block would be valid too,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
571 but this choice can avoid the need to load BIAS_TREE into
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
572 a register. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
573 start = zero_index;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
574 end = this_test_limit;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
575 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
576 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
577 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
578 /* FIRST_LIMIT is the maximum number of scalars handled by the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
579 first iteration of the vector loop. Test the portion
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
580 associated with this mask. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
581 start = bias_tree;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
582 end = first_limit;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
583 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
584
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
585 init_mask = make_temp_ssa_name (mask_type, NULL, "max_mask");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
586 tmp_stmt = vect_gen_while (init_mask, start, end);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
587 gimple_seq_add_stmt (preheader_seq, tmp_stmt);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
588 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
589
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
590 /* Now AND out the bits that are within the number of skipped
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
591 scalars. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
592 poly_uint64 const_skip;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
593 if (nscalars_skip
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
594 && !(poly_int_tree_p (nscalars_skip, &const_skip)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
595 && known_le (const_skip, bias)))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
596 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
597 tree unskipped_mask = vect_gen_while_not (preheader_seq, mask_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
598 bias_tree, nscalars_skip);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
599 if (init_mask)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
600 init_mask = gimple_build (preheader_seq, BIT_AND_EXPR, mask_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
601 init_mask, unskipped_mask);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
602 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
603 init_mask = unskipped_mask;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
604 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
605
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
606 if (!init_mask)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
607 /* First iteration is full. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
608 init_mask = build_minus_one_cst (mask_type);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
609
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
610 /* Get the mask value for the next iteration of the loop. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
611 next_mask = make_temp_ssa_name (mask_type, NULL, "next_mask");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
612 gcall *call = vect_gen_while (next_mask, test_index, this_test_limit);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
613 gsi_insert_before (test_gsi, call, GSI_SAME_STMT);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
614
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
615 vect_set_loop_mask (loop, mask, init_mask, next_mask);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
616 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
617 return next_mask;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
618 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
619
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
620 /* Make LOOP iterate NITERS times using masking and WHILE_ULT calls.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
621 LOOP_VINFO describes the vectorization of LOOP. NITERS is the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
622 number of iterations of the original scalar loop that should be
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
623 handled by the vector loop. NITERS_MAYBE_ZERO and FINAL_IV are
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
624 as for vect_set_loop_condition.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
625
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
626 Insert the branch-back condition before LOOP_COND_GSI and return the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
627 final gcond. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
628
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
629 static gcond *
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
630 vect_set_loop_condition_masked (struct loop *loop, loop_vec_info loop_vinfo,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
631 tree niters, tree final_iv,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
632 bool niters_maybe_zero,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
633 gimple_stmt_iterator loop_cond_gsi)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
634 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
635 gimple_seq preheader_seq = NULL;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
636 gimple_seq header_seq = NULL;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
637
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
638 tree compare_type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
639 unsigned int compare_precision = TYPE_PRECISION (compare_type);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
640 unsigned HOST_WIDE_INT max_vf = vect_max_vf (loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
641 tree orig_niters = niters;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
642
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
643 /* Type of the initial value of NITERS. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
644 tree ni_actual_type = TREE_TYPE (niters);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
645 unsigned int ni_actual_precision = TYPE_PRECISION (ni_actual_type);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
646
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
647 /* Convert NITERS to the same size as the compare. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
648 if (compare_precision > ni_actual_precision
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
649 && niters_maybe_zero)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
650 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
651 /* We know that there is always at least one iteration, so if the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
652 count is zero then it must have wrapped. Cope with this by
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
653 subtracting 1 before the conversion and adding 1 to the result. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
654 gcc_assert (TYPE_UNSIGNED (ni_actual_type));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
655 niters = gimple_build (&preheader_seq, PLUS_EXPR, ni_actual_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
656 niters, build_minus_one_cst (ni_actual_type));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
657 niters = gimple_convert (&preheader_seq, compare_type, niters);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
658 niters = gimple_build (&preheader_seq, PLUS_EXPR, compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
659 niters, build_one_cst (compare_type));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
660 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
661 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
662 niters = gimple_convert (&preheader_seq, compare_type, niters);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
663
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
664 /* Get the number of skipped iterations, if any (may be null). */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
665 tree niters_skip = LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
666
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
667 /* Now calculate the value that the induction variable must be able
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
668 to hit in order to ensure that we end the loop with an all-false mask.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
669 This involves adding the maximum number of inactive trailing scalar
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
670 iterations. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
671 widest_int iv_limit;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
672 bool known_max_iters = max_loop_iterations (loop, &iv_limit);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
673 if (known_max_iters)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
674 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
675 if (niters_skip)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
676 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
677 /* Add the maximum number of skipped iterations to the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
678 maximum iteration count. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
679 if (TREE_CODE (niters_skip) == INTEGER_CST)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
680 iv_limit += wi::to_widest (niters_skip);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
681 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
682 iv_limit += max_vf - 1;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
683 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
684 /* IV_LIMIT is the maximum number of latch iterations, which is also
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
685 the maximum in-range IV value. Round this value down to the previous
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
686 vector alignment boundary and then add an extra full iteration. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
687 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
688 iv_limit = (iv_limit & -(int) known_alignment (vf)) + max_vf;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
689 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
690
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
691 /* Get the vectorization factor in tree form. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
692 tree vf = build_int_cst (compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
693 LOOP_VINFO_VECT_FACTOR (loop_vinfo));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
694
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
695 /* Iterate over all the rgroups and fill in their masks. We could use
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
696 the first mask from any rgroup for the loop condition; here we
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
697 arbitrarily pick the last. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
698 tree test_mask = NULL_TREE;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
699 rgroup_masks *rgm;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
700 unsigned int i;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
701 vec_loop_masks *masks = &LOOP_VINFO_MASKS (loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
702 FOR_EACH_VEC_ELT (*masks, i, rgm)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
703 if (!rgm->masks.is_empty ())
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
704 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
705 /* First try using permutes. This adds a single vector
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
706 instruction to the loop for each mask, but needs no extra
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
707 loop invariants or IVs. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
708 unsigned int nmasks = i + 1;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
709 if ((nmasks & 1) == 0)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
710 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
711 rgroup_masks *half_rgm = &(*masks)[nmasks / 2 - 1];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
712 if (!half_rgm->masks.is_empty ()
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
713 && vect_maybe_permute_loop_masks (&header_seq, rgm, half_rgm))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
714 continue;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
715 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
716
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
717 /* See whether a zero-based IV would ever generate all-false masks
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
718 before wrapping around. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
719 bool might_wrap_p
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
720 = (!known_max_iters
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
721 || (wi::min_precision (iv_limit * rgm->max_nscalars_per_iter,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
722 UNSIGNED)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
723 > compare_precision));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
724
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
725 /* Set up all masks for this group. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
726 test_mask = vect_set_loop_masks_directly (loop, loop_vinfo,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
727 &preheader_seq,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
728 loop_cond_gsi, rgm, vf,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
729 niters, niters_skip,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
730 might_wrap_p);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
731 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
732
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
733 /* Emit all accumulated statements. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
734 add_preheader_seq (loop, preheader_seq);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
735 add_header_seq (loop, header_seq);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
736
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
737 /* Get a boolean result that tells us whether to iterate. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
738 edge exit_edge = single_exit (loop);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
739 tree_code code = (exit_edge->flags & EDGE_TRUE_VALUE) ? EQ_EXPR : NE_EXPR;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
740 tree zero_mask = build_zero_cst (TREE_TYPE (test_mask));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
741 gcond *cond_stmt = gimple_build_cond (code, test_mask, zero_mask,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
742 NULL_TREE, NULL_TREE);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
743 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
744
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
745 /* The loop iterates (NITERS - 1) / VF + 1 times.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
746 Subtract one from this to get the latch count. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
747 tree step = build_int_cst (compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
748 LOOP_VINFO_VECT_FACTOR (loop_vinfo));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
749 tree niters_minus_one = fold_build2 (PLUS_EXPR, compare_type, niters,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
750 build_minus_one_cst (compare_type));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
751 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, compare_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
752 niters_minus_one, step);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
753
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
754 if (final_iv)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
755 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
756 gassign *assign = gimple_build_assign (final_iv, orig_niters);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
757 gsi_insert_on_edge_immediate (single_exit (loop), assign);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
758 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
759
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
760 return cond_stmt;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
761 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
762
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
763 /* Like vect_set_loop_condition, but handle the case in which there
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
764 are no loop masks. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
765
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
766 static gcond *
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
767 vect_set_loop_condition_unmasked (struct loop *loop, tree niters,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
768 tree step, tree final_iv,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
769 bool niters_maybe_zero,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
770 gimple_stmt_iterator loop_cond_gsi)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
771 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
772 tree indx_before_incr, indx_after_incr;
111
kono
parents: 67
diff changeset
773 gcond *cond_stmt;
kono
parents: 67
diff changeset
774 gcond *orig_cond;
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
775 edge pe = loop_preheader_edge (loop);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
776 edge exit_edge = single_exit (loop);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
777 gimple_stmt_iterator incr_gsi;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
778 bool insert_after;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
779 enum tree_code code;
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
780 tree niters_type = TREE_TYPE (niters);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
781
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
782 orig_cond = get_loop_exit_condition (loop);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
783 gcc_assert (orig_cond);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
784 loop_cond_gsi = gsi_for_stmt (orig_cond);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
785
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
786 tree init, limit;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
787 if (!niters_maybe_zero && integer_onep (step))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
788 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
789 /* In this case we can use a simple 0-based IV:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
790
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
791 A:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
792 x = 0;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
793 do
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
794 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
795 ...
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
796 x += 1;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
797 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
798 while (x < NITERS); */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
799 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
800 init = build_zero_cst (niters_type);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
801 limit = niters;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
802 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
803 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
804 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
805 /* The following works for all values of NITERS except 0:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
806
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
807 B:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
808 x = 0;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
809 do
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
810 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
811 ...
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
812 x += STEP;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
813 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
814 while (x <= NITERS - STEP);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
815
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
816 so that the loop continues to iterate if x + STEP - 1 < NITERS
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
817 but stops if x + STEP - 1 >= NITERS.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
818
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
819 However, if NITERS is zero, x never hits a value above NITERS - STEP
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
820 before wrapping around. There are two obvious ways of dealing with
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
821 this:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
822
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
823 - start at STEP - 1 and compare x before incrementing it
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
824 - start at -1 and compare x after incrementing it
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
825
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
826 The latter is simpler and is what we use. The loop in this case
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
827 looks like:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
828
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
829 C:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
830 x = -1;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
831 do
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
832 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
833 ...
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
834 x += STEP;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
835 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
836 while (x < NITERS - STEP);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
837
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
838 In both cases the loop limit is NITERS - STEP. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
839 gimple_seq seq = NULL;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
840 limit = force_gimple_operand (niters, &seq, true, NULL_TREE);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
841 limit = gimple_build (&seq, MINUS_EXPR, TREE_TYPE (limit), limit, step);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
842 if (seq)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
843 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
844 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
845 gcc_assert (!new_bb);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
846 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
847 if (niters_maybe_zero)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
848 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
849 /* Case C. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
850 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GE_EXPR : LT_EXPR;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
851 init = build_all_ones_cst (niters_type);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
852 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
853 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
854 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
855 /* Case B. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
856 code = (exit_edge->flags & EDGE_TRUE_VALUE) ? GT_EXPR : LE_EXPR;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
857 init = build_zero_cst (niters_type);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
858 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
859 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
860
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
861 standard_iv_increment_position (loop, &incr_gsi, &insert_after);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
862 create_iv (init, step, NULL_TREE, loop,
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
863 &incr_gsi, insert_after, &indx_before_incr, &indx_after_incr);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
864 indx_after_incr = force_gimple_operand_gsi (&loop_cond_gsi, indx_after_incr,
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
865 true, NULL_TREE, true,
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
866 GSI_SAME_STMT);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
867 limit = force_gimple_operand_gsi (&loop_cond_gsi, limit, true, NULL_TREE,
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
868 true, GSI_SAME_STMT);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
869
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
870 cond_stmt = gimple_build_cond (code, indx_after_incr, limit, NULL_TREE,
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
871 NULL_TREE);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
872
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
873 gsi_insert_before (&loop_cond_gsi, cond_stmt, GSI_SAME_STMT);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
874
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
875 /* Record the number of latch iterations. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
876 if (limit == niters)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
877 /* Case A: the loop iterates NITERS times. Subtract one to get the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
878 latch count. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
879 loop->nb_iterations = fold_build2 (MINUS_EXPR, niters_type, niters,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
880 build_int_cst (niters_type, 1));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
881 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
882 /* Case B or C: the loop iterates (NITERS - STEP) / STEP + 1 times.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
883 Subtract one from this to get the latch count. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
884 loop->nb_iterations = fold_build2 (TRUNC_DIV_EXPR, niters_type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
885 limit, step);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
886
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
887 if (final_iv)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
888 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
889 gassign *assign = gimple_build_assign (final_iv, MINUS_EXPR,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
890 indx_after_incr, init);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
891 gsi_insert_on_edge_immediate (single_exit (loop), assign);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
892 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
893
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
894 return cond_stmt;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
895 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
896
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
897 /* If we're using fully-masked loops, make LOOP iterate:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
898
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
899 N == (NITERS - 1) / STEP + 1
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
900
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
901 times. When NITERS is zero, this is equivalent to making the loop
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
902 execute (1 << M) / STEP times, where M is the precision of NITERS.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
903 NITERS_MAYBE_ZERO is true if this last case might occur.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
904
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
905 If we're not using fully-masked loops, make LOOP iterate:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
906
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
907 N == (NITERS - STEP) / STEP + 1
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
908
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
909 times, where NITERS is known to be outside the range [1, STEP - 1].
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
910 This is equivalent to making the loop execute NITERS / STEP times
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
911 when NITERS is nonzero and (1 << M) / STEP times otherwise.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
912 NITERS_MAYBE_ZERO again indicates whether this last case might occur.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
913
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
914 If FINAL_IV is nonnull, it is an SSA name that should be set to
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
915 N * STEP on exit from the loop.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
916
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
917 Assumption: the exit-condition of LOOP is the last stmt in the loop. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
918
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
919 void
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
920 vect_set_loop_condition (struct loop *loop, loop_vec_info loop_vinfo,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
921 tree niters, tree step, tree final_iv,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
922 bool niters_maybe_zero)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
923 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
924 gcond *cond_stmt;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
925 gcond *orig_cond = get_loop_exit_condition (loop);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
926 gimple_stmt_iterator loop_cond_gsi = gsi_for_stmt (orig_cond);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
927
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
928 if (loop_vinfo && LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
929 cond_stmt = vect_set_loop_condition_masked (loop, loop_vinfo, niters,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
930 final_iv, niters_maybe_zero,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
931 loop_cond_gsi);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
932 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
933 cond_stmt = vect_set_loop_condition_unmasked (loop, niters, step,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
934 final_iv, niters_maybe_zero,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
935 loop_cond_gsi);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
936
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
937 /* Remove old loop exit test. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
938 stmt_vec_info orig_cond_info;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
939 if (loop_vinfo
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
940 && (orig_cond_info = loop_vinfo->lookup_stmt (orig_cond)))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
941 loop_vinfo->remove_stmt (orig_cond_info);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
942 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
943 gsi_remove (&loop_cond_gsi, true);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
944
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
945 if (dump_enabled_p ())
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
946 dump_printf_loc (MSG_NOTE, vect_location, "New loop exit condition: %G",
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
947 cond_stmt);
111
kono
parents: 67
diff changeset
948 }
kono
parents: 67
diff changeset
949
kono
parents: 67
diff changeset
950 /* Helper routine of slpeel_tree_duplicate_loop_to_edge_cfg.
kono
parents: 67
diff changeset
951 For the PHI arguments of FROM->dest and TO->dest on those edges,
kono
parents: 67
diff changeset
952 ensure that each TO->dest PHI argument has the same current_def
kono
parents: 67
diff changeset
953 as the corresponding FROM->dest PHI argument. */
kono
parents: 67
diff changeset
954
kono
parents: 67
diff changeset
955 static void
kono
parents: 67
diff changeset
956 slpeel_duplicate_current_defs_from_edges (edge from, edge to)
kono
parents: 67
diff changeset
957 {
kono
parents: 67
diff changeset
958 gimple_stmt_iterator gsi_from, gsi_to;
kono
parents: 67
diff changeset
959
kono
parents: 67
diff changeset
960 for (gsi_from = gsi_start_phis (from->dest),
kono
parents: 67
diff changeset
961 gsi_to = gsi_start_phis (to->dest);
kono
parents: 67
diff changeset
962 !gsi_end_p (gsi_from) && !gsi_end_p (gsi_to);)
kono
parents: 67
diff changeset
963 {
kono
parents: 67
diff changeset
964 gimple *from_phi = gsi_stmt (gsi_from);
kono
parents: 67
diff changeset
965 gimple *to_phi = gsi_stmt (gsi_to);
kono
parents: 67
diff changeset
966 tree from_arg = PHI_ARG_DEF_FROM_EDGE (from_phi, from);
kono
parents: 67
diff changeset
967 tree to_arg = PHI_ARG_DEF_FROM_EDGE (to_phi, to);
kono
parents: 67
diff changeset
968 if (virtual_operand_p (from_arg))
kono
parents: 67
diff changeset
969 {
kono
parents: 67
diff changeset
970 gsi_next (&gsi_from);
kono
parents: 67
diff changeset
971 continue;
kono
parents: 67
diff changeset
972 }
kono
parents: 67
diff changeset
973 if (virtual_operand_p (to_arg))
kono
parents: 67
diff changeset
974 {
kono
parents: 67
diff changeset
975 gsi_next (&gsi_to);
kono
parents: 67
diff changeset
976 continue;
kono
parents: 67
diff changeset
977 }
kono
parents: 67
diff changeset
978 if (TREE_CODE (from_arg) != SSA_NAME)
kono
parents: 67
diff changeset
979 gcc_assert (operand_equal_p (from_arg, to_arg, 0));
kono
parents: 67
diff changeset
980 else
kono
parents: 67
diff changeset
981 {
kono
parents: 67
diff changeset
982 if (get_current_def (to_arg) == NULL_TREE)
kono
parents: 67
diff changeset
983 set_current_def (to_arg, get_current_def (from_arg));
kono
parents: 67
diff changeset
984 }
kono
parents: 67
diff changeset
985 gsi_next (&gsi_from);
kono
parents: 67
diff changeset
986 gsi_next (&gsi_to);
kono
parents: 67
diff changeset
987 }
kono
parents: 67
diff changeset
988
kono
parents: 67
diff changeset
989 gphi *from_phi = get_virtual_phi (from->dest);
kono
parents: 67
diff changeset
990 gphi *to_phi = get_virtual_phi (to->dest);
kono
parents: 67
diff changeset
991 if (from_phi)
kono
parents: 67
diff changeset
992 set_current_def (PHI_ARG_DEF_FROM_EDGE (to_phi, to),
kono
parents: 67
diff changeset
993 get_current_def (PHI_ARG_DEF_FROM_EDGE (from_phi, from)));
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
994 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
995
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
996
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
997 /* Given LOOP this function generates a new copy of it and puts it
111
kono
parents: 67
diff changeset
998 on E which is either the entry or exit of LOOP. If SCALAR_LOOP is
kono
parents: 67
diff changeset
999 non-NULL, assume LOOP and SCALAR_LOOP are equivalent and copy the
kono
parents: 67
diff changeset
1000 basic blocks from SCALAR_LOOP instead of LOOP, but to either the
kono
parents: 67
diff changeset
1001 entry or exit of LOOP. */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1002
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1003 struct loop *
111
kono
parents: 67
diff changeset
1004 slpeel_tree_duplicate_loop_to_edge_cfg (struct loop *loop,
kono
parents: 67
diff changeset
1005 struct loop *scalar_loop, edge e)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1006 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1007 struct loop *new_loop;
111
kono
parents: 67
diff changeset
1008 basic_block *new_bbs, *bbs, *pbbs;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1009 bool at_exit;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1010 bool was_imm_dom;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1011 basic_block exit_dest;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1012 edge exit, new_exit;
111
kono
parents: 67
diff changeset
1013 bool duplicate_outer_loop = false;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1014
111
kono
parents: 67
diff changeset
1015 exit = single_exit (loop);
kono
parents: 67
diff changeset
1016 at_exit = (e == exit);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1017 if (!at_exit && e != loop_preheader_edge (loop))
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1018 return NULL;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1019
111
kono
parents: 67
diff changeset
1020 if (scalar_loop == NULL)
kono
parents: 67
diff changeset
1021 scalar_loop = loop;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1022
111
kono
parents: 67
diff changeset
1023 bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
kono
parents: 67
diff changeset
1024 pbbs = bbs + 1;
kono
parents: 67
diff changeset
1025 get_loop_body_with_size (scalar_loop, pbbs, scalar_loop->num_nodes);
kono
parents: 67
diff changeset
1026 /* Allow duplication of outer loops. */
kono
parents: 67
diff changeset
1027 if (scalar_loop->inner)
kono
parents: 67
diff changeset
1028 duplicate_outer_loop = true;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1029 /* Check whether duplication is possible. */
111
kono
parents: 67
diff changeset
1030 if (!can_copy_bbs_p (pbbs, scalar_loop->num_nodes))
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1031 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1032 free (bbs);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1033 return NULL;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1034 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1035
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1036 /* Generate new loop structure. */
111
kono
parents: 67
diff changeset
1037 new_loop = duplicate_loop (scalar_loop, loop_outer (scalar_loop));
kono
parents: 67
diff changeset
1038 duplicate_subloops (scalar_loop, new_loop);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1039
111
kono
parents: 67
diff changeset
1040 exit_dest = exit->dest;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1041 was_imm_dom = (get_immediate_dominator (CDI_DOMINATORS,
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1042 exit_dest) == loop->header ?
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1043 true : false);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1044
111
kono
parents: 67
diff changeset
1045 /* Also copy the pre-header, this avoids jumping through hoops to
kono
parents: 67
diff changeset
1046 duplicate the loop entry PHI arguments. Create an empty
kono
parents: 67
diff changeset
1047 pre-header unconditionally for this. */
kono
parents: 67
diff changeset
1048 basic_block preheader = split_edge (loop_preheader_edge (scalar_loop));
kono
parents: 67
diff changeset
1049 edge entry_e = single_pred_edge (preheader);
kono
parents: 67
diff changeset
1050 bbs[0] = preheader;
kono
parents: 67
diff changeset
1051 new_bbs = XNEWVEC (basic_block, scalar_loop->num_nodes + 1);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1052
111
kono
parents: 67
diff changeset
1053 exit = single_exit (scalar_loop);
kono
parents: 67
diff changeset
1054 copy_bbs (bbs, scalar_loop->num_nodes + 1, new_bbs,
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1055 &exit, 1, &new_exit, NULL,
111
kono
parents: 67
diff changeset
1056 at_exit ? loop->latch : e->src, true);
kono
parents: 67
diff changeset
1057 exit = single_exit (loop);
kono
parents: 67
diff changeset
1058 basic_block new_preheader = new_bbs[0];
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1059
111
kono
parents: 67
diff changeset
1060 add_phi_args_after_copy (new_bbs, scalar_loop->num_nodes + 1, NULL);
kono
parents: 67
diff changeset
1061
kono
parents: 67
diff changeset
1062 if (scalar_loop != loop)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1063 {
111
kono
parents: 67
diff changeset
1064 /* If we copied from SCALAR_LOOP rather than LOOP, SSA_NAMEs from
kono
parents: 67
diff changeset
1065 SCALAR_LOOP will have current_def set to SSA_NAMEs in the new_loop,
kono
parents: 67
diff changeset
1066 but LOOP will not. slpeel_update_phi_nodes_for_guard{1,2} expects
kono
parents: 67
diff changeset
1067 the LOOP SSA_NAMEs (on the exit edge and edge from latch to
kono
parents: 67
diff changeset
1068 header) to have current_def set, so copy them over. */
kono
parents: 67
diff changeset
1069 slpeel_duplicate_current_defs_from_edges (single_exit (scalar_loop),
kono
parents: 67
diff changeset
1070 exit);
kono
parents: 67
diff changeset
1071 slpeel_duplicate_current_defs_from_edges (EDGE_SUCC (scalar_loop->latch,
kono
parents: 67
diff changeset
1072 0),
kono
parents: 67
diff changeset
1073 EDGE_SUCC (loop->latch, 0));
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1074 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1075
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1076 if (at_exit) /* Add the loop copy at exit. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1077 {
111
kono
parents: 67
diff changeset
1078 if (scalar_loop != loop)
kono
parents: 67
diff changeset
1079 {
kono
parents: 67
diff changeset
1080 gphi_iterator gsi;
kono
parents: 67
diff changeset
1081 new_exit = redirect_edge_and_branch (new_exit, exit_dest);
kono
parents: 67
diff changeset
1082
kono
parents: 67
diff changeset
1083 for (gsi = gsi_start_phis (exit_dest); !gsi_end_p (gsi);
kono
parents: 67
diff changeset
1084 gsi_next (&gsi))
kono
parents: 67
diff changeset
1085 {
kono
parents: 67
diff changeset
1086 gphi *phi = gsi.phi ();
kono
parents: 67
diff changeset
1087 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
kono
parents: 67
diff changeset
1088 location_t orig_locus
kono
parents: 67
diff changeset
1089 = gimple_phi_arg_location_from_edge (phi, e);
kono
parents: 67
diff changeset
1090
kono
parents: 67
diff changeset
1091 add_phi_arg (phi, orig_arg, new_exit, orig_locus);
kono
parents: 67
diff changeset
1092 }
kono
parents: 67
diff changeset
1093 }
kono
parents: 67
diff changeset
1094 redirect_edge_and_branch_force (e, new_preheader);
kono
parents: 67
diff changeset
1095 flush_pending_stmts (e);
kono
parents: 67
diff changeset
1096 set_immediate_dominator (CDI_DOMINATORS, new_preheader, e->src);
kono
parents: 67
diff changeset
1097 if (was_imm_dom || duplicate_outer_loop)
kono
parents: 67
diff changeset
1098 set_immediate_dominator (CDI_DOMINATORS, exit_dest, new_exit->src);
kono
parents: 67
diff changeset
1099
kono
parents: 67
diff changeset
1100 /* And remove the non-necessary forwarder again. Keep the other
kono
parents: 67
diff changeset
1101 one so we have a proper pre-header for the loop at the exit edge. */
kono
parents: 67
diff changeset
1102 redirect_edge_pred (single_succ_edge (preheader),
kono
parents: 67
diff changeset
1103 single_pred (preheader));
kono
parents: 67
diff changeset
1104 delete_basic_block (preheader);
kono
parents: 67
diff changeset
1105 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
kono
parents: 67
diff changeset
1106 loop_preheader_edge (scalar_loop)->src);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1107 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1108 else /* Add the copy at entry. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1109 {
111
kono
parents: 67
diff changeset
1110 if (scalar_loop != loop)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1111 {
111
kono
parents: 67
diff changeset
1112 /* Remove the non-necessary forwarder of scalar_loop again. */
kono
parents: 67
diff changeset
1113 redirect_edge_pred (single_succ_edge (preheader),
kono
parents: 67
diff changeset
1114 single_pred (preheader));
kono
parents: 67
diff changeset
1115 delete_basic_block (preheader);
kono
parents: 67
diff changeset
1116 set_immediate_dominator (CDI_DOMINATORS, scalar_loop->header,
kono
parents: 67
diff changeset
1117 loop_preheader_edge (scalar_loop)->src);
kono
parents: 67
diff changeset
1118 preheader = split_edge (loop_preheader_edge (loop));
kono
parents: 67
diff changeset
1119 entry_e = single_pred_edge (preheader);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1120 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1121
111
kono
parents: 67
diff changeset
1122 redirect_edge_and_branch_force (entry_e, new_preheader);
kono
parents: 67
diff changeset
1123 flush_pending_stmts (entry_e);
kono
parents: 67
diff changeset
1124 set_immediate_dominator (CDI_DOMINATORS, new_preheader, entry_e->src);
kono
parents: 67
diff changeset
1125
kono
parents: 67
diff changeset
1126 redirect_edge_and_branch_force (new_exit, preheader);
kono
parents: 67
diff changeset
1127 flush_pending_stmts (new_exit);
kono
parents: 67
diff changeset
1128 set_immediate_dominator (CDI_DOMINATORS, preheader, new_exit->src);
kono
parents: 67
diff changeset
1129
kono
parents: 67
diff changeset
1130 /* And remove the non-necessary forwarder again. Keep the other
kono
parents: 67
diff changeset
1131 one so we have a proper pre-header for the loop at the exit edge. */
kono
parents: 67
diff changeset
1132 redirect_edge_pred (single_succ_edge (new_preheader),
kono
parents: 67
diff changeset
1133 single_pred (new_preheader));
kono
parents: 67
diff changeset
1134 delete_basic_block (new_preheader);
kono
parents: 67
diff changeset
1135 set_immediate_dominator (CDI_DOMINATORS, new_loop->header,
kono
parents: 67
diff changeset
1136 loop_preheader_edge (new_loop)->src);
kono
parents: 67
diff changeset
1137 }
kono
parents: 67
diff changeset
1138
kono
parents: 67
diff changeset
1139 /* Skip new preheader since it's deleted if copy loop is added at entry. */
kono
parents: 67
diff changeset
1140 for (unsigned i = (at_exit ? 0 : 1); i < scalar_loop->num_nodes + 1; i++)
kono
parents: 67
diff changeset
1141 rename_variables_in_bb (new_bbs[i], duplicate_outer_loop);
kono
parents: 67
diff changeset
1142
kono
parents: 67
diff changeset
1143 if (scalar_loop != loop)
kono
parents: 67
diff changeset
1144 {
kono
parents: 67
diff changeset
1145 /* Update new_loop->header PHIs, so that on the preheader
kono
parents: 67
diff changeset
1146 edge they are the ones from loop rather than scalar_loop. */
kono
parents: 67
diff changeset
1147 gphi_iterator gsi_orig, gsi_new;
kono
parents: 67
diff changeset
1148 edge orig_e = loop_preheader_edge (loop);
kono
parents: 67
diff changeset
1149 edge new_e = loop_preheader_edge (new_loop);
kono
parents: 67
diff changeset
1150
kono
parents: 67
diff changeset
1151 for (gsi_orig = gsi_start_phis (loop->header),
kono
parents: 67
diff changeset
1152 gsi_new = gsi_start_phis (new_loop->header);
kono
parents: 67
diff changeset
1153 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_new);
kono
parents: 67
diff changeset
1154 gsi_next (&gsi_orig), gsi_next (&gsi_new))
kono
parents: 67
diff changeset
1155 {
kono
parents: 67
diff changeset
1156 gphi *orig_phi = gsi_orig.phi ();
kono
parents: 67
diff changeset
1157 gphi *new_phi = gsi_new.phi ();
kono
parents: 67
diff changeset
1158 tree orig_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
kono
parents: 67
diff changeset
1159 location_t orig_locus
kono
parents: 67
diff changeset
1160 = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
kono
parents: 67
diff changeset
1161
kono
parents: 67
diff changeset
1162 add_phi_arg (new_phi, orig_arg, new_e, orig_locus);
kono
parents: 67
diff changeset
1163 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1164 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1165
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1166 free (new_bbs);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1167 free (bbs);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1168
111
kono
parents: 67
diff changeset
1169 checking_verify_dominators (CDI_DOMINATORS);
kono
parents: 67
diff changeset
1170
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1171 return new_loop;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1172 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1173
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1174
111
kono
parents: 67
diff changeset
1175 /* Given the condition expression COND, put it as the last statement of
kono
parents: 67
diff changeset
1176 GUARD_BB; set both edges' probability; set dominator of GUARD_TO to
kono
parents: 67
diff changeset
1177 DOM_BB; return the skip edge. GUARD_TO is the target basic block to
kono
parents: 67
diff changeset
1178 skip the loop. PROBABILITY is the skip edge's probability. Mark the
kono
parents: 67
diff changeset
1179 new edge as irreducible if IRREDUCIBLE_P is true. */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1180
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1181 static edge
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1182 slpeel_add_loop_guard (basic_block guard_bb, tree cond,
111
kono
parents: 67
diff changeset
1183 basic_block guard_to, basic_block dom_bb,
kono
parents: 67
diff changeset
1184 profile_probability probability, bool irreducible_p)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1185 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1186 gimple_stmt_iterator gsi;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1187 edge new_e, enter_e;
111
kono
parents: 67
diff changeset
1188 gcond *cond_stmt;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1189 gimple_seq gimplify_stmt_list = NULL;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1190
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1191 enter_e = EDGE_SUCC (guard_bb, 0);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1192 enter_e->flags &= ~EDGE_FALLTHRU;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1193 enter_e->flags |= EDGE_FALSE_VALUE;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1194 gsi = gsi_last_bb (guard_bb);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1195
111
kono
parents: 67
diff changeset
1196 cond = force_gimple_operand_1 (cond, &gimplify_stmt_list, is_gimple_condexpr,
kono
parents: 67
diff changeset
1197 NULL_TREE);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1198 if (gimplify_stmt_list)
111
kono
parents: 67
diff changeset
1199 gsi_insert_seq_after (&gsi, gimplify_stmt_list, GSI_NEW_STMT);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1200
111
kono
parents: 67
diff changeset
1201 cond_stmt = gimple_build_cond_from_tree (cond, NULL_TREE, NULL_TREE);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1202 gsi = gsi_last_bb (guard_bb);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1203 gsi_insert_after (&gsi, cond_stmt, GSI_NEW_STMT);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1204
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1205 /* Add new edge to connect guard block to the merge/loop-exit block. */
111
kono
parents: 67
diff changeset
1206 new_e = make_edge (guard_bb, guard_to, EDGE_TRUE_VALUE);
kono
parents: 67
diff changeset
1207
kono
parents: 67
diff changeset
1208 new_e->probability = probability;
kono
parents: 67
diff changeset
1209 if (irreducible_p)
kono
parents: 67
diff changeset
1210 new_e->flags |= EDGE_IRREDUCIBLE_LOOP;
kono
parents: 67
diff changeset
1211
kono
parents: 67
diff changeset
1212 enter_e->probability = probability.invert ();
kono
parents: 67
diff changeset
1213 set_immediate_dominator (CDI_DOMINATORS, guard_to, dom_bb);
kono
parents: 67
diff changeset
1214
kono
parents: 67
diff changeset
1215 /* Split enter_e to preserve LOOPS_HAVE_PREHEADERS. */
kono
parents: 67
diff changeset
1216 if (enter_e->dest->loop_father->header == enter_e->dest)
kono
parents: 67
diff changeset
1217 split_edge (enter_e);
kono
parents: 67
diff changeset
1218
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1219 return new_e;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1220 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1221
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1222
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1223 /* This function verifies that the following restrictions apply to LOOP:
111
kono
parents: 67
diff changeset
1224 (1) it consists of exactly 2 basic blocks - header, and an empty latch
kono
parents: 67
diff changeset
1225 for innermost loop and 5 basic blocks for outer-loop.
kono
parents: 67
diff changeset
1226 (2) it is single entry, single exit
kono
parents: 67
diff changeset
1227 (3) its exit condition is the last stmt in the header
kono
parents: 67
diff changeset
1228 (4) E is the entry/exit edge of LOOP.
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1229 */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1230
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1231 bool
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1232 slpeel_can_duplicate_loop_p (const struct loop *loop, const_edge e)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1233 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1234 edge exit_e = single_exit (loop);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1235 edge entry_e = loop_preheader_edge (loop);
111
kono
parents: 67
diff changeset
1236 gcond *orig_cond = get_loop_exit_condition (loop);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1237 gimple_stmt_iterator loop_exit_gsi = gsi_last_bb (exit_e->src);
111
kono
parents: 67
diff changeset
1238 unsigned int num_bb = loop->inner? 5 : 2;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1239
111
kono
parents: 67
diff changeset
1240 /* All loops have an outer scope; the only case loop->outer is NULL is for
kono
parents: 67
diff changeset
1241 the function itself. */
kono
parents: 67
diff changeset
1242 if (!loop_outer (loop)
kono
parents: 67
diff changeset
1243 || loop->num_nodes != num_bb
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1244 || !empty_block_p (loop->latch)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1245 || !single_exit (loop)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1246 /* Verify that new loop exit condition can be trivially modified. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1247 || (!orig_cond || orig_cond != gsi_stmt (loop_exit_gsi))
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1248 || (e != exit_e && e != entry_e))
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1249 return false;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1250
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1251 return true;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1252 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1253
111
kono
parents: 67
diff changeset
1254 /* If the loop has a virtual PHI, but exit bb doesn't, create a virtual PHI
kono
parents: 67
diff changeset
1255 in the exit bb and rename all the uses after the loop. This simplifies
kono
parents: 67
diff changeset
1256 the *guard[12] routines, which assume loop closed SSA form for all PHIs
kono
parents: 67
diff changeset
1257 (but normally loop closed SSA form doesn't require virtual PHIs to be
kono
parents: 67
diff changeset
1258 in the same form). Doing this early simplifies the checking what
kono
parents: 67
diff changeset
1259 uses should be renamed. */
67
f6334be47118 update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
1260
f6334be47118 update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
1261 static void
111
kono
parents: 67
diff changeset
1262 create_lcssa_for_virtual_phi (struct loop *loop)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1263 {
111
kono
parents: 67
diff changeset
1264 gphi_iterator gsi;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1265 edge exit_e = single_exit (loop);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1266
111
kono
parents: 67
diff changeset
1267 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
kono
parents: 67
diff changeset
1268 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
kono
parents: 67
diff changeset
1269 {
kono
parents: 67
diff changeset
1270 gphi *phi = gsi.phi ();
kono
parents: 67
diff changeset
1271 for (gsi = gsi_start_phis (exit_e->dest);
kono
parents: 67
diff changeset
1272 !gsi_end_p (gsi); gsi_next (&gsi))
kono
parents: 67
diff changeset
1273 if (virtual_operand_p (gimple_phi_result (gsi_stmt (gsi))))
kono
parents: 67
diff changeset
1274 break;
kono
parents: 67
diff changeset
1275 if (gsi_end_p (gsi))
kono
parents: 67
diff changeset
1276 {
kono
parents: 67
diff changeset
1277 tree new_vop = copy_ssa_name (PHI_RESULT (phi));
kono
parents: 67
diff changeset
1278 gphi *new_phi = create_phi_node (new_vop, exit_e->dest);
kono
parents: 67
diff changeset
1279 tree vop = PHI_ARG_DEF_FROM_EDGE (phi, EDGE_SUCC (loop->latch, 0));
kono
parents: 67
diff changeset
1280 imm_use_iterator imm_iter;
kono
parents: 67
diff changeset
1281 gimple *stmt;
kono
parents: 67
diff changeset
1282 use_operand_p use_p;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1283
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1284 SSA_NAME_OCCURS_IN_ABNORMAL_PHI (new_vop)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1285 = SSA_NAME_OCCURS_IN_ABNORMAL_PHI (vop);
111
kono
parents: 67
diff changeset
1286 add_phi_arg (new_phi, vop, exit_e, UNKNOWN_LOCATION);
kono
parents: 67
diff changeset
1287 gimple_phi_set_result (new_phi, new_vop);
kono
parents: 67
diff changeset
1288 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, vop)
kono
parents: 67
diff changeset
1289 if (stmt != new_phi
kono
parents: 67
diff changeset
1290 && !flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
kono
parents: 67
diff changeset
1291 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
kono
parents: 67
diff changeset
1292 SET_USE (use_p, new_vop);
kono
parents: 67
diff changeset
1293 }
kono
parents: 67
diff changeset
1294 break;
kono
parents: 67
diff changeset
1295 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1296
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1297 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1298
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1299 /* Function vect_get_loop_location.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1300
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1301 Extract the location of the loop in the source code.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1302 If the loop is not well formed for vectorization, an estimated
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1303 location is calculated.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1304 Return the loop location on success and an empty dump_user_location_t if not. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1305
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1306 dump_user_location_t
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1307 find_loop_location (struct loop *loop)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1308 {
111
kono
parents: 67
diff changeset
1309 gimple *stmt = NULL;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1310 basic_block bb;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1311 gimple_stmt_iterator si;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1312
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1313 if (!loop)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1314 return dump_user_location_t ();
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1315
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1316 stmt = get_loop_exit_condition (loop);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1317
111
kono
parents: 67
diff changeset
1318 if (stmt
kono
parents: 67
diff changeset
1319 && LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1320 return stmt;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1321
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1322 /* If we got here the loop is probably not "well formed",
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1323 try to estimate the loop location */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1324
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1325 if (!loop->header)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1326 return dump_user_location_t ();
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1327
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1328 bb = loop->header;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1329
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1330 for (si = gsi_start_bb (bb); !gsi_end_p (si); gsi_next (&si))
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1331 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1332 stmt = gsi_stmt (si);
111
kono
parents: 67
diff changeset
1333 if (LOCATION_LOCUS (gimple_location (stmt)) > BUILTINS_LOCATION)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1334 return stmt;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1335 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1336
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1337 return dump_user_location_t ();
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1338 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1339
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1340 /* Return true if the phi described by STMT_INFO defines an IV of the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1341 loop to be vectorized. */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1342
111
kono
parents: 67
diff changeset
1343 static bool
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1344 iv_phi_p (stmt_vec_info stmt_info)
111
kono
parents: 67
diff changeset
1345 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1346 gphi *phi = as_a <gphi *> (stmt_info->stmt);
111
kono
parents: 67
diff changeset
1347 if (virtual_operand_p (PHI_RESULT (phi)))
kono
parents: 67
diff changeset
1348 return false;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1349
111
kono
parents: 67
diff changeset
1350 if (STMT_VINFO_DEF_TYPE (stmt_info) == vect_reduction_def
kono
parents: 67
diff changeset
1351 || STMT_VINFO_DEF_TYPE (stmt_info) == vect_double_reduction_def)
kono
parents: 67
diff changeset
1352 return false;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1353
111
kono
parents: 67
diff changeset
1354 return true;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1355 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1356
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1357 /* Function vect_can_advance_ivs_p
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1358
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1359 In case the number of iterations that LOOP iterates is unknown at compile
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1360 time, an epilog loop will be generated, and the loop induction variables
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1361 (IVs) will be "advanced" to the value they are supposed to take just before
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1362 the epilog loop. Here we check that the access function of the loop IVs
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1363 and the expression that represents the loop bound are simple enough.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1364 These restrictions will be relaxed in the future. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1365
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1366 bool
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1367 vect_can_advance_ivs_p (loop_vec_info loop_vinfo)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1368 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1369 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1370 basic_block bb = loop->header;
111
kono
parents: 67
diff changeset
1371 gphi_iterator gsi;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1372
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1373 /* Analyze phi functions of the loop header. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1374
111
kono
parents: 67
diff changeset
1375 if (dump_enabled_p ())
kono
parents: 67
diff changeset
1376 dump_printf_loc (MSG_NOTE, vect_location, "vect_can_advance_ivs_p:\n");
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1377 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1378 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1379 tree evolution_part;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1380
111
kono
parents: 67
diff changeset
1381 gphi *phi = gsi.phi ();
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1382 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
111
kono
parents: 67
diff changeset
1383 if (dump_enabled_p ())
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1384 dump_printf_loc (MSG_NOTE, vect_location, "Analyze phi: %G",
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1385 phi_info->stmt);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1386
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1387 /* Skip virtual phis. The data dependences that are associated with
111
kono
parents: 67
diff changeset
1388 virtual defs/uses (i.e., memory accesses) are analyzed elsewhere.
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1389
111
kono
parents: 67
diff changeset
1390 Skip reduction phis. */
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1391 if (!iv_phi_p (phi_info))
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1392 {
111
kono
parents: 67
diff changeset
1393 if (dump_enabled_p ())
kono
parents: 67
diff changeset
1394 dump_printf_loc (MSG_NOTE, vect_location,
kono
parents: 67
diff changeset
1395 "reduc or virtual phi. skip.\n");
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1396 continue;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1397 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1398
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1399 /* Analyze the evolution function. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1400
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1401 evolution_part = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
111
kono
parents: 67
diff changeset
1402 if (evolution_part == NULL_TREE)
kono
parents: 67
diff changeset
1403 {
kono
parents: 67
diff changeset
1404 if (dump_enabled_p ())
kono
parents: 67
diff changeset
1405 dump_printf (MSG_MISSED_OPTIMIZATION,
kono
parents: 67
diff changeset
1406 "No access function or evolution.\n");
kono
parents: 67
diff changeset
1407 return false;
kono
parents: 67
diff changeset
1408 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1409
111
kono
parents: 67
diff changeset
1410 /* FORNOW: We do not transform initial conditions of IVs
kono
parents: 67
diff changeset
1411 whose evolution functions are not invariant in the loop. */
kono
parents: 67
diff changeset
1412
kono
parents: 67
diff changeset
1413 if (!expr_invariant_in_loop_p (loop, evolution_part))
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1414 {
111
kono
parents: 67
diff changeset
1415 if (dump_enabled_p ())
kono
parents: 67
diff changeset
1416 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
kono
parents: 67
diff changeset
1417 "evolution not invariant in loop.\n");
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1418 return false;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1419 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1420
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1421 /* FORNOW: We do not transform initial conditions of IVs
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1422 whose evolution functions are polynomials of degree >= 2. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1423
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1424 if (tree_is_chrec (evolution_part))
111
kono
parents: 67
diff changeset
1425 {
kono
parents: 67
diff changeset
1426 if (dump_enabled_p ())
kono
parents: 67
diff changeset
1427 dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
kono
parents: 67
diff changeset
1428 "evolution is chrec.\n");
kono
parents: 67
diff changeset
1429 return false;
kono
parents: 67
diff changeset
1430 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1431 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1432
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1433 return true;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1434 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1435
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1436
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1437 /* Function vect_update_ivs_after_vectorizer.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1438
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1439 "Advance" the induction variables of LOOP to the value they should take
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1440 after the execution of LOOP. This is currently necessary because the
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1441 vectorizer does not handle induction variables that are used after the
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1442 loop. Such a situation occurs when the last iterations of LOOP are
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1443 peeled, because:
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1444 1. We introduced new uses after LOOP for IVs that were not originally used
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1445 after LOOP: the IVs of LOOP are now used by an epilog loop.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1446 2. LOOP is going to be vectorized; this means that it will iterate N/VF
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1447 times, whereas the loop IVs should be bumped N times.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1448
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1449 Input:
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1450 - LOOP - a loop that is going to be vectorized. The last few iterations
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1451 of LOOP were peeled.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1452 - NITERS - the number of iterations that LOOP executes (before it is
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1453 vectorized), i.e., the number of times the ivs should be bumped.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1454 - UPDATE_E - a successor edge of LOOP->exit that is on the (only) path
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1455 coming out from LOOP on which there are uses of the LOOP ivs
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1456 (this is the path from LOOP->exit to epilog_loop->preheader).
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1457
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1458 The new definitions of the ivs are placed in LOOP->exit.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1459 The phi args associated with the edge UPDATE_E in the bb
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1460 UPDATE_E->dest are updated accordingly.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1461
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1462 Assumption 1: Like the rest of the vectorizer, this function assumes
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1463 a single loop exit that has a single predecessor.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1464
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1465 Assumption 2: The phi nodes in the LOOP header and in update_bb are
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1466 organized in the same order.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1467
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1468 Assumption 3: The access function of the ivs is simple enough (see
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1469 vect_can_advance_ivs_p). This assumption will be relaxed in the future.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1470
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1471 Assumption 4: Exactly one of the successors of LOOP exit-bb is on a path
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1472 coming out of LOOP on which the ivs of LOOP are used (this is the path
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1473 that leads to the epilog loop; other paths skip the epilog loop). This
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1474 path starts with the edge UPDATE_E, and its destination (denoted update_bb)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1475 needs to have its phis updated.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1476 */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1477
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1478 static void
111
kono
parents: 67
diff changeset
1479 vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
kono
parents: 67
diff changeset
1480 tree niters, edge update_e)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1481 {
111
kono
parents: 67
diff changeset
1482 gphi_iterator gsi, gsi1;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1483 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
111
kono
parents: 67
diff changeset
1484 basic_block update_bb = update_e->dest;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1485 basic_block exit_bb = single_exit (loop)->dest;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1486
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1487 /* Make sure there exists a single-predecessor exit bb: */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1488 gcc_assert (single_pred_p (exit_bb));
111
kono
parents: 67
diff changeset
1489 gcc_assert (single_succ_edge (exit_bb) == update_e);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1490
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1491 for (gsi = gsi_start_phis (loop->header), gsi1 = gsi_start_phis (update_bb);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1492 !gsi_end_p (gsi) && !gsi_end_p (gsi1);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1493 gsi_next (&gsi), gsi_next (&gsi1))
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1494 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1495 tree init_expr;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1496 tree step_expr, off;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1497 tree type;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1498 tree var, ni, ni_name;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1499 gimple_stmt_iterator last_gsi;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1500
111
kono
parents: 67
diff changeset
1501 gphi *phi = gsi.phi ();
kono
parents: 67
diff changeset
1502 gphi *phi1 = gsi1.phi ();
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1503 stmt_vec_info phi_info = loop_vinfo->lookup_stmt (phi);
111
kono
parents: 67
diff changeset
1504 if (dump_enabled_p ())
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1505 dump_printf_loc (MSG_NOTE, vect_location,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1506 "vect_update_ivs_after_vectorizer: phi: %G", phi);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1507
111
kono
parents: 67
diff changeset
1508 /* Skip reduction and virtual phis. */
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1509 if (!iv_phi_p (phi_info))
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1510 {
111
kono
parents: 67
diff changeset
1511 if (dump_enabled_p ())
kono
parents: 67
diff changeset
1512 dump_printf_loc (MSG_NOTE, vect_location,
kono
parents: 67
diff changeset
1513 "reduc or virtual phi. skip.\n");
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1514 continue;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1515 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1516
111
kono
parents: 67
diff changeset
1517 type = TREE_TYPE (gimple_phi_result (phi));
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1518 step_expr = STMT_VINFO_LOOP_PHI_EVOLUTION_PART (phi_info);
111
kono
parents: 67
diff changeset
1519 step_expr = unshare_expr (step_expr);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1520
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1521 /* FORNOW: We do not support IVs whose evolution function is a polynomial
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1522 of degree >= 2 or exponential. */
111
kono
parents: 67
diff changeset
1523 gcc_assert (!tree_is_chrec (step_expr));
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1524
111
kono
parents: 67
diff changeset
1525 init_expr = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1526
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1527 off = fold_build2 (MULT_EXPR, TREE_TYPE (step_expr),
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1528 fold_convert (TREE_TYPE (step_expr), niters),
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1529 step_expr);
111
kono
parents: 67
diff changeset
1530 if (POINTER_TYPE_P (type))
kono
parents: 67
diff changeset
1531 ni = fold_build_pointer_plus (init_expr, off);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1532 else
111
kono
parents: 67
diff changeset
1533 ni = fold_build2 (PLUS_EXPR, type,
kono
parents: 67
diff changeset
1534 init_expr, fold_convert (type, off));
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1535
111
kono
parents: 67
diff changeset
1536 var = create_tmp_var (type, "tmp");
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1537
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1538 last_gsi = gsi_last_bb (exit_bb);
111
kono
parents: 67
diff changeset
1539 gimple_seq new_stmts = NULL;
kono
parents: 67
diff changeset
1540 ni_name = force_gimple_operand (ni, &new_stmts, false, var);
kono
parents: 67
diff changeset
1541 /* Exit_bb shouldn't be empty. */
kono
parents: 67
diff changeset
1542 if (!gsi_end_p (last_gsi))
kono
parents: 67
diff changeset
1543 gsi_insert_seq_after (&last_gsi, new_stmts, GSI_SAME_STMT);
kono
parents: 67
diff changeset
1544 else
kono
parents: 67
diff changeset
1545 gsi_insert_seq_before (&last_gsi, new_stmts, GSI_SAME_STMT);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1546
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1547 /* Fix phi expressions in the successor bb. */
63
b7f97abdc517 update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents: 55
diff changeset
1548 adjust_phi_and_debug_stmts (phi1, update_e, ni_name);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1549 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1550 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1551
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1552 /* Return a gimple value containing the misalignment (measured in vector
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1553 elements) for the loop described by LOOP_VINFO, i.e. how many elements
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1554 it is away from a perfectly aligned address. Add any new statements
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1555 to SEQ. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1556
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1557 static tree
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1558 get_misalign_in_elems (gimple **seq, loop_vec_info loop_vinfo)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1559 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1560 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1561 stmt_vec_info stmt_info = dr_info->stmt;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1562 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1563
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1564 unsigned int target_align = DR_TARGET_ALIGNMENT (dr_info);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1565 gcc_assert (target_align != 0);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1566
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1567 bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1568 size_zero_node) < 0;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1569 tree offset = (negative
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1570 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1571 : size_zero_node);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1572 tree start_addr = vect_create_addr_base_for_vector_ref (stmt_info, seq,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1573 offset);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1574 tree type = unsigned_type_for (TREE_TYPE (start_addr));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1575 tree target_align_minus_1 = build_int_cst (type, target_align - 1);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1576 HOST_WIDE_INT elem_size
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1577 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1578 tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1579
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1580 /* Create: misalign_in_bytes = addr & (target_align - 1). */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1581 tree int_start_addr = fold_convert (type, start_addr);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1582 tree misalign_in_bytes = fold_build2 (BIT_AND_EXPR, type, int_start_addr,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1583 target_align_minus_1);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1584
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1585 /* Create: misalign_in_elems = misalign_in_bytes / element_size. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1586 tree misalign_in_elems = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1587 elem_size_log);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1588
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1589 return misalign_in_elems;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1590 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1591
111
kono
parents: 67
diff changeset
1592 /* Function vect_gen_prolog_loop_niters
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1593
111
kono
parents: 67
diff changeset
1594 Generate the number of iterations which should be peeled as prolog for the
kono
parents: 67
diff changeset
1595 loop represented by LOOP_VINFO. It is calculated as the misalignment of
kono
parents: 67
diff changeset
1596 DR - the data reference recorded in LOOP_VINFO_UNALIGNED_DR (LOOP_VINFO).
kono
parents: 67
diff changeset
1597 As a result, after the execution of this loop, the data reference DR will
kono
parents: 67
diff changeset
1598 refer to an aligned location. The following computation is generated:
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1599
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1600 If the misalignment of DR is known at compile time:
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1601 addr_mis = int mis = DR_MISALIGNMENT (dr);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1602 Else, compute address misalignment in bytes:
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1603 addr_mis = addr & (target_align - 1)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1604
111
kono
parents: 67
diff changeset
1605 prolog_niters = ((VF - addr_mis/elem_size)&(VF-1))/step
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1606
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1607 (elem_size = element type size; an element is the scalar element whose type
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1608 is the inner type of the vectype)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1609
111
kono
parents: 67
diff changeset
1610 The computations will be emitted at the end of BB. We also compute and
kono
parents: 67
diff changeset
1611 store upper bound (included) of the result in BOUND.
kono
parents: 67
diff changeset
1612
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1613 When the step of the data-ref in the loop is not 1 (as in interleaved data
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1614 and SLP), the number of iterations of the prolog must be divided by the step
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1615 (which is equal to the size of interleaved group).
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1616
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1617 The above formulas assume that VF == number of elements in the vector. This
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1618 may not hold when there are multiple-types in the loop.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1619 In this case, for some data-references in the loop the VF does not represent
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1620 the number of elements that fit in the vector. Therefore, instead of VF we
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1621 use TYPE_VECTOR_SUBPARTS. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1622
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1623 static tree
111
kono
parents: 67
diff changeset
1624 vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
kono
parents: 67
diff changeset
1625 basic_block bb, int *bound)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1626 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1627 dr_vec_info *dr_info = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1628 tree var;
111
kono
parents: 67
diff changeset
1629 tree niters_type = TREE_TYPE (LOOP_VINFO_NITERS (loop_vinfo));
kono
parents: 67
diff changeset
1630 gimple_seq stmts = NULL, new_stmts = NULL;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1631 tree iters, iters_name;
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1632 stmt_vec_info stmt_info = dr_info->stmt;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1633 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1634 unsigned int target_align = DR_TARGET_ALIGNMENT (dr_info);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1635
111
kono
parents: 67
diff changeset
1636 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1637 {
111
kono
parents: 67
diff changeset
1638 int npeel = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1639
111
kono
parents: 67
diff changeset
1640 if (dump_enabled_p ())
kono
parents: 67
diff changeset
1641 dump_printf_loc (MSG_NOTE, vect_location,
kono
parents: 67
diff changeset
1642 "known peeling = %d.\n", npeel);
67
f6334be47118 update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
1643
f6334be47118 update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
1644 iters = build_int_cst (niters_type, npeel);
111
kono
parents: 67
diff changeset
1645 *bound = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1646 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1647 else
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1648 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1649 tree misalign_in_elems = get_misalign_in_elems (&stmts, loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1650 tree type = TREE_TYPE (misalign_in_elems);
111
kono
parents: 67
diff changeset
1651 HOST_WIDE_INT elem_size
kono
parents: 67
diff changeset
1652 = int_cst_value (TYPE_SIZE_UNIT (TREE_TYPE (vectype)));
kono
parents: 67
diff changeset
1653 HOST_WIDE_INT align_in_elems = target_align / elem_size;
kono
parents: 67
diff changeset
1654 tree align_in_elems_minus_1 = build_int_cst (type, align_in_elems - 1);
kono
parents: 67
diff changeset
1655 tree align_in_elems_tree = build_int_cst (type, align_in_elems);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1656
111
kono
parents: 67
diff changeset
1657 /* Create: (niters_type) ((align_in_elems - misalign_in_elems)
kono
parents: 67
diff changeset
1658 & (align_in_elems - 1)). */
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1659 bool negative = tree_int_cst_compare (DR_STEP (dr_info->dr),
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1660 size_zero_node) < 0;
67
f6334be47118 update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
1661 if (negative)
111
kono
parents: 67
diff changeset
1662 iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
kono
parents: 67
diff changeset
1663 align_in_elems_tree);
67
f6334be47118 update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
1664 else
111
kono
parents: 67
diff changeset
1665 iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
kono
parents: 67
diff changeset
1666 misalign_in_elems);
kono
parents: 67
diff changeset
1667 iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1668 iters = fold_convert (niters_type, iters);
111
kono
parents: 67
diff changeset
1669 *bound = align_in_elems - 1;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1670 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1671
111
kono
parents: 67
diff changeset
1672 if (dump_enabled_p ())
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1673 dump_printf_loc (MSG_NOTE, vect_location,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1674 "niters for prolog loop: %T\n", iters);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1675
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1676 var = create_tmp_var (niters_type, "prolog_loop_niters");
111
kono
parents: 67
diff changeset
1677 iters_name = force_gimple_operand (iters, &new_stmts, false, var);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1678
111
kono
parents: 67
diff changeset
1679 if (new_stmts)
kono
parents: 67
diff changeset
1680 gimple_seq_add_seq (&stmts, new_stmts);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1681 if (stmts)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1682 {
111
kono
parents: 67
diff changeset
1683 gcc_assert (single_succ_p (bb));
kono
parents: 67
diff changeset
1684 gimple_stmt_iterator gsi = gsi_last_bb (bb);
kono
parents: 67
diff changeset
1685 if (gsi_end_p (gsi))
kono
parents: 67
diff changeset
1686 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
kono
parents: 67
diff changeset
1687 else
kono
parents: 67
diff changeset
1688 gsi_insert_seq_after (&gsi, stmts, GSI_SAME_STMT);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1689 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1690 return iters_name;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1691 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1692
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1693
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1694 /* Function vect_update_init_of_dr
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1695
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1696 If CODE is PLUS, the vector loop starts NITERS iterations after the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1697 scalar one, otherwise CODE is MINUS and the vector loop starts NITERS
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1698 iterations before the scalar one (using masking to skip inactive
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1699 elements). This function updates the information recorded in DR to
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1700 account for the difference. Specifically, it updates the OFFSET
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1701 field of DR. */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1702
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1703 static void
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1704 vect_update_init_of_dr (struct data_reference *dr, tree niters, tree_code code)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1705 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1706 tree offset = DR_OFFSET (dr);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1707
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1708 niters = fold_build2 (MULT_EXPR, sizetype,
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1709 fold_convert (sizetype, niters),
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1710 fold_convert (sizetype, DR_STEP (dr)));
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1711 offset = fold_build2 (code, sizetype,
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1712 fold_convert (sizetype, offset), niters);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1713 DR_OFFSET (dr) = offset;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1714 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1715
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1716
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1717 /* Function vect_update_inits_of_drs
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1718
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1719 Apply vect_update_inits_of_dr to all accesses in LOOP_VINFO.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1720 CODE and NITERS are as for vect_update_inits_of_dr. */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1721
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1722 static void
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1723 vect_update_inits_of_drs (loop_vec_info loop_vinfo, tree niters,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1724 tree_code code)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1725 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1726 unsigned int i;
111
kono
parents: 67
diff changeset
1727 vec<data_reference_p> datarefs = LOOP_VINFO_DATAREFS (loop_vinfo);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1728 struct data_reference *dr;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1729
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1730 DUMP_VECT_SCOPE ("vect_update_inits_of_dr");
111
kono
parents: 67
diff changeset
1731
kono
parents: 67
diff changeset
1732 /* Adjust niters to sizetype and insert stmts on loop preheader edge. */
kono
parents: 67
diff changeset
1733 if (!types_compatible_p (sizetype, TREE_TYPE (niters)))
kono
parents: 67
diff changeset
1734 {
kono
parents: 67
diff changeset
1735 gimple_seq seq;
kono
parents: 67
diff changeset
1736 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
kono
parents: 67
diff changeset
1737 tree var = create_tmp_var (sizetype, "prolog_loop_adjusted_niters");
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1738
111
kono
parents: 67
diff changeset
1739 niters = fold_convert (sizetype, niters);
kono
parents: 67
diff changeset
1740 niters = force_gimple_operand (niters, &seq, false, var);
kono
parents: 67
diff changeset
1741 if (seq)
kono
parents: 67
diff changeset
1742 {
kono
parents: 67
diff changeset
1743 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
kono
parents: 67
diff changeset
1744 gcc_assert (!new_bb);
kono
parents: 67
diff changeset
1745 }
kono
parents: 67
diff changeset
1746 }
kono
parents: 67
diff changeset
1747
kono
parents: 67
diff changeset
1748 FOR_EACH_VEC_ELT (datarefs, i, dr)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1749 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1750 dr_vec_info *dr_info = loop_vinfo->lookup_dr (dr);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1751 if (!STMT_VINFO_GATHER_SCATTER_P (dr_info->stmt))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1752 vect_update_init_of_dr (dr, niters, code);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1753 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1754 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1755
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1756 /* For the information recorded in LOOP_VINFO prepare the loop for peeling
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1757 by masking. This involves calculating the number of iterations to
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1758 be peeled and then aligning all memory references appropriately. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1759
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1760 void
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1761 vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1762 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1763 tree misalign_in_elems;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1764 tree type = LOOP_VINFO_MASK_COMPARE_TYPE (loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1765
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1766 gcc_assert (vect_use_loop_mask_for_alignment_p (loop_vinfo));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1767
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1768 /* From the information recorded in LOOP_VINFO get the number of iterations
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1769 that need to be skipped via masking. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1770 if (LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo) > 0)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1771 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1772 poly_int64 misalign = (LOOP_VINFO_VECT_FACTOR (loop_vinfo)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1773 - LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1774 misalign_in_elems = build_int_cst (type, misalign);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1775 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1776 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1777 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1778 gimple_seq seq1 = NULL, seq2 = NULL;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1779 misalign_in_elems = get_misalign_in_elems (&seq1, loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1780 misalign_in_elems = fold_convert (type, misalign_in_elems);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1781 misalign_in_elems = force_gimple_operand (misalign_in_elems,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1782 &seq2, true, NULL_TREE);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1783 gimple_seq_add_seq (&seq1, seq2);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1784 if (seq1)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1785 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1786 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1787 basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq1);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1788 gcc_assert (!new_bb);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1789 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1790 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1791
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1792 if (dump_enabled_p ())
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1793 dump_printf_loc (MSG_NOTE, vect_location,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1794 "misalignment for fully-masked loop: %T\n",
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1795 misalign_in_elems);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1796
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1797 LOOP_VINFO_MASK_SKIP_NITERS (loop_vinfo) = misalign_in_elems;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1798
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1799 vect_update_inits_of_drs (loop_vinfo, misalign_in_elems, MINUS_EXPR);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1800 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1801
111
kono
parents: 67
diff changeset
1802 /* This function builds ni_name = number of iterations. Statements
kono
parents: 67
diff changeset
1803 are emitted on the loop preheader edge. If NEW_VAR_P is not NULL, set
kono
parents: 67
diff changeset
1804 it to TRUE if new ssa_var is generated. */
kono
parents: 67
diff changeset
1805
kono
parents: 67
diff changeset
1806 tree
kono
parents: 67
diff changeset
1807 vect_build_loop_niters (loop_vec_info loop_vinfo, bool *new_var_p)
kono
parents: 67
diff changeset
1808 {
kono
parents: 67
diff changeset
1809 tree ni = unshare_expr (LOOP_VINFO_NITERS (loop_vinfo));
kono
parents: 67
diff changeset
1810 if (TREE_CODE (ni) == INTEGER_CST)
kono
parents: 67
diff changeset
1811 return ni;
kono
parents: 67
diff changeset
1812 else
kono
parents: 67
diff changeset
1813 {
kono
parents: 67
diff changeset
1814 tree ni_name, var;
kono
parents: 67
diff changeset
1815 gimple_seq stmts = NULL;
kono
parents: 67
diff changeset
1816 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
kono
parents: 67
diff changeset
1817
kono
parents: 67
diff changeset
1818 var = create_tmp_var (TREE_TYPE (ni), "niters");
kono
parents: 67
diff changeset
1819 ni_name = force_gimple_operand (ni, &stmts, false, var);
kono
parents: 67
diff changeset
1820 if (stmts)
kono
parents: 67
diff changeset
1821 {
kono
parents: 67
diff changeset
1822 gsi_insert_seq_on_edge_immediate (pe, stmts);
kono
parents: 67
diff changeset
1823 if (new_var_p != NULL)
kono
parents: 67
diff changeset
1824 *new_var_p = true;
kono
parents: 67
diff changeset
1825 }
kono
parents: 67
diff changeset
1826
kono
parents: 67
diff changeset
1827 return ni_name;
kono
parents: 67
diff changeset
1828 }
kono
parents: 67
diff changeset
1829 }
kono
parents: 67
diff changeset
1830
kono
parents: 67
diff changeset
1831 /* Calculate the number of iterations above which vectorized loop will be
kono
parents: 67
diff changeset
1832 preferred than scalar loop. NITERS_PROLOG is the number of iterations
kono
parents: 67
diff changeset
1833 of prolog loop. If it's integer const, the integer number is also passed
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1834 in INT_NITERS_PROLOG. BOUND_PROLOG is the upper bound (inclusive) of the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1835 number of iterations of the prolog loop. BOUND_EPILOG is the corresponding
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1836 value for the epilog loop. If CHECK_PROFITABILITY is true, TH is the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1837 threshold below which the scalar (rather than vectorized) loop will be
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1838 executed. This function stores the upper bound (inclusive) of the result
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1839 in BOUND_SCALAR. */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1840
111
kono
parents: 67
diff changeset
1841 static tree
kono
parents: 67
diff changeset
1842 vect_gen_scalar_loop_niters (tree niters_prolog, int int_niters_prolog,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1843 int bound_prolog, poly_int64 bound_epilog, int th,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1844 poly_uint64 *bound_scalar,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1845 bool check_profitability)
111
kono
parents: 67
diff changeset
1846 {
kono
parents: 67
diff changeset
1847 tree type = TREE_TYPE (niters_prolog);
kono
parents: 67
diff changeset
1848 tree niters = fold_build2 (PLUS_EXPR, type, niters_prolog,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1849 build_int_cst (type, bound_epilog));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1850
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1851 *bound_scalar = bound_prolog + bound_epilog;
111
kono
parents: 67
diff changeset
1852 if (check_profitability)
kono
parents: 67
diff changeset
1853 {
kono
parents: 67
diff changeset
1854 /* TH indicates the minimum niters of vectorized loop, while we
kono
parents: 67
diff changeset
1855 compute the maximum niters of scalar loop. */
kono
parents: 67
diff changeset
1856 th--;
kono
parents: 67
diff changeset
1857 /* Peeling for constant times. */
kono
parents: 67
diff changeset
1858 if (int_niters_prolog >= 0)
kono
parents: 67
diff changeset
1859 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1860 *bound_scalar = upper_bound (int_niters_prolog + bound_epilog, th);
111
kono
parents: 67
diff changeset
1861 return build_int_cst (type, *bound_scalar);
kono
parents: 67
diff changeset
1862 }
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1863 /* Peeling an unknown number of times. Note that both BOUND_PROLOG
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1864 and BOUND_EPILOG are inclusive upper bounds. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1865 if (known_ge (th, bound_prolog + bound_epilog))
111
kono
parents: 67
diff changeset
1866 {
kono
parents: 67
diff changeset
1867 *bound_scalar = th;
kono
parents: 67
diff changeset
1868 return build_int_cst (type, th);
kono
parents: 67
diff changeset
1869 }
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1870 /* Need to do runtime comparison. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1871 else if (maybe_gt (th, bound_epilog))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1872 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1873 *bound_scalar = upper_bound (*bound_scalar, th);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1874 return fold_build2 (MAX_EXPR, type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1875 build_int_cst (type, th), niters);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1876 }
111
kono
parents: 67
diff changeset
1877 }
kono
parents: 67
diff changeset
1878 return niters;
kono
parents: 67
diff changeset
1879 }
kono
parents: 67
diff changeset
1880
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1881 /* NITERS is the number of times that the original scalar loop executes
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1882 after peeling. Work out the maximum number of iterations N that can
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1883 be handled by the vectorized form of the loop and then either:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1884
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1885 a) set *STEP_VECTOR_PTR to the vectorization factor and generate:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1886
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1887 niters_vector = N
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1888
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1889 b) set *STEP_VECTOR_PTR to one and generate:
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1890
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1891 niters_vector = N / vf
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1892
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1893 In both cases, store niters_vector in *NITERS_VECTOR_PTR and add
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1894 any new statements on the loop preheader edge. NITERS_NO_OVERFLOW
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1895 is true if NITERS doesn't overflow (i.e. if NITERS is always nonzero). */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1896
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1897 void
111
kono
parents: 67
diff changeset
1898 vect_gen_vector_loop_niters (loop_vec_info loop_vinfo, tree niters,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1899 tree *niters_vector_ptr, tree *step_vector_ptr,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1900 bool niters_no_overflow)
111
kono
parents: 67
diff changeset
1901 {
kono
parents: 67
diff changeset
1902 tree ni_minus_gap, var;
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1903 tree niters_vector, step_vector, type = TREE_TYPE (niters);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1904 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
111
kono
parents: 67
diff changeset
1905 edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1906 tree log_vf = NULL_TREE;
111
kono
parents: 67
diff changeset
1907
kono
parents: 67
diff changeset
1908 /* If epilogue loop is required because of data accesses with gaps, we
kono
parents: 67
diff changeset
1909 subtract one iteration from the total number of iterations here for
kono
parents: 67
diff changeset
1910 correct calculation of RATIO. */
kono
parents: 67
diff changeset
1911 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
kono
parents: 67
diff changeset
1912 {
kono
parents: 67
diff changeset
1913 ni_minus_gap = fold_build2 (MINUS_EXPR, type, niters,
kono
parents: 67
diff changeset
1914 build_one_cst (type));
kono
parents: 67
diff changeset
1915 if (!is_gimple_val (ni_minus_gap))
kono
parents: 67
diff changeset
1916 {
kono
parents: 67
diff changeset
1917 var = create_tmp_var (type, "ni_gap");
kono
parents: 67
diff changeset
1918 gimple *stmts = NULL;
kono
parents: 67
diff changeset
1919 ni_minus_gap = force_gimple_operand (ni_minus_gap, &stmts,
kono
parents: 67
diff changeset
1920 true, var);
kono
parents: 67
diff changeset
1921 gsi_insert_seq_on_edge_immediate (pe, stmts);
kono
parents: 67
diff changeset
1922 }
kono
parents: 67
diff changeset
1923 }
kono
parents: 67
diff changeset
1924 else
kono
parents: 67
diff changeset
1925 ni_minus_gap = niters;
kono
parents: 67
diff changeset
1926
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1927 unsigned HOST_WIDE_INT const_vf;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1928 if (vf.is_constant (&const_vf)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1929 && !LOOP_VINFO_FULLY_MASKED_P (loop_vinfo))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1930 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1931 /* Create: niters >> log2(vf) */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1932 /* If it's known that niters == number of latch executions + 1 doesn't
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1933 overflow, we can generate niters >> log2(vf); otherwise we generate
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1934 (niters - vf) >> log2(vf) + 1 by using the fact that we know ratio
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1935 will be at least one. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1936 log_vf = build_int_cst (type, exact_log2 (const_vf));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1937 if (niters_no_overflow)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1938 niters_vector = fold_build2 (RSHIFT_EXPR, type, ni_minus_gap, log_vf);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1939 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1940 niters_vector
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1941 = fold_build2 (PLUS_EXPR, type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1942 fold_build2 (RSHIFT_EXPR, type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1943 fold_build2 (MINUS_EXPR, type,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1944 ni_minus_gap,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1945 build_int_cst (type, vf)),
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1946 log_vf),
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1947 build_int_cst (type, 1));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1948 step_vector = build_one_cst (type);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1949 }
111
kono
parents: 67
diff changeset
1950 else
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1951 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1952 niters_vector = ni_minus_gap;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1953 step_vector = build_int_cst (type, vf);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1954 }
111
kono
parents: 67
diff changeset
1955
kono
parents: 67
diff changeset
1956 if (!is_gimple_val (niters_vector))
kono
parents: 67
diff changeset
1957 {
kono
parents: 67
diff changeset
1958 var = create_tmp_var (type, "bnd");
kono
parents: 67
diff changeset
1959 gimple_seq stmts = NULL;
kono
parents: 67
diff changeset
1960 niters_vector = force_gimple_operand (niters_vector, &stmts, true, var);
kono
parents: 67
diff changeset
1961 gsi_insert_seq_on_edge_immediate (pe, stmts);
kono
parents: 67
diff changeset
1962 /* Peeling algorithm guarantees that vector loop bound is at least ONE,
kono
parents: 67
diff changeset
1963 we set range information to make niters analyzer's life easier. */
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1964 if (stmts != NULL && log_vf)
111
kono
parents: 67
diff changeset
1965 set_range_info (niters_vector, VR_RANGE,
kono
parents: 67
diff changeset
1966 wi::to_wide (build_int_cst (type, 1)),
kono
parents: 67
diff changeset
1967 wi::to_wide (fold_build2 (RSHIFT_EXPR, type,
kono
parents: 67
diff changeset
1968 TYPE_MAX_VALUE (type),
kono
parents: 67
diff changeset
1969 log_vf)));
kono
parents: 67
diff changeset
1970 }
kono
parents: 67
diff changeset
1971 *niters_vector_ptr = niters_vector;
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1972 *step_vector_ptr = step_vector;
111
kono
parents: 67
diff changeset
1973
kono
parents: 67
diff changeset
1974 return;
kono
parents: 67
diff changeset
1975 }
kono
parents: 67
diff changeset
1976
kono
parents: 67
diff changeset
1977 /* Given NITERS_VECTOR which is the number of iterations for vectorized
kono
parents: 67
diff changeset
1978 loop specified by LOOP_VINFO after vectorization, compute the number
kono
parents: 67
diff changeset
1979 of iterations before vectorization (niters_vector * vf) and store it
kono
parents: 67
diff changeset
1980 to NITERS_VECTOR_MULT_VF_PTR. */
kono
parents: 67
diff changeset
1981
kono
parents: 67
diff changeset
1982 static void
kono
parents: 67
diff changeset
1983 vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
kono
parents: 67
diff changeset
1984 tree niters_vector,
kono
parents: 67
diff changeset
1985 tree *niters_vector_mult_vf_ptr)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
1986 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1987 /* We should be using a step_vector of VF if VF is variable. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
1988 int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
111
kono
parents: 67
diff changeset
1989 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
kono
parents: 67
diff changeset
1990 tree type = TREE_TYPE (niters_vector);
kono
parents: 67
diff changeset
1991 tree log_vf = build_int_cst (type, exact_log2 (vf));
kono
parents: 67
diff changeset
1992 basic_block exit_bb = single_exit (loop)->dest;
kono
parents: 67
diff changeset
1993
kono
parents: 67
diff changeset
1994 gcc_assert (niters_vector_mult_vf_ptr != NULL);
kono
parents: 67
diff changeset
1995 tree niters_vector_mult_vf = fold_build2 (LSHIFT_EXPR, type,
kono
parents: 67
diff changeset
1996 niters_vector, log_vf);
kono
parents: 67
diff changeset
1997 if (!is_gimple_val (niters_vector_mult_vf))
kono
parents: 67
diff changeset
1998 {
kono
parents: 67
diff changeset
1999 tree var = create_tmp_var (type, "niters_vector_mult_vf");
kono
parents: 67
diff changeset
2000 gimple_seq stmts = NULL;
kono
parents: 67
diff changeset
2001 niters_vector_mult_vf = force_gimple_operand (niters_vector_mult_vf,
kono
parents: 67
diff changeset
2002 &stmts, true, var);
kono
parents: 67
diff changeset
2003 gimple_stmt_iterator gsi = gsi_start_bb (exit_bb);
kono
parents: 67
diff changeset
2004 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
kono
parents: 67
diff changeset
2005 }
kono
parents: 67
diff changeset
2006 *niters_vector_mult_vf_ptr = niters_vector_mult_vf;
kono
parents: 67
diff changeset
2007 }
kono
parents: 67
diff changeset
2008
kono
parents: 67
diff changeset
2009 /* Function slpeel_tree_duplicate_loop_to_edge_cfg duplicates FIRST/SECOND
kono
parents: 67
diff changeset
2010 from SECOND/FIRST and puts it at the original loop's preheader/exit
kono
parents: 67
diff changeset
2011 edge, the two loops are arranged as below:
kono
parents: 67
diff changeset
2012
kono
parents: 67
diff changeset
2013 preheader_a:
kono
parents: 67
diff changeset
2014 first_loop:
kono
parents: 67
diff changeset
2015 header_a:
kono
parents: 67
diff changeset
2016 i_1 = PHI<i_0, i_2>;
kono
parents: 67
diff changeset
2017 ...
kono
parents: 67
diff changeset
2018 i_2 = i_1 + 1;
kono
parents: 67
diff changeset
2019 if (cond_a)
kono
parents: 67
diff changeset
2020 goto latch_a;
kono
parents: 67
diff changeset
2021 else
kono
parents: 67
diff changeset
2022 goto between_bb;
kono
parents: 67
diff changeset
2023 latch_a:
kono
parents: 67
diff changeset
2024 goto header_a;
kono
parents: 67
diff changeset
2025
kono
parents: 67
diff changeset
2026 between_bb:
kono
parents: 67
diff changeset
2027 ;; i_x = PHI<i_2>; ;; LCSSA phi node to be created for FIRST,
kono
parents: 67
diff changeset
2028
kono
parents: 67
diff changeset
2029 second_loop:
kono
parents: 67
diff changeset
2030 header_b:
kono
parents: 67
diff changeset
2031 i_3 = PHI<i_0, i_4>; ;; Use of i_0 to be replaced with i_x,
kono
parents: 67
diff changeset
2032 or with i_2 if no LCSSA phi is created
kono
parents: 67
diff changeset
2033 under condition of CREATE_LCSSA_FOR_IV_PHIS.
kono
parents: 67
diff changeset
2034 ...
kono
parents: 67
diff changeset
2035 i_4 = i_3 + 1;
kono
parents: 67
diff changeset
2036 if (cond_b)
kono
parents: 67
diff changeset
2037 goto latch_b;
kono
parents: 67
diff changeset
2038 else
kono
parents: 67
diff changeset
2039 goto exit_bb;
kono
parents: 67
diff changeset
2040 latch_b:
kono
parents: 67
diff changeset
2041 goto header_b;
kono
parents: 67
diff changeset
2042
kono
parents: 67
diff changeset
2043 exit_bb:
kono
parents: 67
diff changeset
2044
kono
parents: 67
diff changeset
2045 This function creates loop closed SSA for the first loop; update the
kono
parents: 67
diff changeset
2046 second loop's PHI nodes by replacing argument on incoming edge with the
kono
parents: 67
diff changeset
2047 result of newly created lcssa PHI nodes. If CREATE_LCSSA_FOR_IV_PHIS
kono
parents: 67
diff changeset
2048 is false, loop-closed SSA PHIs will only be created for non-IV PHIs for
kono
parents: 67
diff changeset
2049 the first loop.
kono
parents: 67
diff changeset
2050
kono
parents: 67
diff changeset
2051 This function assumes exit bb of the first loop is preheader bb of the
kono
parents: 67
diff changeset
2052 second loop, i.e., between_bb in the example code. With PHIs updated,
kono
parents: 67
diff changeset
2053 the second loop will execute the remaining iterations of the first. */
kono
parents: 67
diff changeset
2054
kono
parents: 67
diff changeset
2055 static void
kono
parents: 67
diff changeset
2056 slpeel_update_phi_nodes_for_loops (loop_vec_info loop_vinfo,
kono
parents: 67
diff changeset
2057 struct loop *first, struct loop *second,
kono
parents: 67
diff changeset
2058 bool create_lcssa_for_iv_phis)
kono
parents: 67
diff changeset
2059 {
kono
parents: 67
diff changeset
2060 gphi_iterator gsi_update, gsi_orig;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2061 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
111
kono
parents: 67
diff changeset
2062
kono
parents: 67
diff changeset
2063 edge first_latch_e = EDGE_SUCC (first->latch, 0);
kono
parents: 67
diff changeset
2064 edge second_preheader_e = loop_preheader_edge (second);
kono
parents: 67
diff changeset
2065 basic_block between_bb = single_exit (first)->dest;
kono
parents: 67
diff changeset
2066
kono
parents: 67
diff changeset
2067 gcc_assert (between_bb == second_preheader_e->src);
kono
parents: 67
diff changeset
2068 gcc_assert (single_pred_p (between_bb) && single_succ_p (between_bb));
kono
parents: 67
diff changeset
2069 /* Either the first loop or the second is the loop to be vectorized. */
kono
parents: 67
diff changeset
2070 gcc_assert (loop == first || loop == second);
kono
parents: 67
diff changeset
2071
kono
parents: 67
diff changeset
2072 for (gsi_orig = gsi_start_phis (first->header),
kono
parents: 67
diff changeset
2073 gsi_update = gsi_start_phis (second->header);
kono
parents: 67
diff changeset
2074 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
kono
parents: 67
diff changeset
2075 gsi_next (&gsi_orig), gsi_next (&gsi_update))
kono
parents: 67
diff changeset
2076 {
kono
parents: 67
diff changeset
2077 gphi *orig_phi = gsi_orig.phi ();
kono
parents: 67
diff changeset
2078 gphi *update_phi = gsi_update.phi ();
kono
parents: 67
diff changeset
2079
kono
parents: 67
diff changeset
2080 tree arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, first_latch_e);
kono
parents: 67
diff changeset
2081 /* Generate lcssa PHI node for the first loop. */
kono
parents: 67
diff changeset
2082 gphi *vect_phi = (loop == first) ? orig_phi : update_phi;
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2083 stmt_vec_info vect_phi_info = loop_vinfo->lookup_stmt (vect_phi);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2084 if (create_lcssa_for_iv_phis || !iv_phi_p (vect_phi_info))
111
kono
parents: 67
diff changeset
2085 {
kono
parents: 67
diff changeset
2086 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
kono
parents: 67
diff changeset
2087 gphi *lcssa_phi = create_phi_node (new_res, between_bb);
kono
parents: 67
diff changeset
2088 add_phi_arg (lcssa_phi, arg, single_exit (first), UNKNOWN_LOCATION);
kono
parents: 67
diff changeset
2089 arg = new_res;
kono
parents: 67
diff changeset
2090 }
kono
parents: 67
diff changeset
2091
kono
parents: 67
diff changeset
2092 /* Update PHI node in the second loop by replacing arg on the loop's
kono
parents: 67
diff changeset
2093 incoming edge. */
kono
parents: 67
diff changeset
2094 adjust_phi_and_debug_stmts (update_phi, second_preheader_e, arg);
kono
parents: 67
diff changeset
2095 }
kono
parents: 67
diff changeset
2096 }
kono
parents: 67
diff changeset
2097
kono
parents: 67
diff changeset
2098 /* Function slpeel_add_loop_guard adds guard skipping from the beginning
kono
parents: 67
diff changeset
2099 of SKIP_LOOP to the beginning of UPDATE_LOOP. GUARD_EDGE and MERGE_EDGE
kono
parents: 67
diff changeset
2100 are two pred edges of the merge point before UPDATE_LOOP. The two loops
kono
parents: 67
diff changeset
2101 appear like below:
kono
parents: 67
diff changeset
2102
kono
parents: 67
diff changeset
2103 guard_bb:
kono
parents: 67
diff changeset
2104 if (cond)
kono
parents: 67
diff changeset
2105 goto merge_bb;
kono
parents: 67
diff changeset
2106 else
kono
parents: 67
diff changeset
2107 goto skip_loop;
kono
parents: 67
diff changeset
2108
kono
parents: 67
diff changeset
2109 skip_loop:
kono
parents: 67
diff changeset
2110 header_a:
kono
parents: 67
diff changeset
2111 i_1 = PHI<i_0, i_2>;
kono
parents: 67
diff changeset
2112 ...
kono
parents: 67
diff changeset
2113 i_2 = i_1 + 1;
kono
parents: 67
diff changeset
2114 if (cond_a)
kono
parents: 67
diff changeset
2115 goto latch_a;
kono
parents: 67
diff changeset
2116 else
kono
parents: 67
diff changeset
2117 goto exit_a;
kono
parents: 67
diff changeset
2118 latch_a:
kono
parents: 67
diff changeset
2119 goto header_a;
kono
parents: 67
diff changeset
2120
kono
parents: 67
diff changeset
2121 exit_a:
kono
parents: 67
diff changeset
2122 i_5 = PHI<i_2>;
kono
parents: 67
diff changeset
2123
kono
parents: 67
diff changeset
2124 merge_bb:
kono
parents: 67
diff changeset
2125 ;; PHI (i_x = PHI<i_0, i_5>) to be created at merge point.
kono
parents: 67
diff changeset
2126
kono
parents: 67
diff changeset
2127 update_loop:
kono
parents: 67
diff changeset
2128 header_b:
kono
parents: 67
diff changeset
2129 i_3 = PHI<i_5, i_4>; ;; Use of i_5 to be replaced with i_x.
kono
parents: 67
diff changeset
2130 ...
kono
parents: 67
diff changeset
2131 i_4 = i_3 + 1;
kono
parents: 67
diff changeset
2132 if (cond_b)
kono
parents: 67
diff changeset
2133 goto latch_b;
kono
parents: 67
diff changeset
2134 else
kono
parents: 67
diff changeset
2135 goto exit_bb;
kono
parents: 67
diff changeset
2136 latch_b:
kono
parents: 67
diff changeset
2137 goto header_b;
kono
parents: 67
diff changeset
2138
kono
parents: 67
diff changeset
2139 exit_bb:
kono
parents: 67
diff changeset
2140
kono
parents: 67
diff changeset
2141 This function creates PHI nodes at merge_bb and replaces the use of i_5
kono
parents: 67
diff changeset
2142 in the update_loop's PHI node with the result of the new PHI. */
kono
parents: 67
diff changeset
2143
kono
parents: 67
diff changeset
2144 static void
kono
parents: 67
diff changeset
2145 slpeel_update_phi_nodes_for_guard1 (struct loop *skip_loop,
kono
parents: 67
diff changeset
2146 struct loop *update_loop,
kono
parents: 67
diff changeset
2147 edge guard_edge, edge merge_edge)
kono
parents: 67
diff changeset
2148 {
kono
parents: 67
diff changeset
2149 source_location merge_loc, guard_loc;
kono
parents: 67
diff changeset
2150 edge orig_e = loop_preheader_edge (skip_loop);
kono
parents: 67
diff changeset
2151 edge update_e = loop_preheader_edge (update_loop);
kono
parents: 67
diff changeset
2152 gphi_iterator gsi_orig, gsi_update;
kono
parents: 67
diff changeset
2153
kono
parents: 67
diff changeset
2154 for ((gsi_orig = gsi_start_phis (skip_loop->header),
kono
parents: 67
diff changeset
2155 gsi_update = gsi_start_phis (update_loop->header));
kono
parents: 67
diff changeset
2156 !gsi_end_p (gsi_orig) && !gsi_end_p (gsi_update);
kono
parents: 67
diff changeset
2157 gsi_next (&gsi_orig), gsi_next (&gsi_update))
kono
parents: 67
diff changeset
2158 {
kono
parents: 67
diff changeset
2159 gphi *orig_phi = gsi_orig.phi ();
kono
parents: 67
diff changeset
2160 gphi *update_phi = gsi_update.phi ();
kono
parents: 67
diff changeset
2161
kono
parents: 67
diff changeset
2162 /* Generate new phi node at merge bb of the guard. */
kono
parents: 67
diff changeset
2163 tree new_res = copy_ssa_name (PHI_RESULT (orig_phi));
kono
parents: 67
diff changeset
2164 gphi *new_phi = create_phi_node (new_res, guard_edge->dest);
kono
parents: 67
diff changeset
2165
kono
parents: 67
diff changeset
2166 /* Merge bb has two incoming edges: GUARD_EDGE and MERGE_EDGE. Set the
kono
parents: 67
diff changeset
2167 args in NEW_PHI for these edges. */
kono
parents: 67
diff changeset
2168 tree merge_arg = PHI_ARG_DEF_FROM_EDGE (update_phi, update_e);
kono
parents: 67
diff changeset
2169 tree guard_arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, orig_e);
kono
parents: 67
diff changeset
2170 merge_loc = gimple_phi_arg_location_from_edge (update_phi, update_e);
kono
parents: 67
diff changeset
2171 guard_loc = gimple_phi_arg_location_from_edge (orig_phi, orig_e);
kono
parents: 67
diff changeset
2172 add_phi_arg (new_phi, merge_arg, merge_edge, merge_loc);
kono
parents: 67
diff changeset
2173 add_phi_arg (new_phi, guard_arg, guard_edge, guard_loc);
kono
parents: 67
diff changeset
2174
kono
parents: 67
diff changeset
2175 /* Update phi in UPDATE_PHI. */
kono
parents: 67
diff changeset
2176 adjust_phi_and_debug_stmts (update_phi, update_e, new_res);
kono
parents: 67
diff changeset
2177 }
kono
parents: 67
diff changeset
2178 }
kono
parents: 67
diff changeset
2179
kono
parents: 67
diff changeset
2180 /* LCSSA_PHI is a lcssa phi of EPILOG loop which is copied from LOOP,
kono
parents: 67
diff changeset
2181 this function searches for the corresponding lcssa phi node in exit
kono
parents: 67
diff changeset
2182 bb of LOOP. If it is found, return the phi result; otherwise return
kono
parents: 67
diff changeset
2183 NULL. */
kono
parents: 67
diff changeset
2184
kono
parents: 67
diff changeset
2185 static tree
kono
parents: 67
diff changeset
2186 find_guard_arg (struct loop *loop, struct loop *epilog ATTRIBUTE_UNUSED,
kono
parents: 67
diff changeset
2187 gphi *lcssa_phi)
kono
parents: 67
diff changeset
2188 {
kono
parents: 67
diff changeset
2189 gphi_iterator gsi;
kono
parents: 67
diff changeset
2190 edge e = single_exit (loop);
kono
parents: 67
diff changeset
2191
kono
parents: 67
diff changeset
2192 gcc_assert (single_pred_p (e->dest));
kono
parents: 67
diff changeset
2193 for (gsi = gsi_start_phis (e->dest); !gsi_end_p (gsi); gsi_next (&gsi))
kono
parents: 67
diff changeset
2194 {
kono
parents: 67
diff changeset
2195 gphi *phi = gsi.phi ();
kono
parents: 67
diff changeset
2196 if (operand_equal_p (PHI_ARG_DEF (phi, 0),
kono
parents: 67
diff changeset
2197 PHI_ARG_DEF (lcssa_phi, 0), 0))
kono
parents: 67
diff changeset
2198 return PHI_RESULT (phi);
kono
parents: 67
diff changeset
2199 }
kono
parents: 67
diff changeset
2200 return NULL_TREE;
kono
parents: 67
diff changeset
2201 }
kono
parents: 67
diff changeset
2202
kono
parents: 67
diff changeset
2203 /* LOOP and EPILOG are two consecutive loops in CFG and EPILOG is copied
kono
parents: 67
diff changeset
2204 from LOOP. Function slpeel_add_loop_guard adds guard skipping from a
kono
parents: 67
diff changeset
2205 point between the two loops to the end of EPILOG. Edges GUARD_EDGE
kono
parents: 67
diff changeset
2206 and MERGE_EDGE are the two pred edges of merge_bb at the end of EPILOG.
kono
parents: 67
diff changeset
2207 The CFG looks like:
kono
parents: 67
diff changeset
2208
kono
parents: 67
diff changeset
2209 loop:
kono
parents: 67
diff changeset
2210 header_a:
kono
parents: 67
diff changeset
2211 i_1 = PHI<i_0, i_2>;
kono
parents: 67
diff changeset
2212 ...
kono
parents: 67
diff changeset
2213 i_2 = i_1 + 1;
kono
parents: 67
diff changeset
2214 if (cond_a)
kono
parents: 67
diff changeset
2215 goto latch_a;
kono
parents: 67
diff changeset
2216 else
kono
parents: 67
diff changeset
2217 goto exit_a;
kono
parents: 67
diff changeset
2218 latch_a:
kono
parents: 67
diff changeset
2219 goto header_a;
kono
parents: 67
diff changeset
2220
kono
parents: 67
diff changeset
2221 exit_a:
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2222
111
kono
parents: 67
diff changeset
2223 guard_bb:
kono
parents: 67
diff changeset
2224 if (cond)
kono
parents: 67
diff changeset
2225 goto merge_bb;
kono
parents: 67
diff changeset
2226 else
kono
parents: 67
diff changeset
2227 goto epilog_loop;
kono
parents: 67
diff changeset
2228
kono
parents: 67
diff changeset
2229 ;; fall_through_bb
kono
parents: 67
diff changeset
2230
kono
parents: 67
diff changeset
2231 epilog_loop:
kono
parents: 67
diff changeset
2232 header_b:
kono
parents: 67
diff changeset
2233 i_3 = PHI<i_2, i_4>;
kono
parents: 67
diff changeset
2234 ...
kono
parents: 67
diff changeset
2235 i_4 = i_3 + 1;
kono
parents: 67
diff changeset
2236 if (cond_b)
kono
parents: 67
diff changeset
2237 goto latch_b;
kono
parents: 67
diff changeset
2238 else
kono
parents: 67
diff changeset
2239 goto merge_bb;
kono
parents: 67
diff changeset
2240 latch_b:
kono
parents: 67
diff changeset
2241 goto header_b;
kono
parents: 67
diff changeset
2242
kono
parents: 67
diff changeset
2243 merge_bb:
kono
parents: 67
diff changeset
2244 ; PHI node (i_y = PHI<i_2, i_4>) to be created at merge point.
kono
parents: 67
diff changeset
2245
kono
parents: 67
diff changeset
2246 exit_bb:
kono
parents: 67
diff changeset
2247 i_x = PHI<i_4>; ;Use of i_4 to be replaced with i_y in merge_bb.
kono
parents: 67
diff changeset
2248
kono
parents: 67
diff changeset
2249 For each name used outside EPILOG (i.e., for each name that has an lcssa
kono
parents: 67
diff changeset
2250 phi in exit_bb) we create a new PHI in merge_bb. The new PHI has two
kono
parents: 67
diff changeset
2251 args corresponding to GUARD_EDGE and MERGE_EDGE. Arg for MERGE_EDGE is
kono
parents: 67
diff changeset
2252 the arg of the original PHI in exit_bb, arg for GUARD_EDGE is defined
kono
parents: 67
diff changeset
2253 by LOOP and is found in the exit bb of LOOP. Arg of the original PHI
kono
parents: 67
diff changeset
2254 in exit_bb will also be updated. */
kono
parents: 67
diff changeset
2255
kono
parents: 67
diff changeset
2256 static void
kono
parents: 67
diff changeset
2257 slpeel_update_phi_nodes_for_guard2 (struct loop *loop, struct loop *epilog,
kono
parents: 67
diff changeset
2258 edge guard_edge, edge merge_edge)
kono
parents: 67
diff changeset
2259 {
kono
parents: 67
diff changeset
2260 gphi_iterator gsi;
kono
parents: 67
diff changeset
2261 basic_block merge_bb = guard_edge->dest;
kono
parents: 67
diff changeset
2262
kono
parents: 67
diff changeset
2263 gcc_assert (single_succ_p (merge_bb));
kono
parents: 67
diff changeset
2264 edge e = single_succ_edge (merge_bb);
kono
parents: 67
diff changeset
2265 basic_block exit_bb = e->dest;
kono
parents: 67
diff changeset
2266 gcc_assert (single_pred_p (exit_bb));
kono
parents: 67
diff changeset
2267 gcc_assert (single_pred (exit_bb) == single_exit (epilog)->dest);
kono
parents: 67
diff changeset
2268
kono
parents: 67
diff changeset
2269 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
kono
parents: 67
diff changeset
2270 {
kono
parents: 67
diff changeset
2271 gphi *update_phi = gsi.phi ();
kono
parents: 67
diff changeset
2272 tree old_arg = PHI_ARG_DEF (update_phi, 0);
kono
parents: 67
diff changeset
2273 /* This loop-closed-phi actually doesn't represent a use out of the
kono
parents: 67
diff changeset
2274 loop - the phi arg is a constant. */
kono
parents: 67
diff changeset
2275 if (TREE_CODE (old_arg) != SSA_NAME)
kono
parents: 67
diff changeset
2276 continue;
kono
parents: 67
diff changeset
2277
kono
parents: 67
diff changeset
2278 tree merge_arg = get_current_def (old_arg);
kono
parents: 67
diff changeset
2279 if (!merge_arg)
kono
parents: 67
diff changeset
2280 merge_arg = old_arg;
kono
parents: 67
diff changeset
2281
kono
parents: 67
diff changeset
2282 tree guard_arg = find_guard_arg (loop, epilog, update_phi);
kono
parents: 67
diff changeset
2283 /* If the var is live after loop but not a reduction, we simply
kono
parents: 67
diff changeset
2284 use the old arg. */
kono
parents: 67
diff changeset
2285 if (!guard_arg)
kono
parents: 67
diff changeset
2286 guard_arg = old_arg;
kono
parents: 67
diff changeset
2287
kono
parents: 67
diff changeset
2288 /* Create new phi node in MERGE_BB: */
kono
parents: 67
diff changeset
2289 tree new_res = copy_ssa_name (PHI_RESULT (update_phi));
kono
parents: 67
diff changeset
2290 gphi *merge_phi = create_phi_node (new_res, merge_bb);
kono
parents: 67
diff changeset
2291
kono
parents: 67
diff changeset
2292 /* MERGE_BB has two incoming edges: GUARD_EDGE and MERGE_EDGE, Set
kono
parents: 67
diff changeset
2293 the two PHI args in merge_phi for these edges. */
kono
parents: 67
diff changeset
2294 add_phi_arg (merge_phi, merge_arg, merge_edge, UNKNOWN_LOCATION);
kono
parents: 67
diff changeset
2295 add_phi_arg (merge_phi, guard_arg, guard_edge, UNKNOWN_LOCATION);
kono
parents: 67
diff changeset
2296
kono
parents: 67
diff changeset
2297 /* Update the original phi in exit_bb. */
kono
parents: 67
diff changeset
2298 adjust_phi_and_debug_stmts (update_phi, e, new_res);
kono
parents: 67
diff changeset
2299 }
kono
parents: 67
diff changeset
2300 }
kono
parents: 67
diff changeset
2301
kono
parents: 67
diff changeset
2302 /* EPILOG loop is duplicated from the original loop for vectorizing,
kono
parents: 67
diff changeset
2303 the arg of its loop closed ssa PHI needs to be updated. */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2304
111
kono
parents: 67
diff changeset
2305 static void
kono
parents: 67
diff changeset
2306 slpeel_update_phi_nodes_for_lcssa (struct loop *epilog)
kono
parents: 67
diff changeset
2307 {
kono
parents: 67
diff changeset
2308 gphi_iterator gsi;
kono
parents: 67
diff changeset
2309 basic_block exit_bb = single_exit (epilog)->dest;
kono
parents: 67
diff changeset
2310
kono
parents: 67
diff changeset
2311 gcc_assert (single_pred_p (exit_bb));
kono
parents: 67
diff changeset
2312 edge e = EDGE_PRED (exit_bb, 0);
kono
parents: 67
diff changeset
2313 for (gsi = gsi_start_phis (exit_bb); !gsi_end_p (gsi); gsi_next (&gsi))
kono
parents: 67
diff changeset
2314 rename_use_op (PHI_ARG_DEF_PTR_FROM_EDGE (gsi.phi (), e));
kono
parents: 67
diff changeset
2315 }
kono
parents: 67
diff changeset
2316
kono
parents: 67
diff changeset
2317 /* Function vect_do_peeling.
kono
parents: 67
diff changeset
2318
kono
parents: 67
diff changeset
2319 Input:
kono
parents: 67
diff changeset
2320 - LOOP_VINFO: Represent a loop to be vectorized, which looks like:
kono
parents: 67
diff changeset
2321
kono
parents: 67
diff changeset
2322 preheader:
kono
parents: 67
diff changeset
2323 LOOP:
kono
parents: 67
diff changeset
2324 header_bb:
kono
parents: 67
diff changeset
2325 loop_body
kono
parents: 67
diff changeset
2326 if (exit_loop_cond) goto exit_bb
kono
parents: 67
diff changeset
2327 else goto header_bb
kono
parents: 67
diff changeset
2328 exit_bb:
kono
parents: 67
diff changeset
2329
kono
parents: 67
diff changeset
2330 - NITERS: The number of iterations of the loop.
kono
parents: 67
diff changeset
2331 - NITERSM1: The number of iterations of the loop's latch.
kono
parents: 67
diff changeset
2332 - NITERS_NO_OVERFLOW: No overflow in computing NITERS.
kono
parents: 67
diff changeset
2333 - TH, CHECK_PROFITABILITY: Threshold of niters to vectorize loop if
kono
parents: 67
diff changeset
2334 CHECK_PROFITABILITY is true.
kono
parents: 67
diff changeset
2335 Output:
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2336 - *NITERS_VECTOR and *STEP_VECTOR describe how the main loop should
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2337 iterate after vectorization; see vect_set_loop_condition for details.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2338 - *NITERS_VECTOR_MULT_VF_VAR is either null or an SSA name that
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2339 should be set to the number of scalar iterations handled by the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2340 vector loop. The SSA name is only used on exit from the loop.
111
kono
parents: 67
diff changeset
2341
kono
parents: 67
diff changeset
2342 This function peels prolog and epilog from the loop, adds guards skipping
kono
parents: 67
diff changeset
2343 PROLOG and EPILOG for various conditions. As a result, the changed CFG
kono
parents: 67
diff changeset
2344 would look like:
kono
parents: 67
diff changeset
2345
kono
parents: 67
diff changeset
2346 guard_bb_1:
kono
parents: 67
diff changeset
2347 if (prefer_scalar_loop) goto merge_bb_1
kono
parents: 67
diff changeset
2348 else goto guard_bb_2
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2349
111
kono
parents: 67
diff changeset
2350 guard_bb_2:
kono
parents: 67
diff changeset
2351 if (skip_prolog) goto merge_bb_2
kono
parents: 67
diff changeset
2352 else goto prolog_preheader
kono
parents: 67
diff changeset
2353
kono
parents: 67
diff changeset
2354 prolog_preheader:
kono
parents: 67
diff changeset
2355 PROLOG:
kono
parents: 67
diff changeset
2356 prolog_header_bb:
kono
parents: 67
diff changeset
2357 prolog_body
kono
parents: 67
diff changeset
2358 if (exit_prolog_cond) goto prolog_exit_bb
kono
parents: 67
diff changeset
2359 else goto prolog_header_bb
kono
parents: 67
diff changeset
2360 prolog_exit_bb:
kono
parents: 67
diff changeset
2361
kono
parents: 67
diff changeset
2362 merge_bb_2:
kono
parents: 67
diff changeset
2363
kono
parents: 67
diff changeset
2364 vector_preheader:
kono
parents: 67
diff changeset
2365 VECTOR LOOP:
kono
parents: 67
diff changeset
2366 vector_header_bb:
kono
parents: 67
diff changeset
2367 vector_body
kono
parents: 67
diff changeset
2368 if (exit_vector_cond) goto vector_exit_bb
kono
parents: 67
diff changeset
2369 else goto vector_header_bb
kono
parents: 67
diff changeset
2370 vector_exit_bb:
kono
parents: 67
diff changeset
2371
kono
parents: 67
diff changeset
2372 guard_bb_3:
kono
parents: 67
diff changeset
2373 if (skip_epilog) goto merge_bb_3
kono
parents: 67
diff changeset
2374 else goto epilog_preheader
kono
parents: 67
diff changeset
2375
kono
parents: 67
diff changeset
2376 merge_bb_1:
kono
parents: 67
diff changeset
2377
kono
parents: 67
diff changeset
2378 epilog_preheader:
kono
parents: 67
diff changeset
2379 EPILOG:
kono
parents: 67
diff changeset
2380 epilog_header_bb:
kono
parents: 67
diff changeset
2381 epilog_body
kono
parents: 67
diff changeset
2382 if (exit_epilog_cond) goto merge_bb_3
kono
parents: 67
diff changeset
2383 else goto epilog_header_bb
kono
parents: 67
diff changeset
2384
kono
parents: 67
diff changeset
2385 merge_bb_3:
kono
parents: 67
diff changeset
2386
kono
parents: 67
diff changeset
2387 Note this function peels prolog and epilog only if it's necessary,
kono
parents: 67
diff changeset
2388 as well as guards.
kono
parents: 67
diff changeset
2389 Returns created epilogue or NULL.
kono
parents: 67
diff changeset
2390
kono
parents: 67
diff changeset
2391 TODO: Guard for prefer_scalar_loop should be emitted along with
kono
parents: 67
diff changeset
2392 versioning conditions if loop versioning is needed. */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2393
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2394
111
kono
parents: 67
diff changeset
2395 struct loop *
kono
parents: 67
diff changeset
2396 vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2397 tree *niters_vector, tree *step_vector,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2398 tree *niters_vector_mult_vf_var, int th,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2399 bool check_profitability, bool niters_no_overflow)
111
kono
parents: 67
diff changeset
2400 {
kono
parents: 67
diff changeset
2401 edge e, guard_e;
kono
parents: 67
diff changeset
2402 tree type = TREE_TYPE (niters), guard_cond;
kono
parents: 67
diff changeset
2403 basic_block guard_bb, guard_to;
kono
parents: 67
diff changeset
2404 profile_probability prob_prolog, prob_vector, prob_epilog;
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2405 int estimated_vf;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2406 int prolog_peeling = 0;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2407 if (!vect_use_loop_mask_for_alignment_p (loop_vinfo))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2408 prolog_peeling = LOOP_VINFO_PEELING_FOR_ALIGNMENT (loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2409
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2410 poly_uint64 vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2411 poly_uint64 bound_epilog = 0;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2412 if (!LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2413 && LOOP_VINFO_PEELING_FOR_NITER (loop_vinfo))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2414 bound_epilog += vf - 1;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2415 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2416 bound_epilog += 1;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2417 bool epilog_peeling = maybe_ne (bound_epilog, 0U);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2418 poly_uint64 bound_scalar = bound_epilog;
111
kono
parents: 67
diff changeset
2419
kono
parents: 67
diff changeset
2420 if (!prolog_peeling && !epilog_peeling)
kono
parents: 67
diff changeset
2421 return NULL;
kono
parents: 67
diff changeset
2422
kono
parents: 67
diff changeset
2423 prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2424 estimated_vf = vect_vf_for_cost (loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2425 if (estimated_vf == 2)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2426 estimated_vf = 3;
111
kono
parents: 67
diff changeset
2427 prob_prolog = prob_epilog = profile_probability::guessed_always ()
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2428 .apply_scale (estimated_vf - 1, estimated_vf);
111
kono
parents: 67
diff changeset
2429
kono
parents: 67
diff changeset
2430 struct loop *prolog, *epilog = NULL, *loop = LOOP_VINFO_LOOP (loop_vinfo);
kono
parents: 67
diff changeset
2431 struct loop *first_loop = loop;
kono
parents: 67
diff changeset
2432 bool irred_flag = loop_preheader_edge (loop)->flags & EDGE_IRREDUCIBLE_LOOP;
kono
parents: 67
diff changeset
2433 create_lcssa_for_virtual_phi (loop);
kono
parents: 67
diff changeset
2434 update_ssa (TODO_update_ssa_only_virtuals);
kono
parents: 67
diff changeset
2435
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2436 if (MAY_HAVE_DEBUG_BIND_STMTS)
111
kono
parents: 67
diff changeset
2437 {
kono
parents: 67
diff changeset
2438 gcc_assert (!adjust_vec.exists ());
kono
parents: 67
diff changeset
2439 adjust_vec.create (32);
kono
parents: 67
diff changeset
2440 }
kono
parents: 67
diff changeset
2441 initialize_original_copy_tables ();
kono
parents: 67
diff changeset
2442
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2443 /* Record the anchor bb at which the guard should be placed if the scalar
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2444 loop might be preferred. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2445 basic_block anchor = loop_preheader_edge (loop)->src;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2446
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2447 /* Generate the number of iterations for the prolog loop. We do this here
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2448 so that we can also get the upper bound on the number of iterations. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2449 tree niters_prolog;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2450 int bound_prolog = 0;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2451 if (prolog_peeling)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2452 niters_prolog = vect_gen_prolog_loop_niters (loop_vinfo, anchor,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2453 &bound_prolog);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2454 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2455 niters_prolog = build_int_cst (type, 0);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2456
111
kono
parents: 67
diff changeset
2457 /* Prolog loop may be skipped. */
kono
parents: 67
diff changeset
2458 bool skip_prolog = (prolog_peeling != 0);
kono
parents: 67
diff changeset
2459 /* Skip to epilog if scalar loop may be preferred. It's only needed
kono
parents: 67
diff changeset
2460 when we peel for epilog loop and when it hasn't been checked with
kono
parents: 67
diff changeset
2461 loop versioning. */
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2462 bool skip_vector = (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2463 ? maybe_lt (LOOP_VINFO_INT_NITERS (loop_vinfo),
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2464 bound_prolog + bound_epilog)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2465 : !LOOP_REQUIRES_VERSIONING (loop_vinfo));
111
kono
parents: 67
diff changeset
2466 /* Epilog loop must be executed if the number of iterations for epilog
kono
parents: 67
diff changeset
2467 loop is known at compile time, otherwise we need to add a check at
kono
parents: 67
diff changeset
2468 the end of vector loop and skip to the end of epilog loop. */
kono
parents: 67
diff changeset
2469 bool skip_epilog = (prolog_peeling < 0
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2470 || !LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2471 || !vf.is_constant ());
111
kono
parents: 67
diff changeset
2472 /* PEELING_FOR_GAPS is special because epilog loop must be executed. */
kono
parents: 67
diff changeset
2473 if (LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo))
kono
parents: 67
diff changeset
2474 skip_epilog = false;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2475
111
kono
parents: 67
diff changeset
2476 if (skip_vector)
kono
parents: 67
diff changeset
2477 {
kono
parents: 67
diff changeset
2478 split_edge (loop_preheader_edge (loop));
kono
parents: 67
diff changeset
2479
kono
parents: 67
diff changeset
2480 /* Due to the order in which we peel prolog and epilog, we first
kono
parents: 67
diff changeset
2481 propagate probability to the whole loop. The purpose is to
kono
parents: 67
diff changeset
2482 avoid adjusting probabilities of both prolog and vector loops
kono
parents: 67
diff changeset
2483 separately. Note in this case, the probability of epilog loop
kono
parents: 67
diff changeset
2484 needs to be scaled back later. */
kono
parents: 67
diff changeset
2485 basic_block bb_before_loop = loop_preheader_edge (loop)->src;
kono
parents: 67
diff changeset
2486 if (prob_vector.initialized_p ())
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2487 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2488 scale_bbs_frequencies (&bb_before_loop, 1, prob_vector);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2489 scale_loop_profile (loop, prob_vector, 0);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2490 }
111
kono
parents: 67
diff changeset
2491 }
kono
parents: 67
diff changeset
2492
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2493 dump_user_location_t loop_loc = find_loop_location (loop);
111
kono
parents: 67
diff changeset
2494 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
kono
parents: 67
diff changeset
2495 if (prolog_peeling)
kono
parents: 67
diff changeset
2496 {
kono
parents: 67
diff changeset
2497 e = loop_preheader_edge (loop);
kono
parents: 67
diff changeset
2498 if (!slpeel_can_duplicate_loop_p (loop, e))
kono
parents: 67
diff changeset
2499 {
kono
parents: 67
diff changeset
2500 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
kono
parents: 67
diff changeset
2501 "loop can't be duplicated to preheader edge.\n");
kono
parents: 67
diff changeset
2502 gcc_unreachable ();
kono
parents: 67
diff changeset
2503 }
kono
parents: 67
diff changeset
2504 /* Peel prolog and put it on preheader edge of loop. */
kono
parents: 67
diff changeset
2505 prolog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
kono
parents: 67
diff changeset
2506 if (!prolog)
kono
parents: 67
diff changeset
2507 {
kono
parents: 67
diff changeset
2508 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
kono
parents: 67
diff changeset
2509 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
kono
parents: 67
diff changeset
2510 gcc_unreachable ();
kono
parents: 67
diff changeset
2511 }
kono
parents: 67
diff changeset
2512 slpeel_update_phi_nodes_for_loops (loop_vinfo, prolog, loop, true);
kono
parents: 67
diff changeset
2513 first_loop = prolog;
kono
parents: 67
diff changeset
2514 reset_original_copy_tables ();
kono
parents: 67
diff changeset
2515
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2516 /* Update the number of iterations for prolog loop. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2517 tree step_prolog = build_one_cst (TREE_TYPE (niters_prolog));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2518 vect_set_loop_condition (prolog, NULL, niters_prolog,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2519 step_prolog, NULL_TREE, false);
111
kono
parents: 67
diff changeset
2520
kono
parents: 67
diff changeset
2521 /* Skip the prolog loop. */
kono
parents: 67
diff changeset
2522 if (skip_prolog)
kono
parents: 67
diff changeset
2523 {
kono
parents: 67
diff changeset
2524 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
kono
parents: 67
diff changeset
2525 niters_prolog, build_int_cst (type, 0));
kono
parents: 67
diff changeset
2526 guard_bb = loop_preheader_edge (prolog)->src;
kono
parents: 67
diff changeset
2527 basic_block bb_after_prolog = loop_preheader_edge (loop)->src;
kono
parents: 67
diff changeset
2528 guard_to = split_edge (loop_preheader_edge (loop));
kono
parents: 67
diff changeset
2529 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
kono
parents: 67
diff changeset
2530 guard_to, guard_bb,
kono
parents: 67
diff changeset
2531 prob_prolog.invert (),
kono
parents: 67
diff changeset
2532 irred_flag);
kono
parents: 67
diff changeset
2533 e = EDGE_PRED (guard_to, 0);
kono
parents: 67
diff changeset
2534 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
kono
parents: 67
diff changeset
2535 slpeel_update_phi_nodes_for_guard1 (prolog, loop, guard_e, e);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2536
111
kono
parents: 67
diff changeset
2537 scale_bbs_frequencies (&bb_after_prolog, 1, prob_prolog);
kono
parents: 67
diff changeset
2538 scale_loop_profile (prolog, prob_prolog, bound_prolog);
kono
parents: 67
diff changeset
2539 }
kono
parents: 67
diff changeset
2540 /* Update init address of DRs. */
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2541 vect_update_inits_of_drs (loop_vinfo, niters_prolog, PLUS_EXPR);
111
kono
parents: 67
diff changeset
2542 /* Update niters for vector loop. */
kono
parents: 67
diff changeset
2543 LOOP_VINFO_NITERS (loop_vinfo)
kono
parents: 67
diff changeset
2544 = fold_build2 (MINUS_EXPR, type, niters, niters_prolog);
kono
parents: 67
diff changeset
2545 LOOP_VINFO_NITERSM1 (loop_vinfo)
kono
parents: 67
diff changeset
2546 = fold_build2 (MINUS_EXPR, type,
kono
parents: 67
diff changeset
2547 LOOP_VINFO_NITERSM1 (loop_vinfo), niters_prolog);
kono
parents: 67
diff changeset
2548 bool new_var_p = false;
kono
parents: 67
diff changeset
2549 niters = vect_build_loop_niters (loop_vinfo, &new_var_p);
kono
parents: 67
diff changeset
2550 /* It's guaranteed that vector loop bound before vectorization is at
kono
parents: 67
diff changeset
2551 least VF, so set range information for newly generated var. */
kono
parents: 67
diff changeset
2552 if (new_var_p)
kono
parents: 67
diff changeset
2553 set_range_info (niters, VR_RANGE,
kono
parents: 67
diff changeset
2554 wi::to_wide (build_int_cst (type, vf)),
kono
parents: 67
diff changeset
2555 wi::to_wide (TYPE_MAX_VALUE (type)));
kono
parents: 67
diff changeset
2556
kono
parents: 67
diff changeset
2557 /* Prolog iterates at most bound_prolog times, latch iterates at
kono
parents: 67
diff changeset
2558 most bound_prolog - 1 times. */
kono
parents: 67
diff changeset
2559 record_niter_bound (prolog, bound_prolog - 1, false, true);
kono
parents: 67
diff changeset
2560 delete_update_ssa ();
kono
parents: 67
diff changeset
2561 adjust_vec_debug_stmts ();
kono
parents: 67
diff changeset
2562 scev_reset ();
kono
parents: 67
diff changeset
2563 }
kono
parents: 67
diff changeset
2564
kono
parents: 67
diff changeset
2565 if (epilog_peeling)
kono
parents: 67
diff changeset
2566 {
kono
parents: 67
diff changeset
2567 e = single_exit (loop);
kono
parents: 67
diff changeset
2568 if (!slpeel_can_duplicate_loop_p (loop, e))
kono
parents: 67
diff changeset
2569 {
kono
parents: 67
diff changeset
2570 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
kono
parents: 67
diff changeset
2571 "loop can't be duplicated to exit edge.\n");
kono
parents: 67
diff changeset
2572 gcc_unreachable ();
kono
parents: 67
diff changeset
2573 }
kono
parents: 67
diff changeset
2574 /* Peel epilog and put it on exit edge of loop. */
kono
parents: 67
diff changeset
2575 epilog = slpeel_tree_duplicate_loop_to_edge_cfg (loop, scalar_loop, e);
kono
parents: 67
diff changeset
2576 if (!epilog)
kono
parents: 67
diff changeset
2577 {
kono
parents: 67
diff changeset
2578 dump_printf_loc (MSG_MISSED_OPTIMIZATION, loop_loc,
kono
parents: 67
diff changeset
2579 "slpeel_tree_duplicate_loop_to_edge_cfg failed.\n");
kono
parents: 67
diff changeset
2580 gcc_unreachable ();
kono
parents: 67
diff changeset
2581 }
kono
parents: 67
diff changeset
2582 slpeel_update_phi_nodes_for_loops (loop_vinfo, loop, epilog, false);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2583
111
kono
parents: 67
diff changeset
2584 /* Scalar version loop may be preferred. In this case, add guard
kono
parents: 67
diff changeset
2585 and skip to epilog. Note this only happens when the number of
kono
parents: 67
diff changeset
2586 iterations of loop is unknown at compile time, otherwise this
kono
parents: 67
diff changeset
2587 won't be vectorized. */
kono
parents: 67
diff changeset
2588 if (skip_vector)
kono
parents: 67
diff changeset
2589 {
kono
parents: 67
diff changeset
2590 /* Additional epilogue iteration is peeled if gap exists. */
kono
parents: 67
diff changeset
2591 tree t = vect_gen_scalar_loop_niters (niters_prolog, prolog_peeling,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2592 bound_prolog, bound_epilog,
111
kono
parents: 67
diff changeset
2593 th, &bound_scalar,
kono
parents: 67
diff changeset
2594 check_profitability);
kono
parents: 67
diff changeset
2595 /* Build guard against NITERSM1 since NITERS may overflow. */
kono
parents: 67
diff changeset
2596 guard_cond = fold_build2 (LT_EXPR, boolean_type_node, nitersm1, t);
kono
parents: 67
diff changeset
2597 guard_bb = anchor;
kono
parents: 67
diff changeset
2598 guard_to = split_edge (loop_preheader_edge (epilog));
kono
parents: 67
diff changeset
2599 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond,
kono
parents: 67
diff changeset
2600 guard_to, guard_bb,
kono
parents: 67
diff changeset
2601 prob_vector.invert (),
kono
parents: 67
diff changeset
2602 irred_flag);
kono
parents: 67
diff changeset
2603 e = EDGE_PRED (guard_to, 0);
kono
parents: 67
diff changeset
2604 e = (e != guard_e ? e : EDGE_PRED (guard_to, 1));
kono
parents: 67
diff changeset
2605 slpeel_update_phi_nodes_for_guard1 (first_loop, epilog, guard_e, e);
kono
parents: 67
diff changeset
2606
kono
parents: 67
diff changeset
2607 /* Simply propagate profile info from guard_bb to guard_to which is
kono
parents: 67
diff changeset
2608 a merge point of control flow. */
kono
parents: 67
diff changeset
2609 guard_to->count = guard_bb->count;
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2610
111
kono
parents: 67
diff changeset
2611 /* Scale probability of epilog loop back.
kono
parents: 67
diff changeset
2612 FIXME: We should avoid scaling down and back up. Profile may
kono
parents: 67
diff changeset
2613 get lost if we scale down to 0. */
kono
parents: 67
diff changeset
2614 basic_block *bbs = get_loop_body (epilog);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2615 for (unsigned int i = 0; i < epilog->num_nodes; i++)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2616 bbs[i]->count = bbs[i]->count.apply_scale
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2617 (bbs[i]->count,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2618 bbs[i]->count.apply_probability
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2619 (prob_vector));
111
kono
parents: 67
diff changeset
2620 free (bbs);
kono
parents: 67
diff changeset
2621 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2622
111
kono
parents: 67
diff changeset
2623 basic_block bb_before_epilog = loop_preheader_edge (epilog)->src;
kono
parents: 67
diff changeset
2624 tree niters_vector_mult_vf;
kono
parents: 67
diff changeset
2625 /* If loop is peeled for non-zero constant times, now niters refers to
kono
parents: 67
diff changeset
2626 orig_niters - prolog_peeling, it won't overflow even if orig_niters
kono
parents: 67
diff changeset
2627 overflows. */
kono
parents: 67
diff changeset
2628 niters_no_overflow |= (prolog_peeling > 0);
kono
parents: 67
diff changeset
2629 vect_gen_vector_loop_niters (loop_vinfo, niters,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2630 niters_vector, step_vector,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2631 niters_no_overflow);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2632 if (!integer_onep (*step_vector))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2633 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2634 /* On exit from the loop we will have an easy way of calculating
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2635 NITERS_VECTOR / STEP * STEP. Install a dummy definition
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2636 until then. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2637 niters_vector_mult_vf = make_ssa_name (TREE_TYPE (*niters_vector));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2638 SSA_NAME_DEF_STMT (niters_vector_mult_vf) = gimple_build_nop ();
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2639 *niters_vector_mult_vf_var = niters_vector_mult_vf;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2640 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2641 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2642 vect_gen_vector_loop_niters_mult_vf (loop_vinfo, *niters_vector,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2643 &niters_vector_mult_vf);
111
kono
parents: 67
diff changeset
2644 /* Update IVs of original loop as if they were advanced by
kono
parents: 67
diff changeset
2645 niters_vector_mult_vf steps. */
kono
parents: 67
diff changeset
2646 gcc_checking_assert (vect_can_advance_ivs_p (loop_vinfo));
kono
parents: 67
diff changeset
2647 edge update_e = skip_vector ? e : loop_preheader_edge (epilog);
kono
parents: 67
diff changeset
2648 vect_update_ivs_after_vectorizer (loop_vinfo, niters_vector_mult_vf,
kono
parents: 67
diff changeset
2649 update_e);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2650
111
kono
parents: 67
diff changeset
2651 if (skip_epilog)
kono
parents: 67
diff changeset
2652 {
kono
parents: 67
diff changeset
2653 guard_cond = fold_build2 (EQ_EXPR, boolean_type_node,
kono
parents: 67
diff changeset
2654 niters, niters_vector_mult_vf);
kono
parents: 67
diff changeset
2655 guard_bb = single_exit (loop)->dest;
kono
parents: 67
diff changeset
2656 guard_to = split_edge (single_exit (epilog));
kono
parents: 67
diff changeset
2657 guard_e = slpeel_add_loop_guard (guard_bb, guard_cond, guard_to,
kono
parents: 67
diff changeset
2658 skip_vector ? anchor : guard_bb,
kono
parents: 67
diff changeset
2659 prob_epilog.invert (),
kono
parents: 67
diff changeset
2660 irred_flag);
kono
parents: 67
diff changeset
2661 slpeel_update_phi_nodes_for_guard2 (loop, epilog, guard_e,
kono
parents: 67
diff changeset
2662 single_exit (epilog));
kono
parents: 67
diff changeset
2663 /* Only need to handle basic block before epilog loop if it's not
kono
parents: 67
diff changeset
2664 the guard_bb, which is the case when skip_vector is true. */
kono
parents: 67
diff changeset
2665 if (guard_bb != bb_before_epilog)
kono
parents: 67
diff changeset
2666 {
kono
parents: 67
diff changeset
2667 prob_epilog = prob_vector * prob_epilog + prob_vector.invert ();
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2668
111
kono
parents: 67
diff changeset
2669 scale_bbs_frequencies (&bb_before_epilog, 1, prob_epilog);
kono
parents: 67
diff changeset
2670 }
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2671 scale_loop_profile (epilog, prob_epilog, 0);
111
kono
parents: 67
diff changeset
2672 }
kono
parents: 67
diff changeset
2673 else
kono
parents: 67
diff changeset
2674 slpeel_update_phi_nodes_for_lcssa (epilog);
kono
parents: 67
diff changeset
2675
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2676 unsigned HOST_WIDE_INT bound;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2677 if (bound_scalar.is_constant (&bound))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2678 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2679 gcc_assert (bound != 0);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2680 /* -1 to convert loop iterations to latch iterations. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2681 record_niter_bound (epilog, bound - 1, false, true);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2682 }
111
kono
parents: 67
diff changeset
2683
kono
parents: 67
diff changeset
2684 delete_update_ssa ();
kono
parents: 67
diff changeset
2685 adjust_vec_debug_stmts ();
kono
parents: 67
diff changeset
2686 scev_reset ();
kono
parents: 67
diff changeset
2687 }
kono
parents: 67
diff changeset
2688 adjust_vec.release ();
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2689 free_original_copy_tables ();
111
kono
parents: 67
diff changeset
2690
kono
parents: 67
diff changeset
2691 return epilog;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2692 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2693
111
kono
parents: 67
diff changeset
2694 /* Function vect_create_cond_for_niters_checks.
kono
parents: 67
diff changeset
2695
kono
parents: 67
diff changeset
2696 Create a conditional expression that represents the run-time checks for
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2697 loop's niter. The loop is guaranteed to terminate if the run-time
111
kono
parents: 67
diff changeset
2698 checks hold.
kono
parents: 67
diff changeset
2699
kono
parents: 67
diff changeset
2700 Input:
kono
parents: 67
diff changeset
2701 COND_EXPR - input conditional expression. New conditions will be chained
kono
parents: 67
diff changeset
2702 with logical AND operation. If it is NULL, the niter
kono
parents: 67
diff changeset
2703 assumptions alone become the condition.
kono
parents: 67
diff changeset
2704 LOOP_VINFO - field LOOP_VINFO_NITERS_ASSUMPTIONS contains the assumptions
kono
parents: 67
diff changeset
2705 to be checked.
kono
parents: 67
diff changeset
2706
kono
parents: 67
diff changeset
2707 Output:
kono
parents: 67
diff changeset
2708 COND_EXPR - conditional expression.
kono
parents: 67
diff changeset
2709
kono
parents: 67
diff changeset
2710 The returned COND_EXPR is the conditional expression to be used in the
kono
parents: 67
diff changeset
2711 if statement that controls which version of the loop gets executed at
kono
parents: 67
diff changeset
2712 runtime. */
kono
parents: 67
diff changeset
2713
kono
parents: 67
diff changeset
2714 static void
kono
parents: 67
diff changeset
2715 vect_create_cond_for_niters_checks (loop_vec_info loop_vinfo, tree *cond_expr)
kono
parents: 67
diff changeset
2716 {
kono
parents: 67
diff changeset
2717 tree part_cond_expr = LOOP_VINFO_NITERS_ASSUMPTIONS (loop_vinfo);
kono
parents: 67
diff changeset
2718
kono
parents: 67
diff changeset
2719 if (*cond_expr)
kono
parents: 67
diff changeset
2720 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
kono
parents: 67
diff changeset
2721 *cond_expr, part_cond_expr);
kono
parents: 67
diff changeset
2722 else
kono
parents: 67
diff changeset
2723 *cond_expr = part_cond_expr;
kono
parents: 67
diff changeset
2724 }
kono
parents: 67
diff changeset
2725
kono
parents: 67
diff changeset
2726 /* Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
kono
parents: 67
diff changeset
2727 and PART_COND_EXPR are true. Treat a null *COND_EXPR as "true". */
kono
parents: 67
diff changeset
2728
kono
parents: 67
diff changeset
2729 static void
kono
parents: 67
diff changeset
2730 chain_cond_expr (tree *cond_expr, tree part_cond_expr)
kono
parents: 67
diff changeset
2731 {
kono
parents: 67
diff changeset
2732 if (*cond_expr)
kono
parents: 67
diff changeset
2733 *cond_expr = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
kono
parents: 67
diff changeset
2734 *cond_expr, part_cond_expr);
kono
parents: 67
diff changeset
2735 else
kono
parents: 67
diff changeset
2736 *cond_expr = part_cond_expr;
kono
parents: 67
diff changeset
2737 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2738
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2739 /* Function vect_create_cond_for_align_checks.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2740
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2741 Create a conditional expression that represents the alignment checks for
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2742 all of data references (array element references) whose alignment must be
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2743 checked at runtime.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2744
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2745 Input:
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2746 COND_EXPR - input conditional expression. New conditions will be chained
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2747 with logical AND operation.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2748 LOOP_VINFO - two fields of the loop information are used.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2749 LOOP_VINFO_PTR_MASK is the mask used to check the alignment.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2750 LOOP_VINFO_MAY_MISALIGN_STMTS contains the refs to be checked.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2751
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2752 Output:
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2753 COND_EXPR_STMT_LIST - statements needed to construct the conditional
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2754 expression.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2755 The returned value is the conditional expression to be used in the if
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2756 statement that controls which version of the loop gets executed at runtime.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2757
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2758 The algorithm makes two assumptions:
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2759 1) The number of bytes "n" in a vector is a power of 2.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2760 2) An address "a" is aligned if a%n is zero and that this
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2761 test can be done as a&(n-1) == 0. For example, for 16
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2762 byte vectors the test is a&0xf == 0. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2763
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2764 static void
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2765 vect_create_cond_for_align_checks (loop_vec_info loop_vinfo,
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2766 tree *cond_expr,
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2767 gimple_seq *cond_expr_stmt_list)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2768 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2769 vec<stmt_vec_info> may_misalign_stmts
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2770 = LOOP_VINFO_MAY_MISALIGN_STMTS (loop_vinfo);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2771 stmt_vec_info stmt_info;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2772 int mask = LOOP_VINFO_PTR_MASK (loop_vinfo);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2773 tree mask_cst;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2774 unsigned int i;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2775 tree int_ptrsize_type;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2776 char tmp_name[20];
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2777 tree or_tmp_name = NULL_TREE;
111
kono
parents: 67
diff changeset
2778 tree and_tmp_name;
kono
parents: 67
diff changeset
2779 gimple *and_stmt;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2780 tree ptrsize_zero;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2781 tree part_cond_expr;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2782
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2783 /* Check that mask is one less than a power of 2, i.e., mask is
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2784 all zeros followed by all ones. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2785 gcc_assert ((mask != 0) && ((mask & (mask+1)) == 0));
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2786
111
kono
parents: 67
diff changeset
2787 int_ptrsize_type = signed_type_for (ptr_type_node);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2788
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2789 /* Create expression (mask & (dr_1 | ... | dr_n)) where dr_i is the address
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2790 of the first vector of the i'th data reference. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2791
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2792 FOR_EACH_VEC_ELT (may_misalign_stmts, i, stmt_info)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2793 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2794 gimple_seq new_stmt_list = NULL;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2795 tree addr_base;
111
kono
parents: 67
diff changeset
2796 tree addr_tmp_name;
kono
parents: 67
diff changeset
2797 tree new_or_tmp_name;
kono
parents: 67
diff changeset
2798 gimple *addr_stmt, *or_stmt;
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2799 tree vectype = STMT_VINFO_VECTYPE (stmt_info);
67
f6334be47118 update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
2800 bool negative = tree_int_cst_compare
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2801 (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)), size_zero_node) < 0;
67
f6334be47118 update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents: 63
diff changeset
2802 tree offset = negative
111
kono
parents: 67
diff changeset
2803 ? size_int (-TYPE_VECTOR_SUBPARTS (vectype) + 1) : size_zero_node;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2804
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2805 /* create: addr_tmp = (int)(address_of_first_vector) */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2806 addr_base =
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2807 vect_create_addr_base_for_vector_ref (stmt_info, &new_stmt_list,
111
kono
parents: 67
diff changeset
2808 offset);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2809 if (new_stmt_list != NULL)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2810 gimple_seq_add_seq (cond_expr_stmt_list, new_stmt_list);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2811
111
kono
parents: 67
diff changeset
2812 sprintf (tmp_name, "addr2int%d", i);
kono
parents: 67
diff changeset
2813 addr_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
kono
parents: 67
diff changeset
2814 addr_stmt = gimple_build_assign (addr_tmp_name, NOP_EXPR, addr_base);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2815 gimple_seq_add_stmt (cond_expr_stmt_list, addr_stmt);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2816
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2817 /* The addresses are ORed together. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2818
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2819 if (or_tmp_name != NULL_TREE)
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2820 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2821 /* create: or_tmp = or_tmp | addr_tmp */
111
kono
parents: 67
diff changeset
2822 sprintf (tmp_name, "orptrs%d", i);
kono
parents: 67
diff changeset
2823 new_or_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, tmp_name);
kono
parents: 67
diff changeset
2824 or_stmt = gimple_build_assign (new_or_tmp_name, BIT_IOR_EXPR,
kono
parents: 67
diff changeset
2825 or_tmp_name, addr_tmp_name);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2826 gimple_seq_add_stmt (cond_expr_stmt_list, or_stmt);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2827 or_tmp_name = new_or_tmp_name;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2828 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2829 else
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2830 or_tmp_name = addr_tmp_name;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2831
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2832 } /* end for i */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2833
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2834 mask_cst = build_int_cst (int_ptrsize_type, mask);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2835
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2836 /* create: and_tmp = or_tmp & mask */
111
kono
parents: 67
diff changeset
2837 and_tmp_name = make_temp_ssa_name (int_ptrsize_type, NULL, "andmask");
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2838
111
kono
parents: 67
diff changeset
2839 and_stmt = gimple_build_assign (and_tmp_name, BIT_AND_EXPR,
kono
parents: 67
diff changeset
2840 or_tmp_name, mask_cst);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2841 gimple_seq_add_stmt (cond_expr_stmt_list, and_stmt);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2842
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2843 /* Make and_tmp the left operand of the conditional test against zero.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2844 If and_tmp has a nonzero bit then some address is unaligned. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2845 ptrsize_zero = build_int_cst (int_ptrsize_type, 0);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2846 part_cond_expr = fold_build2 (EQ_EXPR, boolean_type_node,
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2847 and_tmp_name, ptrsize_zero);
111
kono
parents: 67
diff changeset
2848 chain_cond_expr (cond_expr, part_cond_expr);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2849 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2850
111
kono
parents: 67
diff changeset
2851 /* If LOOP_VINFO_CHECK_UNEQUAL_ADDRS contains <A1, B1>, ..., <An, Bn>,
kono
parents: 67
diff changeset
2852 create a tree representation of: (&A1 != &B1) && ... && (&An != &Bn).
kono
parents: 67
diff changeset
2853 Set *COND_EXPR to a tree that is true when both the original *COND_EXPR
kono
parents: 67
diff changeset
2854 and this new condition are true. Treat a null *COND_EXPR as "true". */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2855
111
kono
parents: 67
diff changeset
2856 static void
kono
parents: 67
diff changeset
2857 vect_create_cond_for_unequal_addrs (loop_vec_info loop_vinfo, tree *cond_expr)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2858 {
111
kono
parents: 67
diff changeset
2859 vec<vec_object_pair> pairs = LOOP_VINFO_CHECK_UNEQUAL_ADDRS (loop_vinfo);
kono
parents: 67
diff changeset
2860 unsigned int i;
kono
parents: 67
diff changeset
2861 vec_object_pair *pair;
kono
parents: 67
diff changeset
2862 FOR_EACH_VEC_ELT (pairs, i, pair)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2863 {
111
kono
parents: 67
diff changeset
2864 tree addr1 = build_fold_addr_expr (pair->first);
kono
parents: 67
diff changeset
2865 tree addr2 = build_fold_addr_expr (pair->second);
kono
parents: 67
diff changeset
2866 tree part_cond_expr = fold_build2 (NE_EXPR, boolean_type_node,
kono
parents: 67
diff changeset
2867 addr1, addr2);
kono
parents: 67
diff changeset
2868 chain_cond_expr (cond_expr, part_cond_expr);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2869 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2870 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2871
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2872 /* Create an expression that is true when all lower-bound conditions for
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2873 the vectorized loop are met. Chain this condition with *COND_EXPR. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2874
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2875 static void
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2876 vect_create_cond_for_lower_bounds (loop_vec_info loop_vinfo, tree *cond_expr)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2877 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2878 vec<vec_lower_bound> lower_bounds = LOOP_VINFO_LOWER_BOUNDS (loop_vinfo);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2879 for (unsigned int i = 0; i < lower_bounds.length (); ++i)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2880 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2881 tree expr = lower_bounds[i].expr;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2882 tree type = unsigned_type_for (TREE_TYPE (expr));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2883 expr = fold_convert (type, expr);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2884 poly_uint64 bound = lower_bounds[i].min_value;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2885 if (!lower_bounds[i].unsigned_p)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2886 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2887 expr = fold_build2 (PLUS_EXPR, type, expr,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2888 build_int_cstu (type, bound - 1));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2889 bound += bound - 1;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2890 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2891 tree part_cond_expr = fold_build2 (GE_EXPR, boolean_type_node, expr,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2892 build_int_cstu (type, bound));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2893 chain_cond_expr (cond_expr, part_cond_expr);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2894 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2895 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2896
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2897 /* Function vect_create_cond_for_alias_checks.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2898
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2899 Create a conditional expression that represents the run-time checks for
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2900 overlapping of address ranges represented by a list of data references
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2901 relations passed as input.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2902
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2903 Input:
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2904 COND_EXPR - input conditional expression. New conditions will be chained
111
kono
parents: 67
diff changeset
2905 with logical AND operation. If it is NULL, then the function
kono
parents: 67
diff changeset
2906 is used to return the number of alias checks.
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2907 LOOP_VINFO - field LOOP_VINFO_MAY_ALIAS_STMTS contains the list of ddrs
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2908 to be checked.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2909
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2910 Output:
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2911 COND_EXPR - conditional expression.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2912
111
kono
parents: 67
diff changeset
2913 The returned COND_EXPR is the conditional expression to be used in the if
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2914 statement that controls which version of the loop gets executed at runtime.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2915 */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2916
111
kono
parents: 67
diff changeset
2917 void
kono
parents: 67
diff changeset
2918 vect_create_cond_for_alias_checks (loop_vec_info loop_vinfo, tree * cond_expr)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2919 {
111
kono
parents: 67
diff changeset
2920 vec<dr_with_seg_len_pair_t> comp_alias_ddrs =
kono
parents: 67
diff changeset
2921 LOOP_VINFO_COMP_ALIAS_DDRS (loop_vinfo);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2922
111
kono
parents: 67
diff changeset
2923 if (comp_alias_ddrs.is_empty ())
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2924 return;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2925
111
kono
parents: 67
diff changeset
2926 create_runtime_alias_checks (LOOP_VINFO_LOOP (loop_vinfo),
kono
parents: 67
diff changeset
2927 &comp_alias_ddrs, cond_expr);
kono
parents: 67
diff changeset
2928 if (dump_enabled_p ())
kono
parents: 67
diff changeset
2929 dump_printf_loc (MSG_NOTE, vect_location,
kono
parents: 67
diff changeset
2930 "created %u versioning for alias checks.\n",
kono
parents: 67
diff changeset
2931 comp_alias_ddrs.length ());
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2932 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2933
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2934
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2935 /* Function vect_loop_versioning.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2936
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2937 If the loop has data references that may or may not be aligned or/and
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2938 has data reference relations whose independence was not proven then
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2939 two versions of the loop need to be generated, one which is vectorized
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2940 and one which isn't. A test is then generated to control which of the
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2941 loops is executed. The test checks for the alignment of all of the
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2942 data references that may or may not be aligned. An additional
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2943 sequence of runtime tests is generated for each pair of DDRs whose
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2944 independence was not proven. The vectorized version of loop is
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2945 executed only if both alias and alignment tests are passed.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2946
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2947 The test generated to check which version of loop is executed
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2948 is modified to also check for profitability as indicated by the
111
kono
parents: 67
diff changeset
2949 cost model threshold TH.
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2950
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2951 The versioning precondition(s) are placed in *COND_EXPR and
111
kono
parents: 67
diff changeset
2952 *COND_EXPR_STMT_LIST. */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2953
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2954 void
111
kono
parents: 67
diff changeset
2955 vect_loop_versioning (loop_vec_info loop_vinfo,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2956 unsigned int th, bool check_profitability,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2957 poly_uint64 versioning_threshold)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2958 {
111
kono
parents: 67
diff changeset
2959 struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo), *nloop;
kono
parents: 67
diff changeset
2960 struct loop *scalar_loop = LOOP_VINFO_SCALAR_LOOP (loop_vinfo);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2961 basic_block condition_bb;
111
kono
parents: 67
diff changeset
2962 gphi_iterator gsi;
kono
parents: 67
diff changeset
2963 gimple_stmt_iterator cond_exp_gsi;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2964 basic_block merge_bb;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2965 basic_block new_exit_bb;
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2966 edge new_exit_e, e;
111
kono
parents: 67
diff changeset
2967 gphi *orig_phi, *new_phi;
kono
parents: 67
diff changeset
2968 tree cond_expr = NULL_TREE;
kono
parents: 67
diff changeset
2969 gimple_seq cond_expr_stmt_list = NULL;
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2970 tree arg;
111
kono
parents: 67
diff changeset
2971 profile_probability prob = profile_probability::likely ();
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2972 gimple_seq gimplify_stmt_list = NULL;
111
kono
parents: 67
diff changeset
2973 tree scalar_loop_iters = LOOP_VINFO_NITERSM1 (loop_vinfo);
kono
parents: 67
diff changeset
2974 bool version_align = LOOP_REQUIRES_VERSIONING_FOR_ALIGNMENT (loop_vinfo);
kono
parents: 67
diff changeset
2975 bool version_alias = LOOP_REQUIRES_VERSIONING_FOR_ALIAS (loop_vinfo);
kono
parents: 67
diff changeset
2976 bool version_niter = LOOP_REQUIRES_VERSIONING_FOR_NITERS (loop_vinfo);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2977
111
kono
parents: 67
diff changeset
2978 if (check_profitability)
kono
parents: 67
diff changeset
2979 cond_expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
kono
parents: 67
diff changeset
2980 build_int_cst (TREE_TYPE (scalar_loop_iters),
kono
parents: 67
diff changeset
2981 th - 1));
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2982 if (maybe_ne (versioning_threshold, 0U))
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2983 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2984 tree expr = fold_build2 (GE_EXPR, boolean_type_node, scalar_loop_iters,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2985 build_int_cst (TREE_TYPE (scalar_loop_iters),
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2986 versioning_threshold - 1));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2987 if (cond_expr)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2988 cond_expr = fold_build2 (BIT_AND_EXPR, boolean_type_node,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2989 expr, cond_expr);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2990 else
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2991 cond_expr = expr;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2992 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2993
111
kono
parents: 67
diff changeset
2994 if (version_niter)
kono
parents: 67
diff changeset
2995 vect_create_cond_for_niters_checks (loop_vinfo, &cond_expr);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
2996
111
kono
parents: 67
diff changeset
2997 if (cond_expr)
kono
parents: 67
diff changeset
2998 cond_expr = force_gimple_operand_1 (cond_expr, &cond_expr_stmt_list,
kono
parents: 67
diff changeset
2999 is_gimple_condexpr, NULL_TREE);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3000
111
kono
parents: 67
diff changeset
3001 if (version_align)
kono
parents: 67
diff changeset
3002 vect_create_cond_for_align_checks (loop_vinfo, &cond_expr,
kono
parents: 67
diff changeset
3003 &cond_expr_stmt_list);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3004
111
kono
parents: 67
diff changeset
3005 if (version_alias)
kono
parents: 67
diff changeset
3006 {
kono
parents: 67
diff changeset
3007 vect_create_cond_for_unequal_addrs (loop_vinfo, &cond_expr);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3008 vect_create_cond_for_lower_bounds (loop_vinfo, &cond_expr);
111
kono
parents: 67
diff changeset
3009 vect_create_cond_for_alias_checks (loop_vinfo, &cond_expr);
kono
parents: 67
diff changeset
3010 }
kono
parents: 67
diff changeset
3011
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3012 cond_expr = force_gimple_operand_1 (unshare_expr (cond_expr),
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3013 &gimplify_stmt_list,
111
kono
parents: 67
diff changeset
3014 is_gimple_condexpr, NULL_TREE);
kono
parents: 67
diff changeset
3015 gimple_seq_add_seq (&cond_expr_stmt_list, gimplify_stmt_list);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3016
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3017 initialize_original_copy_tables ();
111
kono
parents: 67
diff changeset
3018 if (scalar_loop)
kono
parents: 67
diff changeset
3019 {
kono
parents: 67
diff changeset
3020 edge scalar_e;
kono
parents: 67
diff changeset
3021 basic_block preheader, scalar_preheader;
kono
parents: 67
diff changeset
3022
kono
parents: 67
diff changeset
3023 /* We don't want to scale SCALAR_LOOP's frequencies, we need to
kono
parents: 67
diff changeset
3024 scale LOOP's frequencies instead. */
kono
parents: 67
diff changeset
3025 nloop = loop_version (scalar_loop, cond_expr, &condition_bb,
kono
parents: 67
diff changeset
3026 prob, prob.invert (), prob, prob.invert (), true);
kono
parents: 67
diff changeset
3027 scale_loop_frequencies (loop, prob);
kono
parents: 67
diff changeset
3028 /* CONDITION_BB was created above SCALAR_LOOP's preheader,
kono
parents: 67
diff changeset
3029 while we need to move it above LOOP's preheader. */
kono
parents: 67
diff changeset
3030 e = loop_preheader_edge (loop);
kono
parents: 67
diff changeset
3031 scalar_e = loop_preheader_edge (scalar_loop);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3032 /* The vector loop preheader might not be empty, since new
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3033 invariants could have been created while analyzing the loop. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3034 gcc_assert (single_pred_p (e->src));
111
kono
parents: 67
diff changeset
3035 gcc_assert (empty_block_p (scalar_e->src)
kono
parents: 67
diff changeset
3036 && single_pred_p (scalar_e->src));
kono
parents: 67
diff changeset
3037 gcc_assert (single_pred_p (condition_bb));
kono
parents: 67
diff changeset
3038 preheader = e->src;
kono
parents: 67
diff changeset
3039 scalar_preheader = scalar_e->src;
kono
parents: 67
diff changeset
3040 scalar_e = find_edge (condition_bb, scalar_preheader);
kono
parents: 67
diff changeset
3041 e = single_pred_edge (preheader);
kono
parents: 67
diff changeset
3042 redirect_edge_and_branch_force (single_pred_edge (condition_bb),
kono
parents: 67
diff changeset
3043 scalar_preheader);
kono
parents: 67
diff changeset
3044 redirect_edge_and_branch_force (scalar_e, preheader);
kono
parents: 67
diff changeset
3045 redirect_edge_and_branch_force (e, condition_bb);
kono
parents: 67
diff changeset
3046 set_immediate_dominator (CDI_DOMINATORS, condition_bb,
kono
parents: 67
diff changeset
3047 single_pred (condition_bb));
kono
parents: 67
diff changeset
3048 set_immediate_dominator (CDI_DOMINATORS, scalar_preheader,
kono
parents: 67
diff changeset
3049 single_pred (scalar_preheader));
kono
parents: 67
diff changeset
3050 set_immediate_dominator (CDI_DOMINATORS, preheader,
kono
parents: 67
diff changeset
3051 condition_bb);
kono
parents: 67
diff changeset
3052 }
kono
parents: 67
diff changeset
3053 else
kono
parents: 67
diff changeset
3054 nloop = loop_version (loop, cond_expr, &condition_bb,
kono
parents: 67
diff changeset
3055 prob, prob.invert (), prob, prob.invert (), true);
kono
parents: 67
diff changeset
3056
kono
parents: 67
diff changeset
3057 if (version_niter)
kono
parents: 67
diff changeset
3058 {
kono
parents: 67
diff changeset
3059 /* The versioned loop could be infinite, we need to clear existing
kono
parents: 67
diff changeset
3060 niter information which is copied from the original loop. */
kono
parents: 67
diff changeset
3061 gcc_assert (loop_constraint_set_p (loop, LOOP_C_FINITE));
kono
parents: 67
diff changeset
3062 vect_free_loop_info_assumptions (nloop);
kono
parents: 67
diff changeset
3063 /* And set constraint LOOP_C_INFINITE for niter analyzer. */
kono
parents: 67
diff changeset
3064 loop_constraint_set (loop, LOOP_C_INFINITE);
kono
parents: 67
diff changeset
3065 }
kono
parents: 67
diff changeset
3066
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3067 if (LOCATION_LOCUS (vect_location.get_location_t ()) != UNKNOWN_LOCATION
111
kono
parents: 67
diff changeset
3068 && dump_enabled_p ())
kono
parents: 67
diff changeset
3069 {
kono
parents: 67
diff changeset
3070 if (version_alias)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3071 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3072 vect_location,
111
kono
parents: 67
diff changeset
3073 "loop versioned for vectorization because of "
kono
parents: 67
diff changeset
3074 "possible aliasing\n");
kono
parents: 67
diff changeset
3075 if (version_align)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3076 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS | MSG_PRIORITY_USER_FACING,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
3077 vect_location,
111
kono
parents: 67
diff changeset
3078 "loop versioned for vectorization to enhance "
kono
parents: 67
diff changeset
3079 "alignment\n");
kono
parents: 67
diff changeset
3080
kono
parents: 67
diff changeset
3081 }
kono
parents: 67
diff changeset
3082 free_original_copy_tables ();
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3083
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3084 /* Loop versioning violates an assumption we try to maintain during
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3085 vectorization - that the loop exit block has a single predecessor.
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3086 After versioning, the exit block of both loop versions is the same
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3087 basic block (i.e. it has two predecessors). Just in order to simplify
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3088 following transformations in the vectorizer, we fix this situation
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3089 here by adding a new (empty) block on the exit-edge of the loop,
111
kono
parents: 67
diff changeset
3090 with the proper loop-exit phis to maintain loop-closed-form.
kono
parents: 67
diff changeset
3091 If loop versioning wasn't done from loop, but scalar_loop instead,
kono
parents: 67
diff changeset
3092 merge_bb will have already just a single successor. */
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3093
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3094 merge_bb = single_exit (loop)->dest;
111
kono
parents: 67
diff changeset
3095 if (scalar_loop == NULL || EDGE_COUNT (merge_bb->preds) >= 2)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3096 {
111
kono
parents: 67
diff changeset
3097 gcc_assert (EDGE_COUNT (merge_bb->preds) >= 2);
kono
parents: 67
diff changeset
3098 new_exit_bb = split_edge (single_exit (loop));
kono
parents: 67
diff changeset
3099 new_exit_e = single_exit (loop);
kono
parents: 67
diff changeset
3100 e = EDGE_SUCC (new_exit_bb, 0);
kono
parents: 67
diff changeset
3101
kono
parents: 67
diff changeset
3102 for (gsi = gsi_start_phis (merge_bb); !gsi_end_p (gsi); gsi_next (&gsi))
kono
parents: 67
diff changeset
3103 {
kono
parents: 67
diff changeset
3104 tree new_res;
kono
parents: 67
diff changeset
3105 orig_phi = gsi.phi ();
kono
parents: 67
diff changeset
3106 new_res = copy_ssa_name (PHI_RESULT (orig_phi));
kono
parents: 67
diff changeset
3107 new_phi = create_phi_node (new_res, new_exit_bb);
kono
parents: 67
diff changeset
3108 arg = PHI_ARG_DEF_FROM_EDGE (orig_phi, e);
kono
parents: 67
diff changeset
3109 add_phi_arg (new_phi, arg, new_exit_e,
kono
parents: 67
diff changeset
3110 gimple_phi_arg_location_from_edge (orig_phi, e));
kono
parents: 67
diff changeset
3111 adjust_phi_and_debug_stmts (orig_phi, e, PHI_RESULT (new_phi));
kono
parents: 67
diff changeset
3112 }
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3113 }
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3114
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3115 /* End loop-exit-fixes after versioning. */
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3116
111
kono
parents: 67
diff changeset
3117 if (cond_expr_stmt_list)
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3118 {
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3119 cond_exp_gsi = gsi_last_bb (condition_bb);
111
kono
parents: 67
diff changeset
3120 gsi_insert_seq_before (&cond_exp_gsi, cond_expr_stmt_list,
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3121 GSI_SAME_STMT);
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3122 }
111
kono
parents: 67
diff changeset
3123 update_ssa (TODO_update_ssa);
55
77e2b8dfacca update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
diff changeset
3124 }