annotate gcc/gimple-loop-versioning.cc @ 158:494b0b89df80 default tip

...
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Mon, 25 May 2020 18:13:55 +0900
parents 1830386684a0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
145
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1 /* Loop versioning pass.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
2 Copyright (C) 2018-2020 Free Software Foundation, Inc.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
3
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
4 This file is part of GCC.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
5
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
6 GCC is free software; you can redistribute it and/or modify it
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
7 under the terms of the GNU General Public License as published by the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
8 Free Software Foundation; either version 3, or (at your option) any
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
9 later version.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
10
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
11 GCC is distributed in the hope that it will be useful, but WITHOUT
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
14 for more details.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
15
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
16 You should have received a copy of the GNU General Public License
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
17 along with GCC; see the file COPYING3. If not see
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
18 <http://www.gnu.org/licenses/>. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
19
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
20 #include "config.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
21 #include "system.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
22 #include "coretypes.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
23 #include "backend.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
24 #include "tree.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
25 #include "gimple.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
26 #include "gimple-iterator.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
27 #include "tree-pass.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
28 #include "gimplify-me.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
29 #include "cfgloop.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
30 #include "tree-ssa-loop.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
31 #include "ssa.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
32 #include "tree-scalar-evolution.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
33 #include "tree-chrec.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
34 #include "tree-ssa-loop-ivopts.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
35 #include "fold-const.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
36 #include "tree-ssa-propagate.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
37 #include "tree-inline.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
38 #include "domwalk.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
39 #include "alloc-pool.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
40 #include "vr-values.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
41 #include "gimple-ssa-evrp-analyze.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
42 #include "tree-vectorizer.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
43 #include "omp-general.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
44 #include "predict.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
45 #include "tree-into-ssa.h"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
46
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
47 namespace {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
48
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
49 /* This pass looks for loops that could be simplified if certain loop
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
50 invariant conditions were true. It is effectively a form of loop
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
51 splitting in which the pass produces the split conditions itself,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
52 instead of using ones that are already present in the IL.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
53
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
54 Versioning for when strides are 1
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
55 ---------------------------------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
56
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
57 At the moment the only thing the pass looks for are memory references
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
58 like:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
59
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
60 for (auto i : ...)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
61 ...x[i * stride]...
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
62
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
63 It considers changing such loops to:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
64
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
65 if (stride == 1)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
66 for (auto i : ...) [A]
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
67 ...x[i]...
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
68 else
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
69 for (auto i : ...) [B]
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
70 ...x[i * stride]...
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
71
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
72 This can have several benefits:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
73
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
74 (1) [A] is often easier or cheaper to vectorize than [B].
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
75
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
76 (2) The scalar code in [A] is simpler than the scalar code in [B]
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
77 (if the loops cannot be vectorized or need an epilogue loop).
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
78
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
79 (3) We might recognize [A] as a pattern, such as a memcpy or memset.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
80
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
81 (4) [A] has simpler address evolutions, which can help other passes
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
82 like loop interchange.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
83
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
84 The optimization is particularly useful for assumed-shape arrays in
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
85 Fortran, where the stride of the innermost dimension depends on the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
86 array descriptor but is often equal to 1 in practice. For example:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
87
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
88 subroutine f1(x)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
89 real :: x(:)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
90 x(:) = 100
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
91 end subroutine f1
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
92
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
93 generates the equivalent of:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
94
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
95 raw_stride = *x.dim[0].stride;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
96 stride = raw_stride != 0 ? raw_stride : 1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
97 x_base = *x.data;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
98 ...
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
99 tmp1 = stride * S;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
100 tmp2 = tmp1 - stride;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
101 *x_base[tmp2] = 1.0e+2;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
102
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
103 but in the common case that stride == 1, the last three statements
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
104 simplify to:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
105
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
106 tmp3 = S + -1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
107 *x_base[tmp3] = 1.0e+2;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
108
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
109 The optimization is in principle very simple. The difficult parts are:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
110
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
111 (a) deciding which parts of a general address calculation correspond
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
112 to the inner dimension of an array, since this usually isn't explicit
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
113 in the IL, and for C often isn't even explicit in the source code
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
114
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
115 (b) estimating when the transformation is worthwhile
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
116
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
117 Structure
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
118 ---------
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
119
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
120 The pass has four phases:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
121
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
122 (1) Walk through the statements looking for and recording potential
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
123 versioning opportunities. Stop if there are none.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
124
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
125 (2) Use context-sensitive range information to see whether any versioning
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
126 conditions are impossible in practice. Remove them if so, and stop
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
127 if no opportunities remain.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
128
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
129 (We do this only after (1) to keep compile time down when no
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
130 versioning opportunities exist.)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
131
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
132 (3) Apply the cost model. Decide which versioning opportunities are
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
133 worthwhile and at which nesting level they should be applied.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
134
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
135 (4) Attempt to version all the loops selected by (3), so that:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
136
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
137 for (...)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
138 ...
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
139
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
140 becomes:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
141
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
142 if (!cond)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
143 for (...) // Original loop
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
144 ...
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
145 else
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
146 for (...) // New loop
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
147 ...
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
148
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
149 Use the version condition COND to simplify the new loop. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
150
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
151 /* Enumerates the likelihood that a particular value indexes the inner
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
152 dimension of an array. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
153 enum inner_likelihood {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
154 INNER_UNLIKELY,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
155 INNER_DONT_KNOW,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
156 INNER_LIKELY
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
157 };
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
158
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
159 /* Information about one term of an address_info. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
160 struct address_term_info
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
161 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
162 /* The value of the term is EXPR * MULTIPLIER. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
163 tree expr;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
164 unsigned HOST_WIDE_INT multiplier;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
165
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
166 /* The stride applied by EXPR in each iteration of some unrecorded loop,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
167 or null if no stride has been identified. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
168 tree stride;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
169
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
170 /* Enumerates the likelihood that EXPR indexes the inner dimension
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
171 of an array. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
172 enum inner_likelihood inner_likelihood;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
173
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
174 /* True if STRIDE == 1 is a versioning opportunity when considered
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
175 in isolation. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
176 bool versioning_opportunity_p;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
177 };
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
178
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
179 /* Information about an address calculation, and the range of constant
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
180 offsets applied to it. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
181 class address_info
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
182 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
183 public:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
184 static const unsigned int MAX_TERMS = 8;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
185
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
186 /* One statement that calculates the address. If multiple statements
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
187 share the same address, we only record the first. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
188 gimple *stmt;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
189
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
190 /* The loop containing STMT (cached for convenience). If multiple
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
191 statements share the same address, they all belong to this loop. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
192 class loop *loop;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
193
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
194 /* A decomposition of the calculation into a sum of terms plus an
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
195 optional base. When BASE is provided, it is never an SSA name.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
196 Once initialization is complete, all members of TERMs are SSA names. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
197 tree base;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
198 auto_vec<address_term_info, MAX_TERMS> terms;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
199
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
200 /* All bytes accessed from the address fall in the offset range
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
201 [MIN_OFFSET, MAX_OFFSET). */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
202 HOST_WIDE_INT min_offset, max_offset;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
203 };
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
204
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
205 /* Stores addresses based on their base and terms (ignoring the offsets). */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
206 struct address_info_hasher : nofree_ptr_hash <address_info>
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
207 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
208 static hashval_t hash (const address_info *);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
209 static bool equal (const address_info *, const address_info *);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
210 };
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
211
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
212 /* Information about the versioning we'd like to apply to a loop. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
213 class loop_info
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
214 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
215 public:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
216 bool worth_versioning_p () const;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
217
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
218 /* True if we've decided not to version this loop. The remaining
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
219 fields are meaningless if so. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
220 bool rejected_p;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
221
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
222 /* True if at least one subloop of this loop benefits from versioning. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
223 bool subloops_benefit_p;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
224
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
225 /* An estimate of the total number of instructions in the loop,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
226 excluding those in subloops that benefit from versioning. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
227 unsigned int num_insns;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
228
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
229 /* The outermost loop that can handle all the version checks
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
230 described below. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
231 class loop *outermost;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
232
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
233 /* The first entry in the list of blocks that belong to this loop
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
234 (and not to subloops). m_next_block_in_loop provides the chain
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
235 pointers for the list. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
236 basic_block block_list;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
237
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
238 /* We'd like to version the loop for the case in which these SSA names
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
239 (keyed off their SSA_NAME_VERSION) are all equal to 1 at runtime. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
240 bitmap_head unity_names;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
241
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
242 /* If versioning succeeds, this points the version of the loop that
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
243 assumes the version conditions holds. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
244 class loop *optimized_loop;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
245 };
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
246
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
247 /* The main pass structure. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
248 class loop_versioning
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
249 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
250 public:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
251 loop_versioning (function *);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
252 ~loop_versioning ();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
253 unsigned int run ();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
254
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
255 private:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
256 /* Used to walk the dominator tree to find loop versioning conditions
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
257 that are always false. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
258 class lv_dom_walker : public dom_walker
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
259 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
260 public:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
261 lv_dom_walker (loop_versioning &);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
262
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
263 edge before_dom_children (basic_block) FINAL OVERRIDE;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
264 void after_dom_children (basic_block) FINAL OVERRIDE;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
265
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
266 private:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
267 /* The parent pass. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
268 loop_versioning &m_lv;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
269
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
270 /* Used to build context-dependent range information. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
271 evrp_range_analyzer m_range_analyzer;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
272 };
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
273
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
274 /* Used to simplify statements based on conditions that are established
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
275 by the version checks. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
276 class name_prop : public substitute_and_fold_engine
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
277 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
278 public:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
279 name_prop (loop_info &li) : m_li (li) {}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
280 tree get_value (tree) FINAL OVERRIDE;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
281
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
282 private:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
283 /* Information about the versioning we've performed on the loop. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
284 loop_info &m_li;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
285 };
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
286
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
287 loop_info &get_loop_info (class loop *loop) { return m_loops[loop->num]; }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
288
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
289 unsigned int max_insns_for_loop (class loop *);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
290 bool expensive_stmt_p (gimple *);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
291
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
292 void version_for_unity (gimple *, tree);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
293 bool acceptable_multiplier_p (tree, unsigned HOST_WIDE_INT,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
294 unsigned HOST_WIDE_INT * = 0);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
295 bool acceptable_type_p (tree, unsigned HOST_WIDE_INT *);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
296 bool multiply_term_by (address_term_info &, tree);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
297 inner_likelihood get_inner_likelihood (tree, unsigned HOST_WIDE_INT);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
298 void dump_inner_likelihood (address_info &, address_term_info &);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
299 void analyze_stride (address_info &, address_term_info &,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
300 tree, class loop *);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
301 bool find_per_loop_multiplication (address_info &, address_term_info &);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
302 bool analyze_term_using_scevs (address_info &, address_term_info &);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
303 void analyze_arbitrary_term (address_info &, address_term_info &);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
304 void analyze_address_fragment (address_info &);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
305 void record_address_fragment (gimple *, unsigned HOST_WIDE_INT,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
306 tree, unsigned HOST_WIDE_INT, HOST_WIDE_INT);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
307 void analyze_expr (gimple *, tree);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
308 bool analyze_block (basic_block);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
309 bool analyze_blocks ();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
310
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
311 void prune_loop_conditions (class loop *, vr_values *);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
312 bool prune_conditions ();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
313
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
314 void merge_loop_info (class loop *, class loop *);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
315 void add_loop_to_queue (class loop *);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
316 bool decide_whether_loop_is_versionable (class loop *);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
317 bool make_versioning_decisions ();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
318
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
319 bool version_loop (class loop *);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
320 void implement_versioning_decisions ();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
321
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
322 /* The function we're optimizing. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
323 function *m_fn;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
324
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
325 /* The obstack to use for all pass-specific bitmaps. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
326 bitmap_obstack m_bitmap_obstack;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
327
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
328 /* An obstack to use for general allocation. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
329 obstack m_obstack;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
330
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
331 /* The number of loops in the function. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
332 unsigned int m_nloops;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
333
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
334 /* The total number of loop version conditions we've found. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
335 unsigned int m_num_conditions;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
336
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
337 /* Assume that an address fragment of the form i * stride * scale
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
338 (for variable stride and constant scale) will not benefit from
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
339 versioning for stride == 1 when scale is greater than this value. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
340 unsigned HOST_WIDE_INT m_maximum_scale;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
341
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
342 /* Information about each loop. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
343 auto_vec<loop_info> m_loops;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
344
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
345 /* Used to form a linked list of blocks that belong to a loop,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
346 started by loop_info::block_list. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
347 auto_vec<basic_block> m_next_block_in_loop;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
348
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
349 /* The list of loops that we've decided to version. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
350 auto_vec<class loop *> m_loops_to_version;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
351
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
352 /* A table of addresses in the current loop, keyed off their values
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
353 but not their offsets. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
354 hash_table <address_info_hasher> m_address_table;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
355
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
356 /* A list of all addresses in M_ADDRESS_TABLE, in a predictable order. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
357 auto_vec <address_info *, 32> m_address_list;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
358 };
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
359
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
360 /* If EXPR is an SSA name and not a default definition, return the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
361 defining statement, otherwise return null. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
362
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
363 static gimple *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
364 maybe_get_stmt (tree expr)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
365 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
366 if (TREE_CODE (expr) == SSA_NAME && !SSA_NAME_IS_DEFAULT_DEF (expr))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
367 return SSA_NAME_DEF_STMT (expr);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
368 return NULL;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
369 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
370
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
371 /* Like maybe_get_stmt, but also return null if the defining
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
372 statement isn't an assignment. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
373
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
374 static gassign *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
375 maybe_get_assign (tree expr)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
376 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
377 return safe_dyn_cast <gassign *> (maybe_get_stmt (expr));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
378 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
379
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
380 /* Return true if this pass should look through a cast of expression FROM
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
381 to type TYPE when analyzing pieces of an address. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
382
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
383 static bool
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
384 look_through_cast_p (tree type, tree from)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
385 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
386 return (INTEGRAL_TYPE_P (TREE_TYPE (from)) == INTEGRAL_TYPE_P (type)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
387 && POINTER_TYPE_P (TREE_TYPE (from)) == POINTER_TYPE_P (type));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
388 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
389
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
390 /* Strip all conversions of integers or pointers from EXPR, regardless
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
391 of whether the conversions are nops. This is useful in the context
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
392 of this pass because we're not trying to fold or simulate the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
393 expression; we just want to see how it's structured. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
394
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
395 static tree
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
396 strip_casts (tree expr)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
397 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
398 const unsigned int MAX_NITERS = 4;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
399
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
400 tree type = TREE_TYPE (expr);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
401 while (CONVERT_EXPR_P (expr)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
402 && look_through_cast_p (type, TREE_OPERAND (expr, 0)))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
403 expr = TREE_OPERAND (expr, 0);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
404
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
405 for (unsigned int niters = 0; niters < MAX_NITERS; ++niters)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
406 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
407 gassign *assign = maybe_get_assign (expr);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
408 if (assign
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
409 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
410 && look_through_cast_p (type, gimple_assign_rhs1 (assign)))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
411 expr = gimple_assign_rhs1 (assign);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
412 else
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
413 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
414 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
415 return expr;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
416 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
417
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
418 /* Compare two address_term_infos in the same address_info. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
419
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
420 static int
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
421 compare_address_terms (const void *a_uncast, const void *b_uncast)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
422 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
423 const address_term_info *a = (const address_term_info *) a_uncast;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
424 const address_term_info *b = (const address_term_info *) b_uncast;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
425
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
426 if (a->expr != b->expr)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
427 return SSA_NAME_VERSION (a->expr) < SSA_NAME_VERSION (b->expr) ? -1 : 1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
428
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
429 if (a->multiplier != b->multiplier)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
430 return a->multiplier < b->multiplier ? -1 : 1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
431
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
432 return 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
433 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
434
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
435 /* Dump ADDRESS using flags FLAGS. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
436
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
437 static void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
438 dump_address_info (dump_flags_t flags, address_info &address)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
439 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
440 if (address.base)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
441 dump_printf (flags, "%T + ", address.base);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
442 for (unsigned int i = 0; i < address.terms.length (); ++i)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
443 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
444 if (i != 0)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
445 dump_printf (flags, " + ");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
446 dump_printf (flags, "%T", address.terms[i].expr);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
447 if (address.terms[i].multiplier != 1)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
448 dump_printf (flags, " * %wd", address.terms[i].multiplier);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
449 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
450 dump_printf (flags, " + [%wd, %wd]",
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
451 address.min_offset, address.max_offset - 1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
452 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
453
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
454 /* Hash an address_info based on its base and terms. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
455
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
456 hashval_t
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
457 address_info_hasher::hash (const address_info *info)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
458 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
459 inchash::hash hash;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
460 hash.add_int (info->base ? TREE_CODE (info->base) : 0);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
461 hash.add_int (info->terms.length ());
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
462 for (unsigned int i = 0; i < info->terms.length (); ++i)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
463 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
464 hash.add_int (SSA_NAME_VERSION (info->terms[i].expr));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
465 hash.add_hwi (info->terms[i].multiplier);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
466 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
467 return hash.end ();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
468 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
469
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
470 /* Return true if two address_infos have equal bases and terms. Other
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
471 properties might be different (such as the statement or constant
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
472 offset range). */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
473
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
474 bool
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
475 address_info_hasher::equal (const address_info *a, const address_info *b)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
476 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
477 if (a->base != b->base
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
478 && (!a->base || !b->base || !operand_equal_p (a->base, b->base, 0)))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
479 return false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
480
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
481 if (a->terms.length () != b->terms.length ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
482 return false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
483
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
484 for (unsigned int i = 0; i < a->terms.length (); ++i)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
485 if (a->terms[i].expr != b->terms[i].expr
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
486 || a->terms[i].multiplier != b->terms[i].multiplier)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
487 return false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
488
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
489 return true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
490 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
491
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
492 /* Return true if we want to version the loop, i.e. if we have a
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
493 specific reason for doing so and no specific reason not to. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
494
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
495 bool
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
496 loop_info::worth_versioning_p () const
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
497 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
498 return (!rejected_p
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
499 && (!bitmap_empty_p (&unity_names) || subloops_benefit_p));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
500 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
501
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
502 loop_versioning::lv_dom_walker::lv_dom_walker (loop_versioning &lv)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
503 : dom_walker (CDI_DOMINATORS), m_lv (lv), m_range_analyzer (false)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
504 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
505 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
506
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
507 /* Process BB before processing the blocks it dominates. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
508
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
509 edge
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
510 loop_versioning::lv_dom_walker::before_dom_children (basic_block bb)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
511 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
512 m_range_analyzer.enter (bb);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
513
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
514 if (bb == bb->loop_father->header)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
515 m_lv.prune_loop_conditions (bb->loop_father,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
516 m_range_analyzer.get_vr_values ());
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
517
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
518 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
519 gsi_next (&si))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
520 m_range_analyzer.record_ranges_from_stmt (gsi_stmt (si), false);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
521
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
522 return NULL;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
523 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
524
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
525 /* Process BB after processing the blocks it dominates. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
526
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
527 void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
528 loop_versioning::lv_dom_walker::after_dom_children (basic_block bb)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
529 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
530 m_range_analyzer.leave (bb);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
531 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
532
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
533 /* Decide whether to replace VAL with a new value in a versioned loop.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
534 Return the new value if so, otherwise return null. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
535
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
536 tree
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
537 loop_versioning::name_prop::get_value (tree val)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
538 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
539 if (TREE_CODE (val) == SSA_NAME
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
540 && bitmap_bit_p (&m_li.unity_names, SSA_NAME_VERSION (val)))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
541 return build_one_cst (TREE_TYPE (val));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
542 return NULL_TREE;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
543 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
544
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
545 /* Initialize the structure to optimize FN. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
546
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
547 loop_versioning::loop_versioning (function *fn)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
548 : m_fn (fn),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
549 m_nloops (number_of_loops (fn)),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
550 m_num_conditions (0),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
551 m_address_table (31)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
552 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
553 bitmap_obstack_initialize (&m_bitmap_obstack);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
554 gcc_obstack_init (&m_obstack);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
555
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
556 /* Initialize the loop information. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
557 m_loops.safe_grow_cleared (m_nloops);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
558 for (unsigned int i = 0; i < m_nloops; ++i)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
559 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
560 m_loops[i].outermost = get_loop (m_fn, 0);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
561 bitmap_initialize (&m_loops[i].unity_names, &m_bitmap_obstack);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
562 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
563
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
564 /* Initialize the list of blocks that belong to each loop. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
565 unsigned int nbbs = last_basic_block_for_fn (fn);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
566 m_next_block_in_loop.safe_grow (nbbs);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
567 basic_block bb;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
568 FOR_EACH_BB_FN (bb, fn)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
569 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
570 loop_info &li = get_loop_info (bb->loop_father);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
571 m_next_block_in_loop[bb->index] = li.block_list;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
572 li.block_list = bb;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
573 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
574
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
575 /* MAX_FIXED_MODE_SIZE should be a reasonable maximum scale for
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
576 unvectorizable code, since it is the largest size that can be
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
577 handled efficiently by scalar code. omp_max_vf calculates the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
578 maximum number of bytes in a vector, when such a value is relevant
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
579 to loop optimization. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
580 m_maximum_scale = estimated_poly_value (omp_max_vf ());
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
581 m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
582 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
583
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
584 loop_versioning::~loop_versioning ()
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
585 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
586 bitmap_obstack_release (&m_bitmap_obstack);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
587 obstack_free (&m_obstack, NULL);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
588 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
589
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
590 /* Return the maximum number of instructions allowed in LOOP before
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
591 it becomes too big for versioning.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
592
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
593 There are separate limits for inner and outer loops. The limit for
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
594 inner loops applies only to loops that benefit directly from versioning.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
595 The limit for outer loops applies to all code in the outer loop and
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
596 its subloops that *doesn't* benefit directly from versioning; such code
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
597 would be "taken along for the ride". The idea is that if the cost of
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
598 the latter is small, it is better to version outer loops rather than
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
599 inner loops, both to reduce the number of repeated checks and to enable
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
600 more of the loop nest to be optimized as a natural nest (e.g. by loop
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
601 interchange or outer-loop vectorization). */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
602
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
603 unsigned int
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
604 loop_versioning::max_insns_for_loop (class loop *loop)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
605 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
606 return (loop->inner
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
607 ? param_loop_versioning_max_outer_insns
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
608 : param_loop_versioning_max_inner_insns);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
609 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
610
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
611 /* Return true if for cost reasons we should avoid versioning any loop
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
612 that contains STMT.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
613
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
614 Note that we don't need to check whether versioning is invalid for
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
615 correctness reasons, since the versioning process does that for us.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
616 The conditions involved are too rare to be worth duplicating here. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
617
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
618 bool
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
619 loop_versioning::expensive_stmt_p (gimple *stmt)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
620 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
621 if (gcall *call = dyn_cast <gcall *> (stmt))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
622 /* Assume for now that the time spent in an "expensive" call would
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
623 overwhelm any saving from versioning. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
624 return !gimple_inexpensive_call_p (call);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
625 return false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
626 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
627
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
628 /* Record that we want to version the loop that contains STMT for the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
629 case in which SSA name NAME is equal to 1. We already know that NAME
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
630 is invariant in the loop. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
631
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
632 void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
633 loop_versioning::version_for_unity (gimple *stmt, tree name)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
634 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
635 class loop *loop = loop_containing_stmt (stmt);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
636 loop_info &li = get_loop_info (loop);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
637
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
638 if (bitmap_set_bit (&li.unity_names, SSA_NAME_VERSION (name)))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
639 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
640 /* This is the first time we've wanted to version LOOP for NAME.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
641 Keep track of the outermost loop that can handle all versioning
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
642 checks in LI. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
643 class loop *outermost
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
644 = outermost_invariant_loop_for_expr (loop, name);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
645 if (loop_depth (li.outermost) < loop_depth (outermost))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
646 li.outermost = outermost;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
647
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
648 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
649 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
650 dump_printf_loc (MSG_NOTE, stmt, "want to version containing loop"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
651 " for when %T == 1", name);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
652 if (outermost == loop)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
653 dump_printf (MSG_NOTE, "; cannot hoist check further");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
654 else
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
655 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
656 dump_printf (MSG_NOTE, "; could implement the check at loop"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
657 " depth %d", loop_depth (outermost));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
658 if (loop_depth (li.outermost) > loop_depth (outermost))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
659 dump_printf (MSG_NOTE, ", but other checks only allow"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
660 " a depth of %d", loop_depth (li.outermost));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
661 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
662 dump_printf (MSG_NOTE, "\n");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
663 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
664
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
665 m_num_conditions += 1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
666 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
667 else
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
668 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
669 /* This is a duplicate request. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
670 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
671 dump_printf_loc (MSG_NOTE, stmt, "already asked to version containing"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
672 " loop for when %T == 1\n", name);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
673 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
674 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
675
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
676 /* Return true if OP1_TREE is constant and if in principle it is worth
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
677 versioning an address fragment of the form:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
678
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
679 i * OP1_TREE * OP2 * stride
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
680
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
681 for the case in which stride == 1. This in practice means testing
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
682 whether:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
683
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
684 OP1_TREE * OP2 <= M_MAXIMUM_SCALE.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
685
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
686 If RESULT is nonnull, store OP1_TREE * OP2 there when returning true. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
687
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
688 bool
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
689 loop_versioning::acceptable_multiplier_p (tree op1_tree,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
690 unsigned HOST_WIDE_INT op2,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
691 unsigned HOST_WIDE_INT *result)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
692 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
693 if (tree_fits_uhwi_p (op1_tree))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
694 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
695 unsigned HOST_WIDE_INT op1 = tree_to_uhwi (op1_tree);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
696 /* The first part checks for overflow. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
697 if (op1 * op2 >= op2 && op1 * op2 <= m_maximum_scale)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
698 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
699 if (result)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
700 *result = op1 * op2;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
701 return true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
702 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
703 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
704 return false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
705 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
706
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
707 /* Return true if it is worth using loop versioning on a memory access
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
708 of type TYPE. Store the size of the access in *SIZE if so. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
709
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
710 bool
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
711 loop_versioning::acceptable_type_p (tree type, unsigned HOST_WIDE_INT *size)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
712 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
713 return (TYPE_SIZE_UNIT (type)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
714 && acceptable_multiplier_p (TYPE_SIZE_UNIT (type), 1, size));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
715 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
716
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
717 /* See whether OP is constant and whether we can multiply TERM by that
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
718 constant without exceeding M_MAXIMUM_SCALE. Return true and update
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
719 TERM if so. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
720
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
721 bool
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
722 loop_versioning::multiply_term_by (address_term_info &term, tree op)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
723 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
724 return acceptable_multiplier_p (op, term.multiplier, &term.multiplier);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
725 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
726
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
727 /* Decide whether an address fragment of the form STRIDE * MULTIPLIER
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
728 is likely to be indexing an innermost dimension, returning the result
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
729 as an INNER_* probability. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
730
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
731 inner_likelihood
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
732 loop_versioning::get_inner_likelihood (tree stride,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
733 unsigned HOST_WIDE_INT multiplier)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
734 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
735 const unsigned int MAX_NITERS = 8;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
736
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
737 /* Iterate over possible values of STRIDE. Return INNER_LIKELY if at
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
738 least one of those values is likely to be for the innermost dimension.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
739 Record in UNLIKELY_P if at least one of those values is unlikely to be
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
740 for the innermost dimension.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
741
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
742 E.g. for:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
743
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
744 stride = cond ? a * b : 1
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
745
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
746 we should treat STRIDE as being a likely inner dimension, since
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
747 we know that it is 1 under at least some circumstances. (See the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
748 Fortran example below.) However:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
749
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
750 stride = a * b
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
751
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
752 on its own is unlikely to be for the innermost dimension, since
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
753 that would require both a and b to be 1 at runtime. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
754 bool unlikely_p = false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
755 tree worklist[MAX_NITERS];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
756 unsigned int length = 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
757 worklist[length++] = stride;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
758 for (unsigned int i = 0; i < length; ++i)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
759 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
760 tree expr = worklist[i];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
761
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
762 if (CONSTANT_CLASS_P (expr))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
763 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
764 /* See if EXPR * MULTIPLIER would be consistent with an individual
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
765 access or a small grouped access. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
766 if (acceptable_multiplier_p (expr, multiplier))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
767 return INNER_LIKELY;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
768 else
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
769 unlikely_p = true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
770 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
771 else if (gimple *stmt = maybe_get_stmt (expr))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
772 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
773 /* If EXPR is set by a PHI node, queue its arguments in case
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
774 we find one that is consistent with an inner dimension.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
775
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
776 An important instance of this is the Fortran handling of array
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
777 descriptors, which calculates the stride of the inner dimension
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
778 using a PHI equivalent of:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
779
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
780 raw_stride = a.dim[0].stride;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
781 stride = raw_stride != 0 ? raw_stride : 1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
782
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
783 (Strides for outer dimensions do not treat 0 specially.) */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
784 if (gphi *phi = dyn_cast <gphi *> (stmt))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
785 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
786 unsigned int nargs = gimple_phi_num_args (phi);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
787 for (unsigned int j = 0; j < nargs && length < MAX_NITERS; ++j)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
788 worklist[length++] = strip_casts (gimple_phi_arg_def (phi, j));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
789 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
790 /* If the value is set by an assignment, expect it to be read
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
791 from memory (such as an array descriptor) rather than be
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
792 calculated. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
793 else if (gassign *assign = dyn_cast <gassign *> (stmt))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
794 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
795 if (!gimple_assign_load_p (assign))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
796 unlikely_p = true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
797 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
798 /* Things like calls don't really tell us anything. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
799 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
800 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
801
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
802 /* We didn't find any possible values of STRIDE that were likely to be
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
803 for the innermost dimension. If we found one that was actively
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
804 unlikely to be for the innermost dimension, assume that that applies
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
805 to STRIDE too. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
806 return unlikely_p ? INNER_UNLIKELY : INNER_DONT_KNOW;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
807 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
808
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
809 /* Dump the likelihood that TERM's stride is for the innermost dimension.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
810 ADDRESS is the address that contains TERM. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
811
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
812 void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
813 loop_versioning::dump_inner_likelihood (address_info &address,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
814 address_term_info &term)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
815 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
816 if (term.inner_likelihood == INNER_LIKELY)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
817 dump_printf_loc (MSG_NOTE, address.stmt, "%T is likely to be the"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
818 " innermost dimension\n", term.stride);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
819 else if (term.inner_likelihood == INNER_UNLIKELY)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
820 dump_printf_loc (MSG_NOTE, address.stmt, "%T is probably not the"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
821 " innermost dimension\n", term.stride);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
822 else
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
823 dump_printf_loc (MSG_NOTE, address.stmt, "cannot tell whether %T"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
824 " is the innermost dimension\n", term.stride);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
825 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
826
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
827 /* The caller has identified that STRIDE is the stride of interest
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
828 in TERM, and that the stride is applied in OP_LOOP. Record this
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
829 information in TERM, deciding whether STRIDE is likely to be for
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
830 the innermost dimension of an array and whether it represents a
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
831 versioning opportunity. ADDRESS is the address that contains TERM. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
832
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
833 void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
834 loop_versioning::analyze_stride (address_info &address,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
835 address_term_info &term,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
836 tree stride, class loop *op_loop)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
837 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
838 term.stride = stride;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
839
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
840 term.inner_likelihood = get_inner_likelihood (stride, term.multiplier);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
841 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
842 dump_inner_likelihood (address, term);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
843
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
844 /* To be a versioning opportunity we require:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
845
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
846 - The multiplier applied by TERM is equal to the access size,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
847 so that when STRIDE is 1, the accesses in successive loop
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
848 iterations are consecutive.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
849
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
850 This is deliberately conservative. We could relax it to handle
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
851 other cases (such as those with gaps between iterations) if we
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
852 find any real testcases for which it's useful.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
853
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
854 - the stride is applied in the same loop as STMT rather than
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
855 in an outer loop. Although versioning for strides applied in
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
856 outer loops could help in some cases -- such as enabling
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
857 more loop interchange -- the savings are much lower than for
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
858 inner loops.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
859
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
860 - the stride is an SSA name that is invariant in STMT's loop,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
861 since otherwise versioning isn't possible. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
862 unsigned HOST_WIDE_INT access_size = address.max_offset - address.min_offset;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
863 if (term.multiplier == access_size
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
864 && address.loop == op_loop
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
865 && TREE_CODE (stride) == SSA_NAME
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
866 && expr_invariant_in_loop_p (address.loop, stride))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
867 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
868 term.versioning_opportunity_p = true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
869 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
870 dump_printf_loc (MSG_NOTE, address.stmt, "%T == 1 is a versioning"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
871 " opportunity\n", stride);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
872 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
873 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
874
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
875 /* See whether address term TERM (which belongs to ADDRESS) is the result
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
876 of multiplying a varying SSA name by a loop-invariant SSA name.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
877 Return true and update TERM if so.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
878
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
879 This handles both cases that SCEV might handle, such as:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
880
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
881 for (int i = 0; i < n; ++i)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
882 res += a[i * stride];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
883
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
884 and ones in which the term varies arbitrarily between iterations, such as:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
885
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
886 for (int i = 0; i < n; ++i)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
887 res += a[index[i] * stride]; */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
888
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
889 bool
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
890 loop_versioning::find_per_loop_multiplication (address_info &address,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
891 address_term_info &term)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
892 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
893 gassign *mult = maybe_get_assign (term.expr);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
894 if (!mult || gimple_assign_rhs_code (mult) != MULT_EXPR)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
895 return false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
896
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
897 class loop *mult_loop = loop_containing_stmt (mult);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
898 if (!loop_outer (mult_loop))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
899 return false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
900
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
901 tree op1 = strip_casts (gimple_assign_rhs1 (mult));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
902 tree op2 = strip_casts (gimple_assign_rhs2 (mult));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
903 if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
904 return false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
905
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
906 bool invariant1_p = expr_invariant_in_loop_p (mult_loop, op1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
907 bool invariant2_p = expr_invariant_in_loop_p (mult_loop, op2);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
908 if (invariant1_p == invariant2_p)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
909 return false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
910
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
911 /* Make sure that the loop invariant is OP2 rather than OP1. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
912 if (invariant1_p)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
913 std::swap (op1, op2);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
914
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
915 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
916 dump_printf_loc (MSG_NOTE, address.stmt, "address term %T = varying %T"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
917 " * loop-invariant %T\n", term.expr, op1, op2);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
918 analyze_stride (address, term, op2, mult_loop);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
919 return true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
920 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
921
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
922 /* Try to use scalar evolutions to find an address stride for TERM,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
923 which belongs to ADDRESS. Return true and update TERM if so.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
924
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
925 Here we are interested in any evolution information we can find,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
926 not just evolutions wrt ADDRESS->LOOP. For example, if we find that
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
927 an outer loop obviously iterates over the inner dimension of an array,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
928 that information can help us eliminate worthless versioning opportunities
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
929 in inner loops. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
930
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
931 bool
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
932 loop_versioning::analyze_term_using_scevs (address_info &address,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
933 address_term_info &term)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
934 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
935 gimple *setter = maybe_get_stmt (term.expr);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
936 if (!setter)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
937 return false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
938
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
939 class loop *wrt_loop = loop_containing_stmt (setter);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
940 if (!loop_outer (wrt_loop))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
941 return false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
942
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
943 tree chrec = strip_casts (analyze_scalar_evolution (wrt_loop, term.expr));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
944 if (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
945 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
946 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
947 dump_printf_loc (MSG_NOTE, address.stmt,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
948 "address term %T = %T\n", term.expr, chrec);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
949
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
950 /* Peel casts and accumulate constant multiplications, up to the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
951 limit allowed by M_MAXIMUM_SCALE. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
952 tree stride = strip_casts (CHREC_RIGHT (chrec));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
953 while (TREE_CODE (stride) == MULT_EXPR
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
954 && multiply_term_by (term, TREE_OPERAND (stride, 1)))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
955 stride = strip_casts (TREE_OPERAND (stride, 0));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
956
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
957 gassign *assign;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
958 while ((assign = maybe_get_assign (stride))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
959 && gimple_assign_rhs_code (assign) == MULT_EXPR
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
960 && multiply_term_by (term, gimple_assign_rhs2 (assign)))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
961 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
962 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
963 dump_printf_loc (MSG_NOTE, address.stmt,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
964 "looking through %G", assign);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
965 stride = strip_casts (gimple_assign_rhs1 (assign));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
966 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
967
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
968 analyze_stride (address, term, stride, get_chrec_loop (chrec));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
969 return true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
970 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
971
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
972 return false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
973 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
974
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
975 /* Address term TERM is an arbitrary term that provides no versioning
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
976 opportunities. Analyze it to see whether it contains any likely
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
977 inner strides, so that we don't mistakenly version for other
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
978 (less likely) candidates.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
979
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
980 This copes with invariant innermost indices such as:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
981
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
982 x(i, :) = 100
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
983
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
984 where the "i" component of the address is invariant in the loop
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
985 but provides the real inner stride.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
986
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
987 ADDRESS is the address that contains TERM. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
988
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
989 void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
990 loop_versioning::analyze_arbitrary_term (address_info &address,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
991 address_term_info &term)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
992
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
993 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
994 /* A multiplication offers two potential strides. Pick the one that
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
995 is most likely to be an innermost stride. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
996 tree expr = term.expr, alt = NULL_TREE;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
997 gassign *mult = maybe_get_assign (expr);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
998 if (mult && gimple_assign_rhs_code (mult) == MULT_EXPR)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
999 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1000 expr = strip_casts (gimple_assign_rhs1 (mult));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1001 alt = strip_casts (gimple_assign_rhs2 (mult));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1002 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1003 term.stride = expr;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1004 term.inner_likelihood = get_inner_likelihood (expr, term.multiplier);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1005 if (alt)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1006 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1007 inner_likelihood alt_l = get_inner_likelihood (alt, term.multiplier);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1008 if (alt_l > term.inner_likelihood)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1009 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1010 term.stride = alt;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1011 term.inner_likelihood = alt_l;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1012 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1013 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1014 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1015 dump_inner_likelihood (address, term);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1016 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1017
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1018 /* Try to identify loop strides in ADDRESS and try to choose realistic
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1019 versioning opportunities based on these strides.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1020
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1021 The main difficulty here isn't finding strides that could be used
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1022 in a version check (that's pretty easy). The problem instead is to
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1023 avoid versioning for some stride S that is unlikely ever to be 1 at
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1024 runtime. Versioning for S == 1 on its own would lead to unnecessary
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1025 code bloat, while adding S == 1 to more realistic version conditions
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1026 would lose the optimisation opportunity offered by those other conditions.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1027
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1028 For example, versioning for a stride of 1 in the Fortran code:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1029
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1030 integer :: a(:,:)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1031 a(1,:) = 1
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1032
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1033 is not usually a good idea, since the assignment is iterating over
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1034 an outer dimension and is relatively unlikely to have a stride of 1.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1035 (It isn't impossible, since the inner dimension might be 1, or the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1036 array might be transposed.) Similarly, in:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1037
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1038 integer :: a(:,:), b(:,:)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1039 b(:,1) = a(1,:)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1040
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1041 b(:,1) is relatively likely to have a stride of 1 while a(1,:) isn't.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1042 Versioning for when both strides are 1 would lose most of the benefit
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1043 of versioning for b's access.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1044
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1045 The approach we take is as follows:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1046
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1047 - Analyze each term to see whether it has an identifiable stride,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1048 regardless of which loop applies the stride.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1049
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1050 - Evaluate the likelihood that each such stride is for the innermost
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1051 dimension of an array, on the scale "likely", "don't know" or "unlikely".
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1052
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1053 - If there is a single "likely" innermost stride, and that stride is
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1054 applied in the loop that contains STMT, version the loop for when the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1055 stride is 1. This deals with the cases in which we're fairly
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1056 confident of doing the right thing, such as the b(:,1) reference above.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1057
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1058 - If there are no "likely" innermost strides, and the loop that contains
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1059 STMT uses a stride that we rated as "don't know", version for when
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1060 that stride is 1. This is principally used for C code such as:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1061
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1062 for (int i = 0; i < n; ++i)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1063 a[i * x] = ...;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1064
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1065 and:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1066
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1067 for (int j = 0; j < n; ++j)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1068 for (int i = 0; i < n; ++i)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1069 a[i * x + j * y] = ...;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1070
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1071 where nothing in the way "x" and "y" are set gives a hint as to
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1072 whether "i" iterates over the innermost dimension of the array.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1073 In these situations it seems reasonable to assume the the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1074 programmer has nested the loops appropriately (although of course
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1075 there are examples like GEMM in which this assumption doesn't hold
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1076 for all accesses in the loop).
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1077
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1078 This case is also useful for the Fortran equivalent of the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1079 above C code. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1080
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1081 void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1082 loop_versioning::analyze_address_fragment (address_info &address)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1083 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1084 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1085 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1086 dump_printf_loc (MSG_NOTE, address.stmt, "analyzing address fragment ");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1087 dump_address_info (MSG_NOTE, address);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1088 dump_printf (MSG_NOTE, "\n");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1089 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1090
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1091 /* Analyze each component of the sum to see whether it involves an
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1092 apparent stride.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1093
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1094 There is an overlap between the addresses that
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1095 find_per_loop_multiplication and analyze_term_using_scevs can handle,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1096 but the former is much cheaper than SCEV analysis, so try it first. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1097 for (unsigned int i = 0; i < address.terms.length (); ++i)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1098 if (!find_per_loop_multiplication (address, address.terms[i])
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1099 && !analyze_term_using_scevs (address, address.terms[i])
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1100 && !POINTER_TYPE_P (TREE_TYPE (address.terms[i].expr)))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1101 analyze_arbitrary_term (address, address.terms[i]);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1102
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1103 /* Check for strides that are likely to be for the innermost dimension.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1104
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1105 1. If there is a single likely inner stride, if it is an SSA name,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1106 and if it is worth versioning the loop for when the SSA name
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1107 equals 1, record that we want to do so.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1108
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1109 2. Otherwise, if there any likely inner strides, bail out. This means
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1110 one of:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1111
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1112 (a) There are multiple likely inner strides. This suggests we're
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1113 confused and be can't be confident of doing the right thing.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1114
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1115 (b) There is a single likely inner stride and it is a constant
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1116 rather than an SSA name. This can mean either that the access
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1117 is a natural one without any variable strides, such as:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1118
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1119 for (int i = 0; i < n; ++i)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1120 a[i] += 1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1121
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1122 or that a variable stride is applied to an outer dimension,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1123 such as:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1124
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1125 for (int i = 0; i < n; ++i)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1126 for (int j = 0; j < n; ++j)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1127 a[j * stride][i] += 1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1128
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1129 (c) There is a single likely inner stride, and it is an SSA name,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1130 but it isn't a worthwhile versioning opportunity. This usually
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1131 means that the variable stride is applied by an outer loop,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1132 such as:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1133
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1134 for (int i = 0; i < n; ++i)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1135 for (int j = 0; j < n; ++j)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1136 a[j][i * stride] += 1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1137
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1138 or (using an example with a more natural loop nesting):
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1139
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1140 for (int i = 0; i < n; ++i)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1141 for (int j = 0; j < n; ++j)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1142 a[i][j] += b[i * stride];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1143
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1144 in cases where b[i * stride] cannot (yet) be hoisted for
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1145 aliasing reasons.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1146
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1147 3. If there are no likely inner strides, fall through to the next
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1148 set of checks.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1149
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1150 Pointer equality is enough to check for uniqueness in (1), since we
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1151 only care about SSA names. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1152 tree chosen_stride = NULL_TREE;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1153 tree version_stride = NULL_TREE;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1154 for (unsigned int i = 0; i < address.terms.length (); ++i)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1155 if (chosen_stride != address.terms[i].stride
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1156 && address.terms[i].inner_likelihood == INNER_LIKELY)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1157 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1158 if (chosen_stride)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1159 return;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1160 chosen_stride = address.terms[i].stride;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1161 if (address.terms[i].versioning_opportunity_p)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1162 version_stride = chosen_stride;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1163 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1164
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1165 /* If there are no likely inner strides, see if there is a single
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1166 versioning opportunity for a stride that was rated as INNER_DONT_KNOW.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1167 See the comment above the function for the cases that this code
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1168 handles. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1169 if (!chosen_stride)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1170 for (unsigned int i = 0; i < address.terms.length (); ++i)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1171 if (version_stride != address.terms[i].stride
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1172 && address.terms[i].inner_likelihood == INNER_DONT_KNOW
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1173 && address.terms[i].versioning_opportunity_p)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1174 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1175 if (version_stride)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1176 return;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1177 version_stride = address.terms[i].stride;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1178 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1179
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1180 if (version_stride)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1181 version_for_unity (address.stmt, version_stride);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1182 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1183
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1184 /* Treat EXPR * MULTIPLIER + OFFSET as a fragment of an address that addresses
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1185 TYPE_SIZE bytes and record this address fragment for later processing.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1186 STMT is the statement that contains the address. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1187
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1188 void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1189 loop_versioning::record_address_fragment (gimple *stmt,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1190 unsigned HOST_WIDE_INT type_size,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1191 tree expr,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1192 unsigned HOST_WIDE_INT multiplier,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1193 HOST_WIDE_INT offset)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1194 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1195 /* We're only interested in computed values. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1196 if (TREE_CODE (expr) != SSA_NAME)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1197 return;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1198
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1199 /* Quick exit if no part of the address is calculated in STMT's loop,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1200 since such addresses have no versioning opportunities. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1201 class loop *loop = loop_containing_stmt (stmt);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1202 if (expr_invariant_in_loop_p (loop, expr))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1203 return;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1204
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1205 /* Set up an address_info for EXPR * MULTIPLIER. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1206 address_info *address = XOBNEW (&m_obstack, address_info);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1207 new (address) address_info;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1208 address->stmt = stmt;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1209 address->loop = loop;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1210 address->base = NULL_TREE;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1211 address->terms.quick_grow (1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1212 address->terms[0].expr = expr;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1213 address->terms[0].multiplier = multiplier;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1214 address->terms[0].stride = NULL_TREE;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1215 address->terms[0].inner_likelihood = INNER_UNLIKELY;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1216 address->terms[0].versioning_opportunity_p = false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1217 address->min_offset = offset;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1218
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1219 /* Peel apart the expression into a sum of address_terms, where each
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1220 term is multiplied by a constant. Treat a + b and a - b the same,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1221 since it doesn't matter for our purposes whether an address is
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1222 increasing or decreasing. Distribute (a + b) * constant into
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1223 a * constant + b * constant.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1224
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1225 We don't care which loop each term belongs to, since we want to
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1226 examine as many candidate strides as possible when determining
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1227 which is likely to be for the innermost dimension. We therefore
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1228 don't limit the search to statements in STMT's loop. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1229 for (unsigned int i = 0; i < address->terms.length (); )
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1230 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1231 if (gassign *assign = maybe_get_assign (address->terms[i].expr))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1232 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1233 tree_code code = gimple_assign_rhs_code (assign);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1234 if (code == PLUS_EXPR
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1235 || code == POINTER_PLUS_EXPR
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1236 || code == MINUS_EXPR)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1237 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1238 tree op1 = gimple_assign_rhs1 (assign);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1239 tree op2 = gimple_assign_rhs2 (assign);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1240 if (TREE_CODE (op2) == INTEGER_CST)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1241 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1242 address->terms[i].expr = strip_casts (op1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1243 /* This is heuristic only, so don't worry about truncation
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1244 or overflow. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1245 address->min_offset += (TREE_INT_CST_LOW (op2)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1246 * address->terms[i].multiplier);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1247 continue;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1248 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1249 else if (address->terms.length () < address_info::MAX_TERMS)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1250 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1251 unsigned int j = address->terms.length ();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1252 address->terms.quick_push (address->terms[i]);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1253 address->terms[i].expr = strip_casts (op1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1254 address->terms[j].expr = strip_casts (op2);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1255 continue;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1256 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1257 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1258 if (code == MULT_EXPR)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1259 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1260 tree op1 = gimple_assign_rhs1 (assign);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1261 tree op2 = gimple_assign_rhs2 (assign);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1262 if (multiply_term_by (address->terms[i], op2))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1263 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1264 address->terms[i].expr = strip_casts (op1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1265 continue;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1266 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1267 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1268 if (CONVERT_EXPR_CODE_P (code))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1269 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1270 tree op1 = gimple_assign_rhs1 (assign);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1271 address->terms[i].expr = strip_casts (op1);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1272 continue;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1273 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1274 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1275 i += 1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1276 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1277
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1278 /* Peel off any symbolic pointer. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1279 if (TREE_CODE (address->terms[0].expr) != SSA_NAME
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1280 && address->terms[0].multiplier == 1)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1281 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1282 if (address->terms.length () == 1)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1283 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1284 obstack_free (&m_obstack, address);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1285 return;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1286 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1287 address->base = address->terms[0].expr;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1288 address->terms.ordered_remove (0);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1289 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1290
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1291 /* Require all remaining terms to be SSA names. (This could be false
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1292 for unfolded statements, but they aren't worth dealing with.) */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1293 for (unsigned int i = 0; i < address->terms.length (); ++i)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1294 if (TREE_CODE (address->terms[i].expr) != SSA_NAME)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1295 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1296 obstack_free (&m_obstack, address);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1297 return;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1298 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1299
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1300 /* The loop above set MIN_OFFSET based on the first byte of the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1301 referenced data. Calculate the end + 1. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1302 address->max_offset = address->min_offset + type_size;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1303
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1304 /* Put the terms into a canonical order for the hash table lookup below. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1305 address->terms.qsort (compare_address_terms);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1306
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1307 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1308 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1309 dump_printf_loc (MSG_NOTE, stmt, "recording address fragment %T", expr);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1310 if (multiplier != 1)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1311 dump_printf (MSG_NOTE, " * %wd", multiplier);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1312 dump_printf (MSG_NOTE, " = ");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1313 dump_address_info (MSG_NOTE, *address);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1314 dump_printf (MSG_NOTE, "\n");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1315 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1316
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1317 /* Pool address information with the same terms (but potentially
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1318 different offsets). */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1319 address_info **slot = m_address_table.find_slot (address, INSERT);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1320 if (address_info *old_address = *slot)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1321 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1322 /* We've already seen an address with the same terms. Extend the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1323 offset range to account for this access. Doing this can paper
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1324 over gaps, such as in:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1325
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1326 a[i * stride * 4] + a[i * stride * 4 + 3];
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1327
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1328 where nothing references "+ 1" or "+ 2". However, the vectorizer
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1329 handles such gapped accesses without problems, so it's not worth
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1330 trying to exclude them. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1331 if (old_address->min_offset > address->min_offset)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1332 old_address->min_offset = address->min_offset;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1333 if (old_address->max_offset < address->max_offset)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1334 old_address->max_offset = address->max_offset;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1335 obstack_free (&m_obstack, address);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1336 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1337 else
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1338 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1339 /* This is the first time we've seen an address with these terms. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1340 *slot = address;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1341 m_address_list.safe_push (address);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1342 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1343 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1344
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1345 /* Analyze expression EXPR, which occurs in STMT. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1346
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1347 void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1348 loop_versioning::analyze_expr (gimple *stmt, tree expr)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1349 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1350 unsigned HOST_WIDE_INT type_size;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1351
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1352 while (handled_component_p (expr))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1353 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1354 /* See whether we can use versioning to avoid a multiplication
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1355 in an array index. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1356 if (TREE_CODE (expr) == ARRAY_REF
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1357 && acceptable_type_p (TREE_TYPE (expr), &type_size))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1358 record_address_fragment (stmt, type_size,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1359 TREE_OPERAND (expr, 1), type_size, 0);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1360 expr = TREE_OPERAND (expr, 0);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1361 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1362
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1363 /* See whether we can use versioning to avoid a multiplication
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1364 in the pointer calculation of a MEM_REF. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1365 if (TREE_CODE (expr) == MEM_REF
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1366 && acceptable_type_p (TREE_TYPE (expr), &type_size))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1367 record_address_fragment (stmt, type_size, TREE_OPERAND (expr, 0), 1,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1368 /* This is heuristic only, so don't worry
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1369 about truncation or overflow. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1370 TREE_INT_CST_LOW (TREE_OPERAND (expr, 1)));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1371
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1372 /* These would be easy to handle if they existed at this stage. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1373 gcc_checking_assert (TREE_CODE (expr) != TARGET_MEM_REF);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1374 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1375
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1376 /* Analyze all the statements in BB looking for useful version checks.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1377 Return true on success, false if something prevents the block from
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1378 being versioned. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1379
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1380 bool
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1381 loop_versioning::analyze_block (basic_block bb)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1382 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1383 class loop *loop = bb->loop_father;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1384 loop_info &li = get_loop_info (loop);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1385 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1386 gsi_next (&gsi))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1387 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1388 gimple *stmt = gsi_stmt (gsi);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1389 if (is_gimple_debug (stmt))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1390 continue;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1391
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1392 if (expensive_stmt_p (stmt))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1393 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1394 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1395 dump_printf_loc (MSG_NOTE, stmt, "expensive statement"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1396 " prevents versioning: %G", stmt);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1397 return false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1398 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1399
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1400 /* Only look for direct versioning opportunities in inner loops
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1401 since the benefit tends to be much smaller for outer loops. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1402 if (!loop->inner)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1403 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1404 unsigned int nops = gimple_num_ops (stmt);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1405 for (unsigned int i = 0; i < nops; ++i)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1406 if (tree op = gimple_op (stmt, i))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1407 analyze_expr (stmt, op);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1408 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1409
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1410 /* The point of the instruction limit is to prevent excessive
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1411 code growth, so this is a size-based estimate even though
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1412 the optimization is aimed at speed. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1413 li.num_insns += estimate_num_insns (stmt, &eni_size_weights);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1414 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1415
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1416 return true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1417 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1418
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1419 /* Analyze all the blocks in the function, looking for useful version checks.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1420 Return true if we found one. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1421
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1422 bool
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1423 loop_versioning::analyze_blocks ()
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1424 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1425 AUTO_DUMP_SCOPE ("analyze_blocks",
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1426 dump_user_location_t::from_function_decl (m_fn->decl));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1427
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1428 /* For now we don't try to version the whole function, although
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1429 versioning at that level could be useful in some cases. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1430 get_loop_info (get_loop (m_fn, 0)).rejected_p = true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1431
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1432 class loop *loop;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1433 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1434 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1435 loop_info &linfo = get_loop_info (loop);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1436
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1437 /* Ignore cold loops. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1438 if (!optimize_loop_for_speed_p (loop))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1439 linfo.rejected_p = true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1440
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1441 /* See whether an inner loop prevents versioning of this loop. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1442 if (!linfo.rejected_p)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1443 for (class loop *inner = loop->inner; inner; inner = inner->next)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1444 if (get_loop_info (inner).rejected_p)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1445 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1446 linfo.rejected_p = true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1447 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1448 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1449
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1450 /* If versioning the loop is still a possibility, examine the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1451 statements in the loop to look for versioning opportunities. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1452 if (!linfo.rejected_p)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1453 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1454 void *start_point = obstack_alloc (&m_obstack, 0);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1455
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1456 for (basic_block bb = linfo.block_list; bb;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1457 bb = m_next_block_in_loop[bb->index])
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1458 if (!analyze_block (bb))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1459 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1460 linfo.rejected_p = true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1461 break;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1462 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1463
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1464 if (!linfo.rejected_p)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1465 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1466 /* Process any queued address fragments, now that we have
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1467 complete grouping information. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1468 address_info *address;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1469 unsigned int i;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1470 FOR_EACH_VEC_ELT (m_address_list, i, address)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1471 analyze_address_fragment (*address);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1472 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1473
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1474 m_address_table.empty ();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1475 m_address_list.truncate (0);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1476 obstack_free (&m_obstack, start_point);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1477 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1478 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1479
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1480 return m_num_conditions != 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1481 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1482
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1483 /* Use the ranges in VRS to remove impossible versioning conditions from
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1484 LOOP. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1485
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1486 void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1487 loop_versioning::prune_loop_conditions (class loop *loop, vr_values *vrs)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1488 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1489 loop_info &li = get_loop_info (loop);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1490
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1491 int to_remove = -1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1492 bitmap_iterator bi;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1493 unsigned int i;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1494 EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1495 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1496 tree name = ssa_name (i);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1497 const value_range_equiv *vr = vrs->get_value_range (name);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1498 if (vr && !vr->may_contain_p (build_one_cst (TREE_TYPE (name))))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1499 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1500 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1501 dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1502 "%T can never be 1 in this loop\n", name);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1503
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1504 if (to_remove >= 0)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1505 bitmap_clear_bit (&li.unity_names, to_remove);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1506 to_remove = i;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1507 m_num_conditions -= 1;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1508 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1509 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1510 if (to_remove >= 0)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1511 bitmap_clear_bit (&li.unity_names, to_remove);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1512 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1513
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1514 /* Remove any scheduled loop version conditions that will never be true.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1515 Return true if any remain. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1516
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1517 bool
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1518 loop_versioning::prune_conditions ()
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1519 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1520 AUTO_DUMP_SCOPE ("prune_loop_conditions",
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1521 dump_user_location_t::from_function_decl (m_fn->decl));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1522
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1523 calculate_dominance_info (CDI_DOMINATORS);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1524 lv_dom_walker dom_walker (*this);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1525 dom_walker.walk (ENTRY_BLOCK_PTR_FOR_FN (m_fn));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1526 return m_num_conditions != 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1527 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1528
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1529 /* Merge the version checks for INNER into immediately-enclosing loop
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1530 OUTER. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1531
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1532 void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1533 loop_versioning::merge_loop_info (class loop *outer, class loop *inner)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1534 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1535 loop_info &inner_li = get_loop_info (inner);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1536 loop_info &outer_li = get_loop_info (outer);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1537
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1538 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1539 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1540 bitmap_iterator bi;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1541 unsigned int i;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1542 EXECUTE_IF_SET_IN_BITMAP (&inner_li.unity_names, 0, i, bi)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1543 if (!bitmap_bit_p (&outer_li.unity_names, i))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1544 dump_printf_loc (MSG_NOTE, find_loop_location (inner),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1545 "hoisting check that %T == 1 to outer loop\n",
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1546 ssa_name (i));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1547 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1548
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1549 bitmap_ior_into (&outer_li.unity_names, &inner_li.unity_names);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1550 if (loop_depth (outer_li.outermost) < loop_depth (inner_li.outermost))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1551 outer_li.outermost = inner_li.outermost;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1552 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1553
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1554 /* Add LOOP to the queue of loops to version. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1555
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1556 void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1557 loop_versioning::add_loop_to_queue (class loop *loop)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1558 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1559 loop_info &li = get_loop_info (loop);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1560
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1561 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1562 dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1563 "queuing this loop for versioning\n");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1564 m_loops_to_version.safe_push (loop);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1565
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1566 /* Don't try to version superloops. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1567 li.rejected_p = true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1568 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1569
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1570 /* Decide whether the cost model would allow us to version LOOP,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1571 either directly or as part of a parent loop, and return true if so.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1572 This does not imply that the loop is actually worth versioning in its
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1573 own right, just that it would be valid to version it if something
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1574 benefited.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1575
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1576 We have already made this decision for all inner loops of LOOP. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1577
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1578 bool
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1579 loop_versioning::decide_whether_loop_is_versionable (class loop *loop)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1580 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1581 loop_info &li = get_loop_info (loop);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1582
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1583 if (li.rejected_p)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1584 return false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1585
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1586 /* Examine the decisions made for inner loops. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1587 for (class loop *inner = loop->inner; inner; inner = inner->next)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1588 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1589 loop_info &inner_li = get_loop_info (inner);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1590 if (inner_li.rejected_p)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1591 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1592 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1593 dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1594 "not versioning this loop because one of its"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1595 " inner loops should not be versioned\n");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1596 return false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1597 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1598
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1599 if (inner_li.worth_versioning_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1600 li.subloops_benefit_p = true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1601
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1602 /* Accumulate the number of instructions from subloops that are not
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1603 the innermost, or that don't benefit from versioning. Only the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1604 instructions from innermost loops that benefit from versioning
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1605 should be weighed against loop-versioning-max-inner-insns;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1606 everything else should be weighed against
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1607 loop-versioning-max-outer-insns. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1608 if (!inner_li.worth_versioning_p () || inner->inner)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1609 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1610 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1611 dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1612 "counting %d instructions from this loop"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1613 " against its parent loop\n", inner_li.num_insns);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1614 li.num_insns += inner_li.num_insns;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1615 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1616 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1617
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1618 /* Enforce the size limits. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1619 if (li.worth_versioning_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1620 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1621 unsigned int max_num_insns = max_insns_for_loop (loop);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1622 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1623 dump_printf_loc (MSG_NOTE, find_loop_location (loop),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1624 "this loop has %d instructions, against"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1625 " a versioning limit of %d\n",
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1626 li.num_insns, max_num_insns);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1627 if (li.num_insns > max_num_insns)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1628 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1629 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1630 dump_printf_loc (MSG_MISSED_OPTIMIZATION
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1631 | MSG_PRIORITY_USER_FACING,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1632 find_loop_location (loop),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1633 "this loop is too big to version");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1634 return false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1635 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1636 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1637
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1638 /* Hoist all version checks from subloops to this loop. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1639 for (class loop *subloop = loop->inner; subloop; subloop = subloop->next)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1640 merge_loop_info (loop, subloop);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1641
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1642 return true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1643 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1644
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1645 /* Decide which loops to version and add them to the versioning queue.
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1646 Return true if there are any loops to version. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1647
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1648 bool
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1649 loop_versioning::make_versioning_decisions ()
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1650 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1651 AUTO_DUMP_SCOPE ("make_versioning_decisions",
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1652 dump_user_location_t::from_function_decl (m_fn->decl));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1653
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1654 class loop *loop;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1655 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1656 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1657 loop_info &linfo = get_loop_info (loop);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1658 if (decide_whether_loop_is_versionable (loop))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1659 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1660 /* Commit to versioning LOOP directly if we can't hoist the
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1661 version checks any further. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1662 if (linfo.worth_versioning_p ()
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1663 && (loop_depth (loop) == 1 || linfo.outermost == loop))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1664 add_loop_to_queue (loop);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1665 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1666 else
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1667 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1668 /* We can't version this loop, so individually version any
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1669 subloops that would benefit and haven't been versioned yet. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1670 linfo.rejected_p = true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1671 for (class loop *subloop = loop->inner; subloop;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1672 subloop = subloop->next)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1673 if (get_loop_info (subloop).worth_versioning_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1674 add_loop_to_queue (subloop);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1675 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1676 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1677
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1678 return !m_loops_to_version.is_empty ();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1679 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1680
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1681 /* Attempt to implement loop versioning for LOOP, using the information
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1682 cached in the associated loop_info. Return true on success. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1683
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1684 bool
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1685 loop_versioning::version_loop (class loop *loop)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1686 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1687 loop_info &li = get_loop_info (loop);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1688
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1689 /* Build up a condition that selects the original loop instead of
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1690 the simplified loop. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1691 tree cond = boolean_false_node;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1692 bitmap_iterator bi;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1693 unsigned int i;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1694 EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1695 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1696 tree name = ssa_name (i);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1697 tree ne_one = fold_build2 (NE_EXPR, boolean_type_node, name,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1698 build_one_cst (TREE_TYPE (name)));
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1699 cond = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, cond, ne_one);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1700 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1701
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1702 /* Convert the condition into a suitable gcond. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1703 gimple_seq stmts = NULL;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1704 cond = force_gimple_operand_1 (cond, &stmts, is_gimple_condexpr, NULL_TREE);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1705
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1706 /* Version the loop. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1707 initialize_original_copy_tables ();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1708 basic_block cond_bb;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1709 li.optimized_loop = loop_version (loop, cond, &cond_bb,
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1710 profile_probability::unlikely (),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1711 profile_probability::likely (),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1712 profile_probability::unlikely (),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1713 profile_probability::likely (), true);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1714 free_original_copy_tables ();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1715 if (!li.optimized_loop)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1716 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1717 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1718 dump_printf_loc (MSG_MISSED_OPTIMIZATION, find_loop_location (loop),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1719 "tried but failed to version this loop for when"
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1720 " certain strides are 1\n");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1721 return false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1722 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1723
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1724 if (dump_enabled_p ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1725 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, find_loop_location (loop),
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1726 "versioned this loop for when certain strides are 1\n");
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1727
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1728 /* Insert the statements that feed COND. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1729 if (stmts)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1730 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1731 gimple_stmt_iterator gsi = gsi_last_bb (cond_bb);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1732 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1733 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1734
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1735 return true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1736 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1737
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1738 /* Attempt to version all loops in the versioning queue. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1739
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1740 void
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1741 loop_versioning::implement_versioning_decisions ()
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1742 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1743 /* No AUTO_DUMP_SCOPE here since all messages are top-level and
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1744 user-facing at this point. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1745
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1746 bool any_succeeded_p = false;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1747 class loop *loop;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1748 unsigned int i;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1749 FOR_EACH_VEC_ELT (m_loops_to_version, i, loop)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1750 if (version_loop (loop))
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1751 any_succeeded_p = true;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1752 if (!any_succeeded_p)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1753 return;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1754
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1755 update_ssa (TODO_update_ssa);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1756
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1757 /* Simplify the new loop, which is used when COND is false. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1758 FOR_EACH_VEC_ELT (m_loops_to_version, i, loop)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1759 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1760 loop_info &linfo = get_loop_info (loop);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1761 if (linfo.optimized_loop)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1762 name_prop (linfo).substitute_and_fold (linfo.optimized_loop->header);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1763 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1764 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1765
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1766 /* Run the pass and return a set of TODO_* flags. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1767
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1768 unsigned int
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1769 loop_versioning::run ()
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1770 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1771 gcc_assert (scev_initialized_p ());
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1772
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1773 if (analyze_blocks ()
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1774 && prune_conditions ()
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1775 && make_versioning_decisions ())
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1776 implement_versioning_decisions ();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1777
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1778 return 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1779 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1780
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1781 /* Loop versioning pass. */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1782
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1783 const pass_data pass_data_loop_versioning =
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1784 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1785 GIMPLE_PASS, /* type */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1786 "lversion", /* name */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1787 OPTGROUP_LOOP, /* optinfo_flags */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1788 TV_LOOP_VERSIONING, /* tv_id */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1789 PROP_cfg, /* properties_required */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1790 0, /* properties_provided */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1791 0, /* properties_destroyed */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1792 0, /* todo_flags_start */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1793 0, /* todo_flags_finish */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1794 };
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1795
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1796 class pass_loop_versioning : public gimple_opt_pass
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1797 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1798 public:
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1799 pass_loop_versioning (gcc::context *ctxt)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1800 : gimple_opt_pass (pass_data_loop_versioning, ctxt)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1801 {}
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1802
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1803 /* opt_pass methods: */
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1804 virtual bool gate (function *) { return flag_version_loops_for_strides; }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1805 virtual unsigned int execute (function *);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1806 };
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1807
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1808 unsigned int
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1809 pass_loop_versioning::execute (function *fn)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1810 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1811 if (number_of_loops (fn) <= 1)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1812 return 0;
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1813
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1814 return loop_versioning (fn).run ();
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1815 }
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1816
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1817 } // anon namespace
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1818
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1819 gimple_opt_pass *
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1820 make_pass_loop_versioning (gcc::context *ctxt)
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1821 {
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1822 return new pass_loop_versioning (ctxt);
1830386684a0 gcc-9.2.0
anatofuz
parents:
diff changeset
1823 }