111
|
1 /* Straight-line strength reduction.
|
145
|
2 Copyright (C) 2012-2020 Free Software Foundation, Inc.
|
111
|
3 Contributed by Bill Schmidt, IBM <wschmidt@linux.ibm.com>
|
|
4
|
|
5 This file is part of GCC.
|
|
6
|
|
7 GCC is free software; you can redistribute it and/or modify it under
|
|
8 the terms of the GNU General Public License as published by the Free
|
|
9 Software Foundation; either version 3, or (at your option) any later
|
|
10 version.
|
|
11
|
|
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
15 for more details.
|
|
16
|
|
17 You should have received a copy of the GNU General Public License
|
|
18 along with GCC; see the file COPYING3. If not see
|
|
19 <http://www.gnu.org/licenses/>. */
|
|
20
|
|
21 /* There are many algorithms for performing strength reduction on
|
|
22 loops. This is not one of them. IVOPTS handles strength reduction
|
|
23 of induction variables just fine. This pass is intended to pick
|
|
24 up the crumbs it leaves behind, by considering opportunities for
|
|
25 strength reduction along dominator paths.
|
|
26
|
|
27 Strength reduction addresses explicit multiplies, and certain
|
|
28 multiplies implicit in addressing expressions. It would also be
|
|
29 possible to apply strength reduction to divisions and modulos,
|
|
30 but such opportunities are relatively uncommon.
|
|
31
|
|
32 Strength reduction is also currently restricted to integer operations.
|
|
33 If desired, it could be extended to floating-point operations under
|
|
34 control of something like -funsafe-math-optimizations. */
|
|
35
|
|
36 #include "config.h"
|
|
37 #include "system.h"
|
|
38 #include "coretypes.h"
|
|
39 #include "backend.h"
|
|
40 #include "rtl.h"
|
|
41 #include "tree.h"
|
|
42 #include "gimple.h"
|
|
43 #include "cfghooks.h"
|
|
44 #include "tree-pass.h"
|
|
45 #include "ssa.h"
|
|
46 #include "expmed.h"
|
|
47 #include "gimple-pretty-print.h"
|
|
48 #include "fold-const.h"
|
|
49 #include "gimple-iterator.h"
|
|
50 #include "gimplify-me.h"
|
|
51 #include "stor-layout.h"
|
|
52 #include "cfgloop.h"
|
|
53 #include "tree-cfg.h"
|
|
54 #include "domwalk.h"
|
|
55 #include "tree-ssa-address.h"
|
|
56 #include "tree-affine.h"
|
131
|
57 #include "tree-eh.h"
|
111
|
58 #include "builtins.h"
|
|
59
|
|
60 /* Information about a strength reduction candidate. Each statement
|
|
61 in the candidate table represents an expression of one of the
|
|
62 following forms (the special case of CAND_REF will be described
|
|
63 later):
|
|
64
|
|
65 (CAND_MULT) S1: X = (B + i) * S
|
|
66 (CAND_ADD) S1: X = B + (i * S)
|
|
67
|
|
68 Here X and B are SSA names, i is an integer constant, and S is
|
|
69 either an SSA name or a constant. We call B the "base," i the
|
|
70 "index", and S the "stride."
|
|
71
|
|
72 Any statement S0 that dominates S1 and is of the form:
|
|
73
|
|
74 (CAND_MULT) S0: Y = (B + i') * S
|
|
75 (CAND_ADD) S0: Y = B + (i' * S)
|
|
76
|
|
77 is called a "basis" for S1. In both cases, S1 may be replaced by
|
|
78
|
|
79 S1': X = Y + (i - i') * S,
|
|
80
|
|
81 where (i - i') * S is folded to the extent possible.
|
|
82
|
|
83 All gimple statements are visited in dominator order, and each
|
|
84 statement that may contribute to one of the forms of S1 above is
|
|
85 given at least one entry in the candidate table. Such statements
|
|
86 include addition, pointer addition, subtraction, multiplication,
|
|
87 negation, copies, and nontrivial type casts. If a statement may
|
|
88 represent more than one expression of the forms of S1 above,
|
|
89 multiple "interpretations" are stored in the table and chained
|
|
90 together. Examples:
|
|
91
|
|
92 * An add of two SSA names may treat either operand as the base.
|
|
93 * A multiply of two SSA names, likewise.
|
|
94 * A copy or cast may be thought of as either a CAND_MULT with
|
|
95 i = 0 and S = 1, or as a CAND_ADD with i = 0 or S = 0.
|
|
96
|
|
97 Candidate records are allocated from an obstack. They are addressed
|
|
98 both from a hash table keyed on S1, and from a vector of candidate
|
|
99 pointers arranged in predominator order.
|
|
100
|
|
101 Opportunity note
|
|
102 ----------------
|
|
103 Currently we don't recognize:
|
|
104
|
|
105 S0: Y = (S * i') - B
|
|
106 S1: X = (S * i) - B
|
|
107
|
|
108 as a strength reduction opportunity, even though this S1 would
|
|
109 also be replaceable by the S1' above. This can be added if it
|
|
110 comes up in practice.
|
|
111
|
|
112 Strength reduction in addressing
|
|
113 --------------------------------
|
|
114 There is another kind of candidate known as CAND_REF. A CAND_REF
|
|
115 describes a statement containing a memory reference having
|
|
116 complex addressing that might benefit from strength reduction.
|
|
117 Specifically, we are interested in references for which
|
|
118 get_inner_reference returns a base address, offset, and bitpos as
|
|
119 follows:
|
|
120
|
|
121 base: MEM_REF (T1, C1)
|
|
122 offset: MULT_EXPR (PLUS_EXPR (T2, C2), C3)
|
|
123 bitpos: C4 * BITS_PER_UNIT
|
|
124
|
|
125 Here T1 and T2 are arbitrary trees, and C1, C2, C3, C4 are
|
|
126 arbitrary integer constants. Note that C2 may be zero, in which
|
|
127 case the offset will be MULT_EXPR (T2, C3).
|
|
128
|
|
129 When this pattern is recognized, the original memory reference
|
|
130 can be replaced with:
|
|
131
|
|
132 MEM_REF (POINTER_PLUS_EXPR (T1, MULT_EXPR (T2, C3)),
|
|
133 C1 + (C2 * C3) + C4)
|
|
134
|
|
135 which distributes the multiply to allow constant folding. When
|
|
136 two or more addressing expressions can be represented by MEM_REFs
|
|
137 of this form, differing only in the constants C1, C2, and C4,
|
|
138 making this substitution produces more efficient addressing during
|
|
139 the RTL phases. When there are not at least two expressions with
|
|
140 the same values of T1, T2, and C3, there is nothing to be gained
|
|
141 by the replacement.
|
|
142
|
|
143 Strength reduction of CAND_REFs uses the same infrastructure as
|
|
144 that used by CAND_MULTs and CAND_ADDs. We record T1 in the base (B)
|
|
145 field, MULT_EXPR (T2, C3) in the stride (S) field, and
|
|
146 C1 + (C2 * C3) + C4 in the index (i) field. A basis for a CAND_REF
|
|
147 is thus another CAND_REF with the same B and S values. When at
|
|
148 least two CAND_REFs are chained together using the basis relation,
|
|
149 each of them is replaced as above, resulting in improved code
|
|
150 generation for addressing.
|
|
151
|
|
152 Conditional candidates
|
|
153 ======================
|
|
154
|
|
155 Conditional candidates are best illustrated with an example.
|
|
156 Consider the code sequence:
|
|
157
|
|
158 (1) x_0 = ...;
|
|
159 (2) a_0 = x_0 * 5; MULT (B: x_0; i: 0; S: 5)
|
|
160 if (...)
|
|
161 (3) x_1 = x_0 + 1; ADD (B: x_0, i: 1; S: 1)
|
|
162 (4) x_2 = PHI <x_0, x_1>; PHI (B: x_0, i: 0, S: 1)
|
|
163 (5) x_3 = x_2 + 1; ADD (B: x_2, i: 1, S: 1)
|
|
164 (6) a_1 = x_3 * 5; MULT (B: x_2, i: 1; S: 5)
|
|
165
|
|
166 Here strength reduction is complicated by the uncertain value of x_2.
|
|
167 A legitimate transformation is:
|
|
168
|
|
169 (1) x_0 = ...;
|
|
170 (2) a_0 = x_0 * 5;
|
|
171 if (...)
|
|
172 {
|
|
173 (3) [x_1 = x_0 + 1;]
|
|
174 (3a) t_1 = a_0 + 5;
|
|
175 }
|
|
176 (4) [x_2 = PHI <x_0, x_1>;]
|
|
177 (4a) t_2 = PHI <a_0, t_1>;
|
|
178 (5) [x_3 = x_2 + 1;]
|
|
179 (6r) a_1 = t_2 + 5;
|
|
180
|
|
181 where the bracketed instructions may go dead.
|
|
182
|
|
183 To recognize this opportunity, we have to observe that statement (6)
|
|
184 has a "hidden basis" (2). The hidden basis is unlike a normal basis
|
|
185 in that the statement and the hidden basis have different base SSA
|
|
186 names (x_2 and x_0, respectively). The relationship is established
|
|
187 when a statement's base name (x_2) is defined by a phi statement (4),
|
|
188 each argument of which (x_0, x_1) has an identical "derived base name."
|
|
189 If the argument is defined by a candidate (as x_1 is by (3)) that is a
|
|
190 CAND_ADD having a stride of 1, the derived base name of the argument is
|
|
191 the base name of the candidate (x_0). Otherwise, the argument itself
|
|
192 is its derived base name (as is the case with argument x_0).
|
|
193
|
|
194 The hidden basis for statement (6) is the nearest dominating candidate
|
|
195 whose base name is the derived base name (x_0) of the feeding phi (4),
|
|
196 and whose stride is identical to that of the statement. We can then
|
|
197 create the new "phi basis" (4a) and feeding adds along incoming arcs (3a),
|
|
198 allowing the final replacement of (6) by the strength-reduced (6r).
|
|
199
|
|
200 To facilitate this, a new kind of candidate (CAND_PHI) is introduced.
|
|
201 A CAND_PHI is not a candidate for replacement, but is maintained in the
|
|
202 candidate table to ease discovery of hidden bases. Any phi statement
|
|
203 whose arguments share a common derived base name is entered into the
|
|
204 table with the derived base name, an (arbitrary) index of zero, and a
|
|
205 stride of 1. A statement with a hidden basis can then be detected by
|
|
206 simply looking up its feeding phi definition in the candidate table,
|
|
207 extracting the derived base name, and searching for a basis in the
|
|
208 usual manner after substituting the derived base name.
|
|
209
|
|
210 Note that the transformation is only valid when the original phi and
|
|
211 the statements that define the phi's arguments are all at the same
|
|
212 position in the loop hierarchy. */
|
|
213
|
|
214
|
|
215 /* Index into the candidate vector, offset by 1. VECs are zero-based,
|
|
216 while cand_idx's are one-based, with zero indicating null. */
|
|
217 typedef unsigned cand_idx;
|
|
218
|
|
219 /* The kind of candidate. */
|
|
220 enum cand_kind
|
|
221 {
|
|
222 CAND_MULT,
|
|
223 CAND_ADD,
|
|
224 CAND_REF,
|
|
225 CAND_PHI
|
|
226 };
|
|
227
|
145
|
228 class slsr_cand_d
|
111
|
229 {
|
145
|
230 public:
|
111
|
231 /* The candidate statement S1. */
|
|
232 gimple *cand_stmt;
|
|
233
|
|
234 /* The base expression B: often an SSA name, but not always. */
|
|
235 tree base_expr;
|
|
236
|
|
237 /* The stride S. */
|
|
238 tree stride;
|
|
239
|
|
240 /* The index constant i. */
|
|
241 widest_int index;
|
|
242
|
|
243 /* The type of the candidate. This is normally the type of base_expr,
|
|
244 but casts may have occurred when combining feeding instructions.
|
|
245 A candidate can only be a basis for candidates of the same final type.
|
|
246 (For CAND_REFs, this is the type to be used for operand 1 of the
|
|
247 replacement MEM_REF.) */
|
|
248 tree cand_type;
|
|
249
|
|
250 /* The type to be used to interpret the stride field when the stride
|
|
251 is not a constant. Normally the same as the type of the recorded
|
|
252 stride, but when the stride has been cast we need to maintain that
|
|
253 knowledge in order to make legal substitutions without losing
|
|
254 precision. When the stride is a constant, this will be sizetype. */
|
|
255 tree stride_type;
|
|
256
|
|
257 /* The kind of candidate (CAND_MULT, etc.). */
|
|
258 enum cand_kind kind;
|
|
259
|
|
260 /* Index of this candidate in the candidate vector. */
|
|
261 cand_idx cand_num;
|
|
262
|
|
263 /* Index of the next candidate record for the same statement.
|
|
264 A statement may be useful in more than one way (e.g., due to
|
|
265 commutativity). So we can have multiple "interpretations"
|
|
266 of a statement. */
|
|
267 cand_idx next_interp;
|
|
268
|
131
|
269 /* Index of the first candidate record in a chain for the same
|
|
270 statement. */
|
|
271 cand_idx first_interp;
|
|
272
|
111
|
273 /* Index of the basis statement S0, if any, in the candidate vector. */
|
|
274 cand_idx basis;
|
|
275
|
|
276 /* First candidate for which this candidate is a basis, if one exists. */
|
|
277 cand_idx dependent;
|
|
278
|
|
279 /* Next candidate having the same basis as this one. */
|
|
280 cand_idx sibling;
|
|
281
|
|
282 /* If this is a conditional candidate, the CAND_PHI candidate
|
|
283 that defines the base SSA name B. */
|
|
284 cand_idx def_phi;
|
|
285
|
|
286 /* Savings that can be expected from eliminating dead code if this
|
|
287 candidate is replaced. */
|
|
288 int dead_savings;
|
|
289
|
|
290 /* For PHI candidates, use a visited flag to keep from processing the
|
|
291 same PHI twice from multiple paths. */
|
|
292 int visited;
|
|
293
|
|
294 /* We sometimes have to cache a phi basis with a phi candidate to
|
|
295 avoid processing it twice. Valid only if visited==1. */
|
|
296 tree cached_basis;
|
|
297 };
|
|
298
|
145
|
299 typedef class slsr_cand_d slsr_cand, *slsr_cand_t;
|
|
300 typedef const class slsr_cand_d *const_slsr_cand_t;
|
111
|
301
|
|
302 /* Pointers to candidates are chained together as part of a mapping
|
|
303 from base expressions to the candidates that use them. */
|
|
304
|
|
305 struct cand_chain_d
|
|
306 {
|
|
307 /* Base expression for the chain of candidates: often, but not
|
|
308 always, an SSA name. */
|
|
309 tree base_expr;
|
|
310
|
|
311 /* Pointer to a candidate. */
|
|
312 slsr_cand_t cand;
|
|
313
|
|
314 /* Chain pointer. */
|
|
315 struct cand_chain_d *next;
|
|
316
|
|
317 };
|
|
318
|
|
319 typedef struct cand_chain_d cand_chain, *cand_chain_t;
|
|
320 typedef const struct cand_chain_d *const_cand_chain_t;
|
|
321
|
|
322 /* Information about a unique "increment" associated with candidates
|
|
323 having an SSA name for a stride. An increment is the difference
|
|
324 between the index of the candidate and the index of its basis,
|
|
325 i.e., (i - i') as discussed in the module commentary.
|
|
326
|
|
327 When we are not going to generate address arithmetic we treat
|
|
328 increments that differ only in sign as the same, allowing sharing
|
|
329 of the cost of initializers. The absolute value of the increment
|
|
330 is stored in the incr_info. */
|
|
331
|
145
|
332 class incr_info_d
|
111
|
333 {
|
145
|
334 public:
|
111
|
335 /* The increment that relates a candidate to its basis. */
|
|
336 widest_int incr;
|
|
337
|
|
338 /* How many times the increment occurs in the candidate tree. */
|
|
339 unsigned count;
|
|
340
|
|
341 /* Cost of replacing candidates using this increment. Negative and
|
|
342 zero costs indicate replacement should be performed. */
|
|
343 int cost;
|
|
344
|
|
345 /* If this increment is profitable but is not -1, 0, or 1, it requires
|
|
346 an initializer T_0 = stride * incr to be found or introduced in the
|
|
347 nearest common dominator of all candidates. This field holds T_0
|
|
348 for subsequent use. */
|
|
349 tree initializer;
|
|
350
|
|
351 /* If the initializer was found to already exist, this is the block
|
|
352 where it was found. */
|
|
353 basic_block init_bb;
|
|
354 };
|
|
355
|
145
|
356 typedef class incr_info_d incr_info, *incr_info_t;
|
111
|
357
|
|
358 /* Candidates are maintained in a vector. If candidate X dominates
|
|
359 candidate Y, then X appears before Y in the vector; but the
|
|
360 converse does not necessarily hold. */
|
|
361 static vec<slsr_cand_t> cand_vec;
|
|
362
|
|
363 enum cost_consts
|
|
364 {
|
|
365 COST_NEUTRAL = 0,
|
|
366 COST_INFINITE = 1000
|
|
367 };
|
|
368
|
|
369 enum stride_status
|
|
370 {
|
|
371 UNKNOWN_STRIDE = 0,
|
|
372 KNOWN_STRIDE = 1
|
|
373 };
|
|
374
|
|
375 enum phi_adjust_status
|
|
376 {
|
|
377 NOT_PHI_ADJUST = 0,
|
|
378 PHI_ADJUST = 1
|
|
379 };
|
|
380
|
|
381 enum count_phis_status
|
|
382 {
|
|
383 DONT_COUNT_PHIS = 0,
|
|
384 COUNT_PHIS = 1
|
|
385 };
|
|
386
|
|
387 /* Constrain how many PHI nodes we will visit for a conditional
|
|
388 candidate (depth and breadth). */
|
|
389 const int MAX_SPREAD = 16;
|
|
390
|
|
391 /* Pointer map embodying a mapping from statements to candidates. */
|
|
392 static hash_map<gimple *, slsr_cand_t> *stmt_cand_map;
|
|
393
|
|
394 /* Obstack for candidates. */
|
|
395 static struct obstack cand_obstack;
|
|
396
|
|
397 /* Obstack for candidate chains. */
|
|
398 static struct obstack chain_obstack;
|
|
399
|
|
400 /* An array INCR_VEC of incr_infos is used during analysis of related
|
|
401 candidates having an SSA name for a stride. INCR_VEC_LEN describes
|
|
402 its current length. MAX_INCR_VEC_LEN is used to avoid costly
|
|
403 pathological cases. */
|
|
404 static incr_info_t incr_vec;
|
|
405 static unsigned incr_vec_len;
|
|
406 const int MAX_INCR_VEC_LEN = 16;
|
|
407
|
|
408 /* For a chain of candidates with unknown stride, indicates whether or not
|
|
409 we must generate pointer arithmetic when replacing statements. */
|
|
410 static bool address_arithmetic_p;
|
|
411
|
|
412 /* Forward function declarations. */
|
|
413 static slsr_cand_t base_cand_from_table (tree);
|
|
414 static tree introduce_cast_before_cand (slsr_cand_t, tree, tree);
|
|
415 static bool legal_cast_p_1 (tree, tree);
|
|
416
|
|
417 /* Produce a pointer to the IDX'th candidate in the candidate vector. */
|
|
418
|
|
419 static slsr_cand_t
|
|
420 lookup_cand (cand_idx idx)
|
|
421 {
|
145
|
422 return cand_vec[idx];
|
111
|
423 }
|
|
424
|
|
425 /* Helper for hashing a candidate chain header. */
|
|
426
|
|
427 struct cand_chain_hasher : nofree_ptr_hash <cand_chain>
|
|
428 {
|
|
429 static inline hashval_t hash (const cand_chain *);
|
|
430 static inline bool equal (const cand_chain *, const cand_chain *);
|
|
431 };
|
|
432
|
|
433 inline hashval_t
|
|
434 cand_chain_hasher::hash (const cand_chain *p)
|
|
435 {
|
|
436 tree base_expr = p->base_expr;
|
|
437 return iterative_hash_expr (base_expr, 0);
|
|
438 }
|
|
439
|
|
440 inline bool
|
|
441 cand_chain_hasher::equal (const cand_chain *chain1, const cand_chain *chain2)
|
|
442 {
|
|
443 return operand_equal_p (chain1->base_expr, chain2->base_expr, 0);
|
|
444 }
|
|
445
|
|
446 /* Hash table embodying a mapping from base exprs to chains of candidates. */
|
|
447 static hash_table<cand_chain_hasher> *base_cand_map;
|
|
448
|
|
449 /* Pointer map used by tree_to_aff_combination_expand. */
|
|
450 static hash_map<tree, name_expansion *> *name_expansions;
|
|
451 /* Pointer map embodying a mapping from bases to alternative bases. */
|
|
452 static hash_map<tree, tree> *alt_base_map;
|
|
453
|
|
454 /* Given BASE, use the tree affine combiniation facilities to
|
|
455 find the underlying tree expression for BASE, with any
|
|
456 immediate offset excluded.
|
|
457
|
|
458 N.B. we should eliminate this backtracking with better forward
|
|
459 analysis in a future release. */
|
|
460
|
|
461 static tree
|
|
462 get_alternative_base (tree base)
|
|
463 {
|
|
464 tree *result = alt_base_map->get (base);
|
|
465
|
|
466 if (result == NULL)
|
|
467 {
|
|
468 tree expr;
|
|
469 aff_tree aff;
|
|
470
|
|
471 tree_to_aff_combination_expand (base, TREE_TYPE (base),
|
|
472 &aff, &name_expansions);
|
|
473 aff.offset = 0;
|
|
474 expr = aff_combination_to_tree (&aff);
|
|
475
|
|
476 gcc_assert (!alt_base_map->put (base, base == expr ? NULL : expr));
|
|
477
|
|
478 return expr == base ? NULL : expr;
|
|
479 }
|
|
480
|
|
481 return *result;
|
|
482 }
|
|
483
|
|
484 /* Look in the candidate table for a CAND_PHI that defines BASE and
|
|
485 return it if found; otherwise return NULL. */
|
|
486
|
|
487 static cand_idx
|
|
488 find_phi_def (tree base)
|
|
489 {
|
|
490 slsr_cand_t c;
|
|
491
|
|
492 if (TREE_CODE (base) != SSA_NAME)
|
|
493 return 0;
|
|
494
|
|
495 c = base_cand_from_table (base);
|
|
496
|
|
497 if (!c || c->kind != CAND_PHI
|
|
498 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_phi_result (c->cand_stmt)))
|
|
499 return 0;
|
|
500
|
|
501 return c->cand_num;
|
|
502 }
|
|
503
|
|
504 /* Determine whether all uses of NAME are directly or indirectly
|
|
505 used by STMT. That is, we want to know whether if STMT goes
|
|
506 dead, the definition of NAME also goes dead. */
|
|
507 static bool
|
|
508 uses_consumed_by_stmt (tree name, gimple *stmt, unsigned recurse = 0)
|
|
509 {
|
|
510 gimple *use_stmt;
|
|
511 imm_use_iterator iter;
|
|
512 bool retval = true;
|
|
513
|
|
514 FOR_EACH_IMM_USE_STMT (use_stmt, iter, name)
|
|
515 {
|
|
516 if (use_stmt == stmt || is_gimple_debug (use_stmt))
|
|
517 continue;
|
|
518
|
|
519 if (!is_gimple_assign (use_stmt)
|
|
520 || !gimple_get_lhs (use_stmt)
|
|
521 || !is_gimple_reg (gimple_get_lhs (use_stmt))
|
|
522 || recurse >= 10
|
|
523 || !uses_consumed_by_stmt (gimple_get_lhs (use_stmt), stmt,
|
|
524 recurse + 1))
|
|
525 {
|
|
526 retval = false;
|
|
527 BREAK_FROM_IMM_USE_STMT (iter);
|
|
528 }
|
|
529 }
|
|
530
|
|
531 return retval;
|
|
532 }
|
|
533
|
|
534 /* Helper routine for find_basis_for_candidate. May be called twice:
|
|
535 once for the candidate's base expr, and optionally again either for
|
|
536 the candidate's phi definition or for a CAND_REF's alternative base
|
|
537 expression. */
|
|
538
|
|
539 static slsr_cand_t
|
|
540 find_basis_for_base_expr (slsr_cand_t c, tree base_expr)
|
|
541 {
|
|
542 cand_chain mapping_key;
|
|
543 cand_chain_t chain;
|
|
544 slsr_cand_t basis = NULL;
|
|
545
|
|
546 // Limit potential of N^2 behavior for long candidate chains.
|
|
547 int iters = 0;
|
145
|
548 int max_iters = param_max_slsr_candidate_scan;
|
111
|
549
|
|
550 mapping_key.base_expr = base_expr;
|
|
551 chain = base_cand_map->find (&mapping_key);
|
|
552
|
|
553 for (; chain && iters < max_iters; chain = chain->next, ++iters)
|
|
554 {
|
|
555 slsr_cand_t one_basis = chain->cand;
|
|
556
|
|
557 if (one_basis->kind != c->kind
|
|
558 || one_basis->cand_stmt == c->cand_stmt
|
|
559 || !operand_equal_p (one_basis->stride, c->stride, 0)
|
|
560 || !types_compatible_p (one_basis->cand_type, c->cand_type)
|
|
561 || !types_compatible_p (one_basis->stride_type, c->stride_type)
|
|
562 || !dominated_by_p (CDI_DOMINATORS,
|
|
563 gimple_bb (c->cand_stmt),
|
|
564 gimple_bb (one_basis->cand_stmt)))
|
|
565 continue;
|
|
566
|
|
567 tree lhs = gimple_assign_lhs (one_basis->cand_stmt);
|
|
568 if (lhs && TREE_CODE (lhs) == SSA_NAME
|
|
569 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (lhs))
|
|
570 continue;
|
|
571
|
|
572 if (!basis || basis->cand_num < one_basis->cand_num)
|
|
573 basis = one_basis;
|
|
574 }
|
|
575
|
|
576 return basis;
|
|
577 }
|
|
578
|
|
579 /* Use the base expr from candidate C to look for possible candidates
|
|
580 that can serve as a basis for C. Each potential basis must also
|
|
581 appear in a block that dominates the candidate statement and have
|
|
582 the same stride and type. If more than one possible basis exists,
|
|
583 the one with highest index in the vector is chosen; this will be
|
|
584 the most immediately dominating basis. */
|
|
585
|
|
586 static int
|
|
587 find_basis_for_candidate (slsr_cand_t c)
|
|
588 {
|
|
589 slsr_cand_t basis = find_basis_for_base_expr (c, c->base_expr);
|
|
590
|
|
591 /* If a candidate doesn't have a basis using its base expression,
|
|
592 it may have a basis hidden by one or more intervening phis. */
|
|
593 if (!basis && c->def_phi)
|
|
594 {
|
|
595 basic_block basis_bb, phi_bb;
|
|
596 slsr_cand_t phi_cand = lookup_cand (c->def_phi);
|
|
597 basis = find_basis_for_base_expr (c, phi_cand->base_expr);
|
|
598
|
|
599 if (basis)
|
|
600 {
|
|
601 /* A hidden basis must dominate the phi-definition of the
|
|
602 candidate's base name. */
|
|
603 phi_bb = gimple_bb (phi_cand->cand_stmt);
|
|
604 basis_bb = gimple_bb (basis->cand_stmt);
|
|
605
|
|
606 if (phi_bb == basis_bb
|
|
607 || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
|
|
608 {
|
|
609 basis = NULL;
|
|
610 c->basis = 0;
|
|
611 }
|
|
612
|
|
613 /* If we found a hidden basis, estimate additional dead-code
|
|
614 savings if the phi and its feeding statements can be removed. */
|
|
615 tree feeding_var = gimple_phi_result (phi_cand->cand_stmt);
|
|
616 if (basis && uses_consumed_by_stmt (feeding_var, c->cand_stmt))
|
|
617 c->dead_savings += phi_cand->dead_savings;
|
|
618 }
|
|
619 }
|
|
620
|
|
621 if (flag_expensive_optimizations && !basis && c->kind == CAND_REF)
|
|
622 {
|
|
623 tree alt_base_expr = get_alternative_base (c->base_expr);
|
|
624 if (alt_base_expr)
|
|
625 basis = find_basis_for_base_expr (c, alt_base_expr);
|
|
626 }
|
|
627
|
|
628 if (basis)
|
|
629 {
|
|
630 c->sibling = basis->dependent;
|
|
631 basis->dependent = c->cand_num;
|
|
632 return basis->cand_num;
|
|
633 }
|
|
634
|
|
635 return 0;
|
|
636 }
|
|
637
|
|
638 /* Record a mapping from BASE to C, indicating that C may potentially serve
|
|
639 as a basis using that base expression. BASE may be the same as
|
|
640 C->BASE_EXPR; alternatively BASE can be a different tree that share the
|
|
641 underlining expression of C->BASE_EXPR. */
|
|
642
|
|
643 static void
|
|
644 record_potential_basis (slsr_cand_t c, tree base)
|
|
645 {
|
|
646 cand_chain_t node;
|
|
647 cand_chain **slot;
|
|
648
|
|
649 gcc_assert (base);
|
|
650
|
|
651 node = (cand_chain_t) obstack_alloc (&chain_obstack, sizeof (cand_chain));
|
|
652 node->base_expr = base;
|
|
653 node->cand = c;
|
|
654 node->next = NULL;
|
|
655 slot = base_cand_map->find_slot (node, INSERT);
|
|
656
|
|
657 if (*slot)
|
|
658 {
|
|
659 cand_chain_t head = (cand_chain_t) (*slot);
|
|
660 node->next = head->next;
|
|
661 head->next = node;
|
|
662 }
|
|
663 else
|
|
664 *slot = node;
|
|
665 }
|
|
666
|
|
667 /* Allocate storage for a new candidate and initialize its fields.
|
|
668 Attempt to find a basis for the candidate.
|
|
669
|
|
670 For CAND_REF, an alternative base may also be recorded and used
|
|
671 to find a basis. This helps cases where the expression hidden
|
|
672 behind BASE (which is usually an SSA_NAME) has immediate offset,
|
|
673 e.g.
|
|
674
|
|
675 a2[i][j] = 1;
|
|
676 a2[i + 20][j] = 2; */
|
|
677
|
|
678 static slsr_cand_t
|
|
679 alloc_cand_and_find_basis (enum cand_kind kind, gimple *gs, tree base,
|
|
680 const widest_int &index, tree stride, tree ctype,
|
|
681 tree stype, unsigned savings)
|
|
682 {
|
|
683 slsr_cand_t c = (slsr_cand_t) obstack_alloc (&cand_obstack,
|
|
684 sizeof (slsr_cand));
|
|
685 c->cand_stmt = gs;
|
|
686 c->base_expr = base;
|
|
687 c->stride = stride;
|
|
688 c->index = index;
|
|
689 c->cand_type = ctype;
|
|
690 c->stride_type = stype;
|
|
691 c->kind = kind;
|
145
|
692 c->cand_num = cand_vec.length ();
|
111
|
693 c->next_interp = 0;
|
131
|
694 c->first_interp = c->cand_num;
|
111
|
695 c->dependent = 0;
|
|
696 c->sibling = 0;
|
|
697 c->def_phi = kind == CAND_MULT ? find_phi_def (base) : 0;
|
|
698 c->dead_savings = savings;
|
|
699 c->visited = 0;
|
|
700 c->cached_basis = NULL_TREE;
|
|
701
|
|
702 cand_vec.safe_push (c);
|
|
703
|
|
704 if (kind == CAND_PHI)
|
|
705 c->basis = 0;
|
|
706 else
|
|
707 c->basis = find_basis_for_candidate (c);
|
|
708
|
|
709 record_potential_basis (c, base);
|
|
710 if (flag_expensive_optimizations && kind == CAND_REF)
|
|
711 {
|
|
712 tree alt_base = get_alternative_base (base);
|
|
713 if (alt_base)
|
|
714 record_potential_basis (c, alt_base);
|
|
715 }
|
|
716
|
|
717 return c;
|
|
718 }
|
|
719
|
|
720 /* Determine the target cost of statement GS when compiling according
|
|
721 to SPEED. */
|
|
722
|
|
723 static int
|
|
724 stmt_cost (gimple *gs, bool speed)
|
|
725 {
|
|
726 tree lhs, rhs1, rhs2;
|
|
727 machine_mode lhs_mode;
|
|
728
|
|
729 gcc_assert (is_gimple_assign (gs));
|
|
730 lhs = gimple_assign_lhs (gs);
|
|
731 rhs1 = gimple_assign_rhs1 (gs);
|
|
732 lhs_mode = TYPE_MODE (TREE_TYPE (lhs));
|
|
733
|
|
734 switch (gimple_assign_rhs_code (gs))
|
|
735 {
|
|
736 case MULT_EXPR:
|
|
737 rhs2 = gimple_assign_rhs2 (gs);
|
|
738
|
|
739 if (tree_fits_shwi_p (rhs2))
|
|
740 return mult_by_coeff_cost (tree_to_shwi (rhs2), lhs_mode, speed);
|
|
741
|
|
742 gcc_assert (TREE_CODE (rhs1) != INTEGER_CST);
|
|
743 return mul_cost (speed, lhs_mode);
|
|
744
|
|
745 case PLUS_EXPR:
|
|
746 case POINTER_PLUS_EXPR:
|
|
747 case MINUS_EXPR:
|
|
748 return add_cost (speed, lhs_mode);
|
|
749
|
|
750 case NEGATE_EXPR:
|
|
751 return neg_cost (speed, lhs_mode);
|
|
752
|
|
753 CASE_CONVERT:
|
|
754 return convert_cost (lhs_mode, TYPE_MODE (TREE_TYPE (rhs1)), speed);
|
|
755
|
|
756 /* Note that we don't assign costs to copies that in most cases
|
|
757 will go away. */
|
|
758 case SSA_NAME:
|
|
759 return 0;
|
|
760
|
|
761 default:
|
|
762 ;
|
|
763 }
|
|
764
|
|
765 gcc_unreachable ();
|
|
766 return 0;
|
|
767 }
|
|
768
|
|
769 /* Look up the defining statement for BASE_IN and return a pointer
|
|
770 to its candidate in the candidate table, if any; otherwise NULL.
|
|
771 Only CAND_ADD and CAND_MULT candidates are returned. */
|
|
772
|
|
773 static slsr_cand_t
|
|
774 base_cand_from_table (tree base_in)
|
|
775 {
|
|
776 slsr_cand_t *result;
|
|
777
|
|
778 gimple *def = SSA_NAME_DEF_STMT (base_in);
|
|
779 if (!def)
|
|
780 return (slsr_cand_t) NULL;
|
|
781
|
|
782 result = stmt_cand_map->get (def);
|
|
783
|
|
784 if (result && (*result)->kind != CAND_REF)
|
|
785 return *result;
|
|
786
|
|
787 return (slsr_cand_t) NULL;
|
|
788 }
|
|
789
|
|
790 /* Add an entry to the statement-to-candidate mapping. */
|
|
791
|
|
792 static void
|
|
793 add_cand_for_stmt (gimple *gs, slsr_cand_t c)
|
|
794 {
|
|
795 gcc_assert (!stmt_cand_map->put (gs, c));
|
|
796 }
|
|
797
|
|
798 /* Given PHI which contains a phi statement, determine whether it
|
|
799 satisfies all the requirements of a phi candidate. If so, create
|
|
800 a candidate. Note that a CAND_PHI never has a basis itself, but
|
|
801 is used to help find a basis for subsequent candidates. */
|
|
802
|
|
803 static void
|
|
804 slsr_process_phi (gphi *phi, bool speed)
|
|
805 {
|
|
806 unsigned i;
|
|
807 tree arg0_base = NULL_TREE, base_type;
|
|
808 slsr_cand_t c;
|
145
|
809 class loop *cand_loop = gimple_bb (phi)->loop_father;
|
111
|
810 unsigned savings = 0;
|
|
811
|
|
812 /* A CAND_PHI requires each of its arguments to have the same
|
|
813 derived base name. (See the module header commentary for a
|
|
814 definition of derived base names.) Furthermore, all feeding
|
|
815 definitions must be in the same position in the loop hierarchy
|
|
816 as PHI. */
|
|
817
|
|
818 for (i = 0; i < gimple_phi_num_args (phi); i++)
|
|
819 {
|
|
820 slsr_cand_t arg_cand;
|
|
821 tree arg = gimple_phi_arg_def (phi, i);
|
|
822 tree derived_base_name = NULL_TREE;
|
|
823 gimple *arg_stmt = NULL;
|
|
824 basic_block arg_bb = NULL;
|
|
825
|
|
826 if (TREE_CODE (arg) != SSA_NAME)
|
|
827 return;
|
|
828
|
|
829 arg_cand = base_cand_from_table (arg);
|
|
830
|
|
831 if (arg_cand)
|
|
832 {
|
|
833 while (arg_cand->kind != CAND_ADD && arg_cand->kind != CAND_PHI)
|
|
834 {
|
|
835 if (!arg_cand->next_interp)
|
|
836 return;
|
|
837
|
|
838 arg_cand = lookup_cand (arg_cand->next_interp);
|
|
839 }
|
|
840
|
|
841 if (!integer_onep (arg_cand->stride))
|
|
842 return;
|
|
843
|
|
844 derived_base_name = arg_cand->base_expr;
|
|
845 arg_stmt = arg_cand->cand_stmt;
|
|
846 arg_bb = gimple_bb (arg_stmt);
|
|
847
|
|
848 /* Gather potential dead code savings if the phi statement
|
|
849 can be removed later on. */
|
|
850 if (uses_consumed_by_stmt (arg, phi))
|
|
851 {
|
|
852 if (gimple_code (arg_stmt) == GIMPLE_PHI)
|
|
853 savings += arg_cand->dead_savings;
|
|
854 else
|
|
855 savings += stmt_cost (arg_stmt, speed);
|
|
856 }
|
|
857 }
|
|
858 else if (SSA_NAME_IS_DEFAULT_DEF (arg))
|
|
859 {
|
|
860 derived_base_name = arg;
|
|
861 arg_bb = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun));
|
|
862 }
|
|
863
|
|
864 if (!arg_bb || arg_bb->loop_father != cand_loop)
|
|
865 return;
|
|
866
|
|
867 if (i == 0)
|
|
868 arg0_base = derived_base_name;
|
|
869 else if (!operand_equal_p (derived_base_name, arg0_base, 0))
|
|
870 return;
|
|
871 }
|
|
872
|
|
873 /* Create the candidate. "alloc_cand_and_find_basis" is named
|
|
874 misleadingly for this case, as no basis will be sought for a
|
|
875 CAND_PHI. */
|
|
876 base_type = TREE_TYPE (arg0_base);
|
|
877
|
|
878 c = alloc_cand_and_find_basis (CAND_PHI, phi, arg0_base,
|
|
879 0, integer_one_node, base_type,
|
|
880 sizetype, savings);
|
|
881
|
|
882 /* Add the candidate to the statement-candidate mapping. */
|
|
883 add_cand_for_stmt (phi, c);
|
|
884 }
|
|
885
|
|
886 /* Given PBASE which is a pointer to tree, look up the defining
|
|
887 statement for it and check whether the candidate is in the
|
|
888 form of:
|
|
889
|
|
890 X = B + (1 * S), S is integer constant
|
|
891 X = B + (i * S), S is integer one
|
|
892
|
|
893 If so, set PBASE to the candidate's base_expr and return double
|
|
894 int (i * S).
|
|
895 Otherwise, just return double int zero. */
|
|
896
|
|
897 static widest_int
|
|
898 backtrace_base_for_ref (tree *pbase)
|
|
899 {
|
|
900 tree base_in = *pbase;
|
|
901 slsr_cand_t base_cand;
|
|
902
|
|
903 STRIP_NOPS (base_in);
|
|
904
|
|
905 /* Strip off widening conversion(s) to handle cases where
|
|
906 e.g. 'B' is widened from an 'int' in order to calculate
|
|
907 a 64-bit address. */
|
|
908 if (CONVERT_EXPR_P (base_in)
|
|
909 && legal_cast_p_1 (TREE_TYPE (base_in),
|
|
910 TREE_TYPE (TREE_OPERAND (base_in, 0))))
|
|
911 base_in = get_unwidened (base_in, NULL_TREE);
|
|
912
|
|
913 if (TREE_CODE (base_in) != SSA_NAME)
|
|
914 return 0;
|
|
915
|
|
916 base_cand = base_cand_from_table (base_in);
|
|
917
|
|
918 while (base_cand && base_cand->kind != CAND_PHI)
|
|
919 {
|
|
920 if (base_cand->kind == CAND_ADD
|
|
921 && base_cand->index == 1
|
|
922 && TREE_CODE (base_cand->stride) == INTEGER_CST)
|
|
923 {
|
|
924 /* X = B + (1 * S), S is integer constant. */
|
|
925 *pbase = base_cand->base_expr;
|
|
926 return wi::to_widest (base_cand->stride);
|
|
927 }
|
|
928 else if (base_cand->kind == CAND_ADD
|
|
929 && TREE_CODE (base_cand->stride) == INTEGER_CST
|
|
930 && integer_onep (base_cand->stride))
|
|
931 {
|
|
932 /* X = B + (i * S), S is integer one. */
|
|
933 *pbase = base_cand->base_expr;
|
|
934 return base_cand->index;
|
|
935 }
|
|
936
|
145
|
937 base_cand = lookup_cand (base_cand->next_interp);
|
111
|
938 }
|
|
939
|
|
940 return 0;
|
|
941 }
|
|
942
|
|
943 /* Look for the following pattern:
|
|
944
|
|
945 *PBASE: MEM_REF (T1, C1)
|
|
946
|
|
947 *POFFSET: MULT_EXPR (T2, C3) [C2 is zero]
|
|
948 or
|
|
949 MULT_EXPR (PLUS_EXPR (T2, C2), C3)
|
|
950 or
|
|
951 MULT_EXPR (MINUS_EXPR (T2, -C2), C3)
|
|
952
|
|
953 *PINDEX: C4 * BITS_PER_UNIT
|
|
954
|
|
955 If not present, leave the input values unchanged and return FALSE.
|
|
956 Otherwise, modify the input values as follows and return TRUE:
|
|
957
|
|
958 *PBASE: T1
|
|
959 *POFFSET: MULT_EXPR (T2, C3)
|
|
960 *PINDEX: C1 + (C2 * C3) + C4
|
|
961
|
|
962 When T2 is recorded by a CAND_ADD in the form of (T2' + C5), it
|
|
963 will be further restructured to:
|
|
964
|
|
965 *PBASE: T1
|
|
966 *POFFSET: MULT_EXPR (T2', C3)
|
|
967 *PINDEX: C1 + (C2 * C3) + C4 + (C5 * C3) */
|
|
968
|
|
969 static bool
|
|
970 restructure_reference (tree *pbase, tree *poffset, widest_int *pindex,
|
|
971 tree *ptype)
|
|
972 {
|
|
973 tree base = *pbase, offset = *poffset;
|
|
974 widest_int index = *pindex;
|
|
975 tree mult_op0, t1, t2, type;
|
|
976 widest_int c1, c2, c3, c4, c5;
|
131
|
977 offset_int mem_offset;
|
111
|
978
|
|
979 if (!base
|
|
980 || !offset
|
|
981 || TREE_CODE (base) != MEM_REF
|
131
|
982 || !mem_ref_offset (base).is_constant (&mem_offset)
|
111
|
983 || TREE_CODE (offset) != MULT_EXPR
|
|
984 || TREE_CODE (TREE_OPERAND (offset, 1)) != INTEGER_CST
|
|
985 || wi::umod_floor (index, BITS_PER_UNIT) != 0)
|
|
986 return false;
|
|
987
|
|
988 t1 = TREE_OPERAND (base, 0);
|
131
|
989 c1 = widest_int::from (mem_offset, SIGNED);
|
111
|
990 type = TREE_TYPE (TREE_OPERAND (base, 1));
|
|
991
|
|
992 mult_op0 = TREE_OPERAND (offset, 0);
|
|
993 c3 = wi::to_widest (TREE_OPERAND (offset, 1));
|
|
994
|
|
995 if (TREE_CODE (mult_op0) == PLUS_EXPR)
|
|
996
|
|
997 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
|
|
998 {
|
|
999 t2 = TREE_OPERAND (mult_op0, 0);
|
|
1000 c2 = wi::to_widest (TREE_OPERAND (mult_op0, 1));
|
|
1001 }
|
|
1002 else
|
|
1003 return false;
|
|
1004
|
|
1005 else if (TREE_CODE (mult_op0) == MINUS_EXPR)
|
|
1006
|
|
1007 if (TREE_CODE (TREE_OPERAND (mult_op0, 1)) == INTEGER_CST)
|
|
1008 {
|
|
1009 t2 = TREE_OPERAND (mult_op0, 0);
|
|
1010 c2 = -wi::to_widest (TREE_OPERAND (mult_op0, 1));
|
|
1011 }
|
|
1012 else
|
|
1013 return false;
|
|
1014
|
|
1015 else
|
|
1016 {
|
|
1017 t2 = mult_op0;
|
|
1018 c2 = 0;
|
|
1019 }
|
|
1020
|
|
1021 c4 = index >> LOG2_BITS_PER_UNIT;
|
|
1022 c5 = backtrace_base_for_ref (&t2);
|
|
1023
|
|
1024 *pbase = t1;
|
|
1025 *poffset = fold_build2 (MULT_EXPR, sizetype, fold_convert (sizetype, t2),
|
|
1026 wide_int_to_tree (sizetype, c3));
|
|
1027 *pindex = c1 + c2 * c3 + c4 + c5 * c3;
|
|
1028 *ptype = type;
|
|
1029
|
|
1030 return true;
|
|
1031 }
|
|
1032
|
|
1033 /* Given GS which contains a data reference, create a CAND_REF entry in
|
|
1034 the candidate table and attempt to find a basis. */
|
|
1035
|
|
1036 static void
|
|
1037 slsr_process_ref (gimple *gs)
|
|
1038 {
|
|
1039 tree ref_expr, base, offset, type;
|
131
|
1040 poly_int64 bitsize, bitpos;
|
111
|
1041 machine_mode mode;
|
|
1042 int unsignedp, reversep, volatilep;
|
|
1043 slsr_cand_t c;
|
|
1044
|
|
1045 if (gimple_vdef (gs))
|
|
1046 ref_expr = gimple_assign_lhs (gs);
|
|
1047 else
|
|
1048 ref_expr = gimple_assign_rhs1 (gs);
|
|
1049
|
|
1050 if (!handled_component_p (ref_expr)
|
|
1051 || TREE_CODE (ref_expr) == BIT_FIELD_REF
|
|
1052 || (TREE_CODE (ref_expr) == COMPONENT_REF
|
|
1053 && DECL_BIT_FIELD (TREE_OPERAND (ref_expr, 1))))
|
|
1054 return;
|
|
1055
|
|
1056 base = get_inner_reference (ref_expr, &bitsize, &bitpos, &offset, &mode,
|
|
1057 &unsignedp, &reversep, &volatilep);
|
131
|
1058 HOST_WIDE_INT cbitpos;
|
|
1059 if (reversep || !bitpos.is_constant (&cbitpos))
|
111
|
1060 return;
|
131
|
1061 widest_int index = cbitpos;
|
111
|
1062
|
|
1063 if (!restructure_reference (&base, &offset, &index, &type))
|
|
1064 return;
|
|
1065
|
|
1066 c = alloc_cand_and_find_basis (CAND_REF, gs, base, index, offset,
|
|
1067 type, sizetype, 0);
|
|
1068
|
|
1069 /* Add the candidate to the statement-candidate mapping. */
|
|
1070 add_cand_for_stmt (gs, c);
|
|
1071 }
|
|
1072
|
|
1073 /* Create a candidate entry for a statement GS, where GS multiplies
|
|
1074 two SSA names BASE_IN and STRIDE_IN. Propagate any known information
|
|
1075 about the two SSA names into the new candidate. Return the new
|
|
1076 candidate. */
|
|
1077
|
|
1078 static slsr_cand_t
|
|
1079 create_mul_ssa_cand (gimple *gs, tree base_in, tree stride_in, bool speed)
|
|
1080 {
|
|
1081 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
|
|
1082 tree stype = NULL_TREE;
|
|
1083 widest_int index;
|
|
1084 unsigned savings = 0;
|
|
1085 slsr_cand_t c;
|
|
1086 slsr_cand_t base_cand = base_cand_from_table (base_in);
|
|
1087
|
|
1088 /* Look at all interpretations of the base candidate, if necessary,
|
|
1089 to find information to propagate into this candidate. */
|
|
1090 while (base_cand && !base && base_cand->kind != CAND_PHI)
|
|
1091 {
|
|
1092
|
|
1093 if (base_cand->kind == CAND_MULT && integer_onep (base_cand->stride))
|
|
1094 {
|
|
1095 /* Y = (B + i') * 1
|
|
1096 X = Y * Z
|
|
1097 ================
|
|
1098 X = (B + i') * Z */
|
|
1099 base = base_cand->base_expr;
|
|
1100 index = base_cand->index;
|
|
1101 stride = stride_in;
|
|
1102 ctype = base_cand->cand_type;
|
|
1103 stype = TREE_TYPE (stride_in);
|
|
1104 if (has_single_use (base_in))
|
|
1105 savings = (base_cand->dead_savings
|
|
1106 + stmt_cost (base_cand->cand_stmt, speed));
|
|
1107 }
|
|
1108 else if (base_cand->kind == CAND_ADD
|
|
1109 && TREE_CODE (base_cand->stride) == INTEGER_CST)
|
|
1110 {
|
|
1111 /* Y = B + (i' * S), S constant
|
|
1112 X = Y * Z
|
|
1113 ============================
|
|
1114 X = B + ((i' * S) * Z) */
|
|
1115 base = base_cand->base_expr;
|
|
1116 index = base_cand->index * wi::to_widest (base_cand->stride);
|
|
1117 stride = stride_in;
|
|
1118 ctype = base_cand->cand_type;
|
|
1119 stype = TREE_TYPE (stride_in);
|
|
1120 if (has_single_use (base_in))
|
|
1121 savings = (base_cand->dead_savings
|
|
1122 + stmt_cost (base_cand->cand_stmt, speed));
|
|
1123 }
|
|
1124
|
145
|
1125 base_cand = lookup_cand (base_cand->next_interp);
|
111
|
1126 }
|
|
1127
|
|
1128 if (!base)
|
|
1129 {
|
|
1130 /* No interpretations had anything useful to propagate, so
|
|
1131 produce X = (Y + 0) * Z. */
|
|
1132 base = base_in;
|
|
1133 index = 0;
|
|
1134 stride = stride_in;
|
|
1135 ctype = TREE_TYPE (base_in);
|
|
1136 stype = TREE_TYPE (stride_in);
|
|
1137 }
|
|
1138
|
|
1139 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
|
|
1140 ctype, stype, savings);
|
|
1141 return c;
|
|
1142 }
|
|
1143
|
|
1144 /* Create a candidate entry for a statement GS, where GS multiplies
|
|
1145 SSA name BASE_IN by constant STRIDE_IN. Propagate any known
|
|
1146 information about BASE_IN into the new candidate. Return the new
|
|
1147 candidate. */
|
|
1148
|
|
1149 static slsr_cand_t
|
|
1150 create_mul_imm_cand (gimple *gs, tree base_in, tree stride_in, bool speed)
|
|
1151 {
|
|
1152 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
|
|
1153 widest_int index, temp;
|
|
1154 unsigned savings = 0;
|
|
1155 slsr_cand_t c;
|
|
1156 slsr_cand_t base_cand = base_cand_from_table (base_in);
|
|
1157
|
|
1158 /* Look at all interpretations of the base candidate, if necessary,
|
|
1159 to find information to propagate into this candidate. */
|
|
1160 while (base_cand && !base && base_cand->kind != CAND_PHI)
|
|
1161 {
|
|
1162 if (base_cand->kind == CAND_MULT
|
|
1163 && TREE_CODE (base_cand->stride) == INTEGER_CST)
|
|
1164 {
|
|
1165 /* Y = (B + i') * S, S constant
|
|
1166 X = Y * c
|
|
1167 ============================
|
|
1168 X = (B + i') * (S * c) */
|
|
1169 temp = wi::to_widest (base_cand->stride) * wi::to_widest (stride_in);
|
|
1170 if (wi::fits_to_tree_p (temp, TREE_TYPE (stride_in)))
|
|
1171 {
|
|
1172 base = base_cand->base_expr;
|
|
1173 index = base_cand->index;
|
|
1174 stride = wide_int_to_tree (TREE_TYPE (stride_in), temp);
|
|
1175 ctype = base_cand->cand_type;
|
|
1176 if (has_single_use (base_in))
|
|
1177 savings = (base_cand->dead_savings
|
|
1178 + stmt_cost (base_cand->cand_stmt, speed));
|
|
1179 }
|
|
1180 }
|
|
1181 else if (base_cand->kind == CAND_ADD && integer_onep (base_cand->stride))
|
|
1182 {
|
|
1183 /* Y = B + (i' * 1)
|
|
1184 X = Y * c
|
|
1185 ===========================
|
|
1186 X = (B + i') * c */
|
|
1187 base = base_cand->base_expr;
|
|
1188 index = base_cand->index;
|
|
1189 stride = stride_in;
|
|
1190 ctype = base_cand->cand_type;
|
|
1191 if (has_single_use (base_in))
|
|
1192 savings = (base_cand->dead_savings
|
|
1193 + stmt_cost (base_cand->cand_stmt, speed));
|
|
1194 }
|
|
1195 else if (base_cand->kind == CAND_ADD
|
|
1196 && base_cand->index == 1
|
|
1197 && TREE_CODE (base_cand->stride) == INTEGER_CST)
|
|
1198 {
|
|
1199 /* Y = B + (1 * S), S constant
|
|
1200 X = Y * c
|
|
1201 ===========================
|
|
1202 X = (B + S) * c */
|
|
1203 base = base_cand->base_expr;
|
|
1204 index = wi::to_widest (base_cand->stride);
|
|
1205 stride = stride_in;
|
|
1206 ctype = base_cand->cand_type;
|
|
1207 if (has_single_use (base_in))
|
|
1208 savings = (base_cand->dead_savings
|
|
1209 + stmt_cost (base_cand->cand_stmt, speed));
|
|
1210 }
|
|
1211
|
145
|
1212 base_cand = lookup_cand (base_cand->next_interp);
|
111
|
1213 }
|
|
1214
|
|
1215 if (!base)
|
|
1216 {
|
|
1217 /* No interpretations had anything useful to propagate, so
|
|
1218 produce X = (Y + 0) * c. */
|
|
1219 base = base_in;
|
|
1220 index = 0;
|
|
1221 stride = stride_in;
|
|
1222 ctype = TREE_TYPE (base_in);
|
|
1223 }
|
|
1224
|
|
1225 c = alloc_cand_and_find_basis (CAND_MULT, gs, base, index, stride,
|
|
1226 ctype, sizetype, savings);
|
|
1227 return c;
|
|
1228 }
|
|
1229
|
|
1230 /* Given GS which is a multiply of scalar integers, make an appropriate
|
|
1231 entry in the candidate table. If this is a multiply of two SSA names,
|
|
1232 create two CAND_MULT interpretations and attempt to find a basis for
|
|
1233 each of them. Otherwise, create a single CAND_MULT and attempt to
|
|
1234 find a basis. */
|
|
1235
|
|
1236 static void
|
|
1237 slsr_process_mul (gimple *gs, tree rhs1, tree rhs2, bool speed)
|
|
1238 {
|
|
1239 slsr_cand_t c, c2;
|
|
1240
|
|
1241 /* If this is a multiply of an SSA name with itself, it is highly
|
|
1242 unlikely that we will get a strength reduction opportunity, so
|
|
1243 don't record it as a candidate. This simplifies the logic for
|
|
1244 finding a basis, so if this is removed that must be considered. */
|
|
1245 if (rhs1 == rhs2)
|
|
1246 return;
|
|
1247
|
|
1248 if (TREE_CODE (rhs2) == SSA_NAME)
|
|
1249 {
|
|
1250 /* Record an interpretation of this statement in the candidate table
|
|
1251 assuming RHS1 is the base expression and RHS2 is the stride. */
|
|
1252 c = create_mul_ssa_cand (gs, rhs1, rhs2, speed);
|
|
1253
|
|
1254 /* Add the first interpretation to the statement-candidate mapping. */
|
|
1255 add_cand_for_stmt (gs, c);
|
|
1256
|
|
1257 /* Record another interpretation of this statement assuming RHS1
|
|
1258 is the stride and RHS2 is the base expression. */
|
|
1259 c2 = create_mul_ssa_cand (gs, rhs2, rhs1, speed);
|
|
1260 c->next_interp = c2->cand_num;
|
131
|
1261 c2->first_interp = c->cand_num;
|
111
|
1262 }
|
145
|
1263 else if (TREE_CODE (rhs2) == INTEGER_CST && !integer_zerop (rhs2))
|
111
|
1264 {
|
|
1265 /* Record an interpretation for the multiply-immediate. */
|
|
1266 c = create_mul_imm_cand (gs, rhs1, rhs2, speed);
|
|
1267
|
|
1268 /* Add the interpretation to the statement-candidate mapping. */
|
|
1269 add_cand_for_stmt (gs, c);
|
|
1270 }
|
|
1271 }
|
|
1272
|
|
1273 /* Create a candidate entry for a statement GS, where GS adds two
|
|
1274 SSA names BASE_IN and ADDEND_IN if SUBTRACT_P is false, and
|
|
1275 subtracts ADDEND_IN from BASE_IN otherwise. Propagate any known
|
|
1276 information about the two SSA names into the new candidate.
|
|
1277 Return the new candidate. */
|
|
1278
|
|
1279 static slsr_cand_t
|
|
1280 create_add_ssa_cand (gimple *gs, tree base_in, tree addend_in,
|
|
1281 bool subtract_p, bool speed)
|
|
1282 {
|
|
1283 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
|
|
1284 tree stype = NULL_TREE;
|
|
1285 widest_int index;
|
|
1286 unsigned savings = 0;
|
|
1287 slsr_cand_t c;
|
|
1288 slsr_cand_t base_cand = base_cand_from_table (base_in);
|
|
1289 slsr_cand_t addend_cand = base_cand_from_table (addend_in);
|
|
1290
|
|
1291 /* The most useful transformation is a multiply-immediate feeding
|
|
1292 an add or subtract. Look for that first. */
|
|
1293 while (addend_cand && !base && addend_cand->kind != CAND_PHI)
|
|
1294 {
|
|
1295 if (addend_cand->kind == CAND_MULT
|
|
1296 && addend_cand->index == 0
|
|
1297 && TREE_CODE (addend_cand->stride) == INTEGER_CST)
|
|
1298 {
|
|
1299 /* Z = (B + 0) * S, S constant
|
|
1300 X = Y +/- Z
|
|
1301 ===========================
|
|
1302 X = Y + ((+/-1 * S) * B) */
|
|
1303 base = base_in;
|
|
1304 index = wi::to_widest (addend_cand->stride);
|
|
1305 if (subtract_p)
|
|
1306 index = -index;
|
|
1307 stride = addend_cand->base_expr;
|
|
1308 ctype = TREE_TYPE (base_in);
|
|
1309 stype = addend_cand->cand_type;
|
|
1310 if (has_single_use (addend_in))
|
|
1311 savings = (addend_cand->dead_savings
|
|
1312 + stmt_cost (addend_cand->cand_stmt, speed));
|
|
1313 }
|
|
1314
|
145
|
1315 addend_cand = lookup_cand (addend_cand->next_interp);
|
111
|
1316 }
|
|
1317
|
|
1318 while (base_cand && !base && base_cand->kind != CAND_PHI)
|
|
1319 {
|
|
1320 if (base_cand->kind == CAND_ADD
|
|
1321 && (base_cand->index == 0
|
|
1322 || operand_equal_p (base_cand->stride,
|
|
1323 integer_zero_node, 0)))
|
|
1324 {
|
|
1325 /* Y = B + (i' * S), i' * S = 0
|
|
1326 X = Y +/- Z
|
|
1327 ============================
|
|
1328 X = B + (+/-1 * Z) */
|
|
1329 base = base_cand->base_expr;
|
|
1330 index = subtract_p ? -1 : 1;
|
|
1331 stride = addend_in;
|
|
1332 ctype = base_cand->cand_type;
|
|
1333 stype = (TREE_CODE (addend_in) == INTEGER_CST ? sizetype
|
|
1334 : TREE_TYPE (addend_in));
|
|
1335 if (has_single_use (base_in))
|
|
1336 savings = (base_cand->dead_savings
|
|
1337 + stmt_cost (base_cand->cand_stmt, speed));
|
|
1338 }
|
|
1339 else if (subtract_p)
|
|
1340 {
|
|
1341 slsr_cand_t subtrahend_cand = base_cand_from_table (addend_in);
|
|
1342
|
|
1343 while (subtrahend_cand && !base && subtrahend_cand->kind != CAND_PHI)
|
|
1344 {
|
|
1345 if (subtrahend_cand->kind == CAND_MULT
|
|
1346 && subtrahend_cand->index == 0
|
|
1347 && TREE_CODE (subtrahend_cand->stride) == INTEGER_CST)
|
|
1348 {
|
|
1349 /* Z = (B + 0) * S, S constant
|
|
1350 X = Y - Z
|
|
1351 ===========================
|
|
1352 Value: X = Y + ((-1 * S) * B) */
|
|
1353 base = base_in;
|
|
1354 index = wi::to_widest (subtrahend_cand->stride);
|
|
1355 index = -index;
|
|
1356 stride = subtrahend_cand->base_expr;
|
|
1357 ctype = TREE_TYPE (base_in);
|
|
1358 stype = subtrahend_cand->cand_type;
|
|
1359 if (has_single_use (addend_in))
|
|
1360 savings = (subtrahend_cand->dead_savings
|
|
1361 + stmt_cost (subtrahend_cand->cand_stmt, speed));
|
|
1362 }
|
145
|
1363
|
|
1364 subtrahend_cand = lookup_cand (subtrahend_cand->next_interp);
|
111
|
1365 }
|
|
1366 }
|
|
1367
|
145
|
1368 base_cand = lookup_cand (base_cand->next_interp);
|
111
|
1369 }
|
|
1370
|
|
1371 if (!base)
|
|
1372 {
|
|
1373 /* No interpretations had anything useful to propagate, so
|
|
1374 produce X = Y + (1 * Z). */
|
|
1375 base = base_in;
|
|
1376 index = subtract_p ? -1 : 1;
|
|
1377 stride = addend_in;
|
|
1378 ctype = TREE_TYPE (base_in);
|
|
1379 stype = (TREE_CODE (addend_in) == INTEGER_CST ? sizetype
|
|
1380 : TREE_TYPE (addend_in));
|
|
1381 }
|
|
1382
|
|
1383 c = alloc_cand_and_find_basis (CAND_ADD, gs, base, index, stride,
|
|
1384 ctype, stype, savings);
|
|
1385 return c;
|
|
1386 }
|
|
1387
|
|
1388 /* Create a candidate entry for a statement GS, where GS adds SSA
|
|
1389 name BASE_IN to constant INDEX_IN. Propagate any known information
|
|
1390 about BASE_IN into the new candidate. Return the new candidate. */
|
|
1391
|
|
1392 static slsr_cand_t
|
|
1393 create_add_imm_cand (gimple *gs, tree base_in, const widest_int &index_in,
|
|
1394 bool speed)
|
|
1395 {
|
|
1396 enum cand_kind kind = CAND_ADD;
|
|
1397 tree base = NULL_TREE, stride = NULL_TREE, ctype = NULL_TREE;
|
|
1398 tree stype = NULL_TREE;
|
|
1399 widest_int index, multiple;
|
|
1400 unsigned savings = 0;
|
|
1401 slsr_cand_t c;
|
|
1402 slsr_cand_t base_cand = base_cand_from_table (base_in);
|
|
1403
|
|
1404 while (base_cand && !base && base_cand->kind != CAND_PHI)
|
|
1405 {
|
|
1406 signop sign = TYPE_SIGN (TREE_TYPE (base_cand->stride));
|
|
1407
|
|
1408 if (TREE_CODE (base_cand->stride) == INTEGER_CST
|
|
1409 && wi::multiple_of_p (index_in, wi::to_widest (base_cand->stride),
|
|
1410 sign, &multiple))
|
|
1411 {
|
|
1412 /* Y = (B + i') * S, S constant, c = kS for some integer k
|
|
1413 X = Y + c
|
|
1414 ============================
|
|
1415 X = (B + (i'+ k)) * S
|
|
1416 OR
|
|
1417 Y = B + (i' * S), S constant, c = kS for some integer k
|
|
1418 X = Y + c
|
|
1419 ============================
|
|
1420 X = (B + (i'+ k)) * S */
|
|
1421 kind = base_cand->kind;
|
|
1422 base = base_cand->base_expr;
|
|
1423 index = base_cand->index + multiple;
|
|
1424 stride = base_cand->stride;
|
|
1425 ctype = base_cand->cand_type;
|
|
1426 stype = base_cand->stride_type;
|
|
1427 if (has_single_use (base_in))
|
|
1428 savings = (base_cand->dead_savings
|
|
1429 + stmt_cost (base_cand->cand_stmt, speed));
|
|
1430 }
|
|
1431
|
145
|
1432 base_cand = lookup_cand (base_cand->next_interp);
|
111
|
1433 }
|
|
1434
|
|
1435 if (!base)
|
|
1436 {
|
|
1437 /* No interpretations had anything useful to propagate, so
|
|
1438 produce X = Y + (c * 1). */
|
|
1439 kind = CAND_ADD;
|
|
1440 base = base_in;
|
|
1441 index = index_in;
|
|
1442 stride = integer_one_node;
|
|
1443 ctype = TREE_TYPE (base_in);
|
|
1444 stype = sizetype;
|
|
1445 }
|
|
1446
|
|
1447 c = alloc_cand_and_find_basis (kind, gs, base, index, stride,
|
|
1448 ctype, stype, savings);
|
|
1449 return c;
|
|
1450 }
|
|
1451
|
|
1452 /* Given GS which is an add or subtract of scalar integers or pointers,
|
|
1453 make at least one appropriate entry in the candidate table. */
|
|
1454
|
|
1455 static void
|
|
1456 slsr_process_add (gimple *gs, tree rhs1, tree rhs2, bool speed)
|
|
1457 {
|
|
1458 bool subtract_p = gimple_assign_rhs_code (gs) == MINUS_EXPR;
|
|
1459 slsr_cand_t c = NULL, c2;
|
|
1460
|
|
1461 if (TREE_CODE (rhs2) == SSA_NAME)
|
|
1462 {
|
|
1463 /* First record an interpretation assuming RHS1 is the base expression
|
|
1464 and RHS2 is the stride. But it doesn't make sense for the
|
|
1465 stride to be a pointer, so don't record a candidate in that case. */
|
|
1466 if (!POINTER_TYPE_P (TREE_TYPE (rhs2)))
|
|
1467 {
|
|
1468 c = create_add_ssa_cand (gs, rhs1, rhs2, subtract_p, speed);
|
|
1469
|
|
1470 /* Add the first interpretation to the statement-candidate
|
|
1471 mapping. */
|
|
1472 add_cand_for_stmt (gs, c);
|
|
1473 }
|
|
1474
|
|
1475 /* If the two RHS operands are identical, or this is a subtract,
|
|
1476 we're done. */
|
|
1477 if (operand_equal_p (rhs1, rhs2, 0) || subtract_p)
|
|
1478 return;
|
|
1479
|
|
1480 /* Otherwise, record another interpretation assuming RHS2 is the
|
|
1481 base expression and RHS1 is the stride, again provided that the
|
|
1482 stride is not a pointer. */
|
|
1483 if (!POINTER_TYPE_P (TREE_TYPE (rhs1)))
|
|
1484 {
|
|
1485 c2 = create_add_ssa_cand (gs, rhs2, rhs1, false, speed);
|
|
1486 if (c)
|
131
|
1487 {
|
|
1488 c->next_interp = c2->cand_num;
|
|
1489 c2->first_interp = c->cand_num;
|
|
1490 }
|
111
|
1491 else
|
|
1492 add_cand_for_stmt (gs, c2);
|
|
1493 }
|
|
1494 }
|
131
|
1495 else if (TREE_CODE (rhs2) == INTEGER_CST)
|
111
|
1496 {
|
|
1497 /* Record an interpretation for the add-immediate. */
|
|
1498 widest_int index = wi::to_widest (rhs2);
|
|
1499 if (subtract_p)
|
|
1500 index = -index;
|
|
1501
|
|
1502 c = create_add_imm_cand (gs, rhs1, index, speed);
|
|
1503
|
|
1504 /* Add the interpretation to the statement-candidate mapping. */
|
|
1505 add_cand_for_stmt (gs, c);
|
|
1506 }
|
|
1507 }
|
|
1508
|
|
1509 /* Given GS which is a negate of a scalar integer, make an appropriate
|
|
1510 entry in the candidate table. A negate is equivalent to a multiply
|
|
1511 by -1. */
|
|
1512
|
|
1513 static void
|
|
1514 slsr_process_neg (gimple *gs, tree rhs1, bool speed)
|
|
1515 {
|
|
1516 /* Record a CAND_MULT interpretation for the multiply by -1. */
|
|
1517 slsr_cand_t c = create_mul_imm_cand (gs, rhs1, integer_minus_one_node, speed);
|
|
1518
|
|
1519 /* Add the interpretation to the statement-candidate mapping. */
|
|
1520 add_cand_for_stmt (gs, c);
|
|
1521 }
|
|
1522
|
|
1523 /* Help function for legal_cast_p, operating on two trees. Checks
|
|
1524 whether it's allowable to cast from RHS to LHS. See legal_cast_p
|
|
1525 for more details. */
|
|
1526
|
|
1527 static bool
|
|
1528 legal_cast_p_1 (tree lhs_type, tree rhs_type)
|
|
1529 {
|
|
1530 unsigned lhs_size, rhs_size;
|
|
1531 bool lhs_wraps, rhs_wraps;
|
|
1532
|
|
1533 lhs_size = TYPE_PRECISION (lhs_type);
|
|
1534 rhs_size = TYPE_PRECISION (rhs_type);
|
|
1535 lhs_wraps = ANY_INTEGRAL_TYPE_P (lhs_type) && TYPE_OVERFLOW_WRAPS (lhs_type);
|
|
1536 rhs_wraps = ANY_INTEGRAL_TYPE_P (rhs_type) && TYPE_OVERFLOW_WRAPS (rhs_type);
|
|
1537
|
|
1538 if (lhs_size < rhs_size
|
|
1539 || (rhs_wraps && !lhs_wraps)
|
|
1540 || (rhs_wraps && lhs_wraps && rhs_size != lhs_size))
|
|
1541 return false;
|
|
1542
|
|
1543 return true;
|
|
1544 }
|
|
1545
|
|
1546 /* Return TRUE if GS is a statement that defines an SSA name from
|
|
1547 a conversion and is legal for us to combine with an add and multiply
|
|
1548 in the candidate table. For example, suppose we have:
|
|
1549
|
|
1550 A = B + i;
|
|
1551 C = (type) A;
|
|
1552 D = C * S;
|
|
1553
|
|
1554 Without the type-cast, we would create a CAND_MULT for D with base B,
|
|
1555 index i, and stride S. We want to record this candidate only if it
|
|
1556 is equivalent to apply the type cast following the multiply:
|
|
1557
|
|
1558 A = B + i;
|
|
1559 E = A * S;
|
|
1560 D = (type) E;
|
|
1561
|
|
1562 We will record the type with the candidate for D. This allows us
|
|
1563 to use a similar previous candidate as a basis. If we have earlier seen
|
|
1564
|
|
1565 A' = B + i';
|
|
1566 C' = (type) A';
|
|
1567 D' = C' * S;
|
|
1568
|
|
1569 we can replace D with
|
|
1570
|
|
1571 D = D' + (i - i') * S;
|
|
1572
|
|
1573 But if moving the type-cast would change semantics, we mustn't do this.
|
|
1574
|
|
1575 This is legitimate for casts from a non-wrapping integral type to
|
|
1576 any integral type of the same or larger size. It is not legitimate
|
|
1577 to convert a wrapping type to a non-wrapping type, or to a wrapping
|
|
1578 type of a different size. I.e., with a wrapping type, we must
|
|
1579 assume that the addition B + i could wrap, in which case performing
|
|
1580 the multiply before or after one of the "illegal" type casts will
|
|
1581 have different semantics. */
|
|
1582
|
|
1583 static bool
|
|
1584 legal_cast_p (gimple *gs, tree rhs)
|
|
1585 {
|
|
1586 if (!is_gimple_assign (gs)
|
|
1587 || !CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (gs)))
|
|
1588 return false;
|
|
1589
|
|
1590 return legal_cast_p_1 (TREE_TYPE (gimple_assign_lhs (gs)), TREE_TYPE (rhs));
|
|
1591 }
|
|
1592
|
|
1593 /* Given GS which is a cast to a scalar integer type, determine whether
|
|
1594 the cast is legal for strength reduction. If so, make at least one
|
|
1595 appropriate entry in the candidate table. */
|
|
1596
|
|
1597 static void
|
|
1598 slsr_process_cast (gimple *gs, tree rhs1, bool speed)
|
|
1599 {
|
|
1600 tree lhs, ctype;
|
|
1601 slsr_cand_t base_cand, c = NULL, c2;
|
|
1602 unsigned savings = 0;
|
|
1603
|
|
1604 if (!legal_cast_p (gs, rhs1))
|
|
1605 return;
|
|
1606
|
|
1607 lhs = gimple_assign_lhs (gs);
|
|
1608 base_cand = base_cand_from_table (rhs1);
|
|
1609 ctype = TREE_TYPE (lhs);
|
|
1610
|
|
1611 if (base_cand && base_cand->kind != CAND_PHI)
|
|
1612 {
|
131
|
1613 slsr_cand_t first_cand = NULL;
|
|
1614
|
111
|
1615 while (base_cand)
|
|
1616 {
|
|
1617 /* Propagate all data from the base candidate except the type,
|
|
1618 which comes from the cast, and the base candidate's cast,
|
|
1619 which is no longer applicable. */
|
|
1620 if (has_single_use (rhs1))
|
|
1621 savings = (base_cand->dead_savings
|
|
1622 + stmt_cost (base_cand->cand_stmt, speed));
|
|
1623
|
|
1624 c = alloc_cand_and_find_basis (base_cand->kind, gs,
|
|
1625 base_cand->base_expr,
|
|
1626 base_cand->index, base_cand->stride,
|
|
1627 ctype, base_cand->stride_type,
|
|
1628 savings);
|
131
|
1629 if (!first_cand)
|
|
1630 first_cand = c;
|
|
1631
|
|
1632 if (first_cand != c)
|
|
1633 c->first_interp = first_cand->cand_num;
|
|
1634
|
145
|
1635 base_cand = lookup_cand (base_cand->next_interp);
|
111
|
1636 }
|
|
1637 }
|
|
1638 else
|
|
1639 {
|
|
1640 /* If nothing is known about the RHS, create fresh CAND_ADD and
|
|
1641 CAND_MULT interpretations:
|
|
1642
|
|
1643 X = Y + (0 * 1)
|
|
1644 X = (Y + 0) * 1
|
|
1645
|
|
1646 The first of these is somewhat arbitrary, but the choice of
|
|
1647 1 for the stride simplifies the logic for propagating casts
|
|
1648 into their uses. */
|
|
1649 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, 0,
|
|
1650 integer_one_node, ctype, sizetype, 0);
|
|
1651 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, 0,
|
|
1652 integer_one_node, ctype, sizetype, 0);
|
|
1653 c->next_interp = c2->cand_num;
|
131
|
1654 c2->first_interp = c->cand_num;
|
111
|
1655 }
|
|
1656
|
|
1657 /* Add the first (or only) interpretation to the statement-candidate
|
|
1658 mapping. */
|
|
1659 add_cand_for_stmt (gs, c);
|
|
1660 }
|
|
1661
|
|
1662 /* Given GS which is a copy of a scalar integer type, make at least one
|
|
1663 appropriate entry in the candidate table.
|
|
1664
|
|
1665 This interface is included for completeness, but is unnecessary
|
|
1666 if this pass immediately follows a pass that performs copy
|
|
1667 propagation, such as DOM. */
|
|
1668
|
|
1669 static void
|
|
1670 slsr_process_copy (gimple *gs, tree rhs1, bool speed)
|
|
1671 {
|
|
1672 slsr_cand_t base_cand, c = NULL, c2;
|
|
1673 unsigned savings = 0;
|
|
1674
|
|
1675 base_cand = base_cand_from_table (rhs1);
|
|
1676
|
|
1677 if (base_cand && base_cand->kind != CAND_PHI)
|
|
1678 {
|
131
|
1679 slsr_cand_t first_cand = NULL;
|
|
1680
|
111
|
1681 while (base_cand)
|
|
1682 {
|
|
1683 /* Propagate all data from the base candidate. */
|
|
1684 if (has_single_use (rhs1))
|
|
1685 savings = (base_cand->dead_savings
|
|
1686 + stmt_cost (base_cand->cand_stmt, speed));
|
|
1687
|
|
1688 c = alloc_cand_and_find_basis (base_cand->kind, gs,
|
|
1689 base_cand->base_expr,
|
|
1690 base_cand->index, base_cand->stride,
|
|
1691 base_cand->cand_type,
|
|
1692 base_cand->stride_type, savings);
|
131
|
1693 if (!first_cand)
|
|
1694 first_cand = c;
|
|
1695
|
|
1696 if (first_cand != c)
|
|
1697 c->first_interp = first_cand->cand_num;
|
|
1698
|
145
|
1699 base_cand = lookup_cand (base_cand->next_interp);
|
111
|
1700 }
|
|
1701 }
|
|
1702 else
|
|
1703 {
|
|
1704 /* If nothing is known about the RHS, create fresh CAND_ADD and
|
|
1705 CAND_MULT interpretations:
|
|
1706
|
|
1707 X = Y + (0 * 1)
|
|
1708 X = (Y + 0) * 1
|
|
1709
|
|
1710 The first of these is somewhat arbitrary, but the choice of
|
|
1711 1 for the stride simplifies the logic for propagating casts
|
|
1712 into their uses. */
|
|
1713 c = alloc_cand_and_find_basis (CAND_ADD, gs, rhs1, 0,
|
|
1714 integer_one_node, TREE_TYPE (rhs1),
|
|
1715 sizetype, 0);
|
|
1716 c2 = alloc_cand_and_find_basis (CAND_MULT, gs, rhs1, 0,
|
|
1717 integer_one_node, TREE_TYPE (rhs1),
|
|
1718 sizetype, 0);
|
|
1719 c->next_interp = c2->cand_num;
|
131
|
1720 c2->first_interp = c->cand_num;
|
111
|
1721 }
|
|
1722
|
|
1723 /* Add the first (or only) interpretation to the statement-candidate
|
|
1724 mapping. */
|
|
1725 add_cand_for_stmt (gs, c);
|
|
1726 }
|
|
1727
|
|
1728 class find_candidates_dom_walker : public dom_walker
|
|
1729 {
|
|
1730 public:
|
|
1731 find_candidates_dom_walker (cdi_direction direction)
|
|
1732 : dom_walker (direction) {}
|
|
1733 virtual edge before_dom_children (basic_block);
|
|
1734 };
|
|
1735
|
|
1736 /* Find strength-reduction candidates in block BB. */
|
|
1737
|
|
1738 edge
|
|
1739 find_candidates_dom_walker::before_dom_children (basic_block bb)
|
|
1740 {
|
|
1741 bool speed = optimize_bb_for_speed_p (bb);
|
|
1742
|
|
1743 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi);
|
|
1744 gsi_next (&gsi))
|
|
1745 slsr_process_phi (gsi.phi (), speed);
|
|
1746
|
|
1747 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
|
|
1748 gsi_next (&gsi))
|
|
1749 {
|
|
1750 gimple *gs = gsi_stmt (gsi);
|
|
1751
|
131
|
1752 if (stmt_could_throw_p (cfun, gs))
|
|
1753 continue;
|
|
1754
|
111
|
1755 if (gimple_vuse (gs) && gimple_assign_single_p (gs))
|
|
1756 slsr_process_ref (gs);
|
|
1757
|
|
1758 else if (is_gimple_assign (gs)
|
|
1759 && (INTEGRAL_TYPE_P (TREE_TYPE (gimple_assign_lhs (gs)))
|
|
1760 || POINTER_TYPE_P (TREE_TYPE (gimple_assign_lhs (gs)))))
|
|
1761 {
|
|
1762 tree rhs1 = NULL_TREE, rhs2 = NULL_TREE;
|
|
1763
|
|
1764 switch (gimple_assign_rhs_code (gs))
|
|
1765 {
|
|
1766 case MULT_EXPR:
|
|
1767 case PLUS_EXPR:
|
|
1768 rhs1 = gimple_assign_rhs1 (gs);
|
|
1769 rhs2 = gimple_assign_rhs2 (gs);
|
|
1770 /* Should never happen, but currently some buggy situations
|
|
1771 in earlier phases put constants in rhs1. */
|
|
1772 if (TREE_CODE (rhs1) != SSA_NAME)
|
|
1773 continue;
|
|
1774 break;
|
|
1775
|
|
1776 /* Possible future opportunity: rhs1 of a ptr+ can be
|
|
1777 an ADDR_EXPR. */
|
|
1778 case POINTER_PLUS_EXPR:
|
|
1779 case MINUS_EXPR:
|
|
1780 rhs2 = gimple_assign_rhs2 (gs);
|
|
1781 gcc_fallthrough ();
|
|
1782
|
|
1783 CASE_CONVERT:
|
|
1784 case SSA_NAME:
|
|
1785 case NEGATE_EXPR:
|
|
1786 rhs1 = gimple_assign_rhs1 (gs);
|
|
1787 if (TREE_CODE (rhs1) != SSA_NAME)
|
|
1788 continue;
|
|
1789 break;
|
|
1790
|
|
1791 default:
|
|
1792 ;
|
|
1793 }
|
|
1794
|
|
1795 switch (gimple_assign_rhs_code (gs))
|
|
1796 {
|
|
1797 case MULT_EXPR:
|
|
1798 slsr_process_mul (gs, rhs1, rhs2, speed);
|
|
1799 break;
|
|
1800
|
|
1801 case PLUS_EXPR:
|
|
1802 case POINTER_PLUS_EXPR:
|
|
1803 case MINUS_EXPR:
|
|
1804 slsr_process_add (gs, rhs1, rhs2, speed);
|
|
1805 break;
|
|
1806
|
|
1807 case NEGATE_EXPR:
|
|
1808 slsr_process_neg (gs, rhs1, speed);
|
|
1809 break;
|
|
1810
|
|
1811 CASE_CONVERT:
|
|
1812 slsr_process_cast (gs, rhs1, speed);
|
|
1813 break;
|
|
1814
|
|
1815 case SSA_NAME:
|
|
1816 slsr_process_copy (gs, rhs1, speed);
|
|
1817 break;
|
|
1818
|
|
1819 default:
|
|
1820 ;
|
|
1821 }
|
|
1822 }
|
|
1823 }
|
|
1824 return NULL;
|
|
1825 }
|
|
1826
|
|
1827 /* Dump a candidate for debug. */
|
|
1828
|
|
1829 static void
|
|
1830 dump_candidate (slsr_cand_t c)
|
|
1831 {
|
|
1832 fprintf (dump_file, "%3d [%d] ", c->cand_num,
|
|
1833 gimple_bb (c->cand_stmt)->index);
|
|
1834 print_gimple_stmt (dump_file, c->cand_stmt, 0);
|
|
1835 switch (c->kind)
|
|
1836 {
|
|
1837 case CAND_MULT:
|
|
1838 fputs (" MULT : (", dump_file);
|
|
1839 print_generic_expr (dump_file, c->base_expr);
|
|
1840 fputs (" + ", dump_file);
|
|
1841 print_decs (c->index, dump_file);
|
|
1842 fputs (") * ", dump_file);
|
|
1843 if (TREE_CODE (c->stride) != INTEGER_CST
|
|
1844 && c->stride_type != TREE_TYPE (c->stride))
|
|
1845 {
|
|
1846 fputs ("(", dump_file);
|
|
1847 print_generic_expr (dump_file, c->stride_type);
|
|
1848 fputs (")", dump_file);
|
|
1849 }
|
|
1850 print_generic_expr (dump_file, c->stride);
|
|
1851 fputs (" : ", dump_file);
|
|
1852 break;
|
|
1853 case CAND_ADD:
|
|
1854 fputs (" ADD : ", dump_file);
|
|
1855 print_generic_expr (dump_file, c->base_expr);
|
|
1856 fputs (" + (", dump_file);
|
|
1857 print_decs (c->index, dump_file);
|
|
1858 fputs (" * ", dump_file);
|
|
1859 if (TREE_CODE (c->stride) != INTEGER_CST
|
|
1860 && c->stride_type != TREE_TYPE (c->stride))
|
|
1861 {
|
|
1862 fputs ("(", dump_file);
|
|
1863 print_generic_expr (dump_file, c->stride_type);
|
|
1864 fputs (")", dump_file);
|
|
1865 }
|
|
1866 print_generic_expr (dump_file, c->stride);
|
|
1867 fputs (") : ", dump_file);
|
|
1868 break;
|
|
1869 case CAND_REF:
|
|
1870 fputs (" REF : ", dump_file);
|
|
1871 print_generic_expr (dump_file, c->base_expr);
|
|
1872 fputs (" + (", dump_file);
|
|
1873 print_generic_expr (dump_file, c->stride);
|
|
1874 fputs (") + ", dump_file);
|
|
1875 print_decs (c->index, dump_file);
|
|
1876 fputs (" : ", dump_file);
|
|
1877 break;
|
|
1878 case CAND_PHI:
|
|
1879 fputs (" PHI : ", dump_file);
|
|
1880 print_generic_expr (dump_file, c->base_expr);
|
|
1881 fputs (" + (unknown * ", dump_file);
|
|
1882 print_generic_expr (dump_file, c->stride);
|
|
1883 fputs (") : ", dump_file);
|
|
1884 break;
|
|
1885 default:
|
|
1886 gcc_unreachable ();
|
|
1887 }
|
|
1888 print_generic_expr (dump_file, c->cand_type);
|
|
1889 fprintf (dump_file, "\n basis: %d dependent: %d sibling: %d\n",
|
|
1890 c->basis, c->dependent, c->sibling);
|
131
|
1891 fprintf (dump_file,
|
|
1892 " next-interp: %d first-interp: %d dead-savings: %d\n",
|
|
1893 c->next_interp, c->first_interp, c->dead_savings);
|
111
|
1894 if (c->def_phi)
|
|
1895 fprintf (dump_file, " phi: %d\n", c->def_phi);
|
|
1896 fputs ("\n", dump_file);
|
|
1897 }
|
|
1898
|
|
1899 /* Dump the candidate vector for debug. */
|
|
1900
|
|
1901 static void
|
|
1902 dump_cand_vec (void)
|
|
1903 {
|
|
1904 unsigned i;
|
|
1905 slsr_cand_t c;
|
|
1906
|
|
1907 fprintf (dump_file, "\nStrength reduction candidate vector:\n\n");
|
|
1908
|
|
1909 FOR_EACH_VEC_ELT (cand_vec, i, c)
|
145
|
1910 if (c != NULL)
|
|
1911 dump_candidate (c);
|
111
|
1912 }
|
|
1913
|
|
1914 /* Callback used to dump the candidate chains hash table. */
|
|
1915
|
|
1916 int
|
|
1917 ssa_base_cand_dump_callback (cand_chain **slot, void *ignored ATTRIBUTE_UNUSED)
|
|
1918 {
|
|
1919 const_cand_chain_t chain = *slot;
|
|
1920 cand_chain_t p;
|
|
1921
|
|
1922 print_generic_expr (dump_file, chain->base_expr);
|
|
1923 fprintf (dump_file, " -> %d", chain->cand->cand_num);
|
|
1924
|
|
1925 for (p = chain->next; p; p = p->next)
|
|
1926 fprintf (dump_file, " -> %d", p->cand->cand_num);
|
|
1927
|
|
1928 fputs ("\n", dump_file);
|
|
1929 return 1;
|
|
1930 }
|
|
1931
|
|
1932 /* Dump the candidate chains. */
|
|
1933
|
|
1934 static void
|
|
1935 dump_cand_chains (void)
|
|
1936 {
|
|
1937 fprintf (dump_file, "\nStrength reduction candidate chains:\n\n");
|
|
1938 base_cand_map->traverse_noresize <void *, ssa_base_cand_dump_callback>
|
|
1939 (NULL);
|
|
1940 fputs ("\n", dump_file);
|
|
1941 }
|
|
1942
|
|
1943 /* Dump the increment vector for debug. */
|
|
1944
|
|
1945 static void
|
|
1946 dump_incr_vec (void)
|
|
1947 {
|
|
1948 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
1949 {
|
|
1950 unsigned i;
|
|
1951
|
|
1952 fprintf (dump_file, "\nIncrement vector:\n\n");
|
|
1953
|
|
1954 for (i = 0; i < incr_vec_len; i++)
|
|
1955 {
|
|
1956 fprintf (dump_file, "%3d increment: ", i);
|
|
1957 print_decs (incr_vec[i].incr, dump_file);
|
|
1958 fprintf (dump_file, "\n count: %d", incr_vec[i].count);
|
|
1959 fprintf (dump_file, "\n cost: %d", incr_vec[i].cost);
|
|
1960 fputs ("\n initializer: ", dump_file);
|
|
1961 print_generic_expr (dump_file, incr_vec[i].initializer);
|
|
1962 fputs ("\n\n", dump_file);
|
|
1963 }
|
|
1964 }
|
|
1965 }
|
|
1966
|
|
1967 /* Replace *EXPR in candidate C with an equivalent strength-reduced
|
|
1968 data reference. */
|
|
1969
|
|
1970 static void
|
|
1971 replace_ref (tree *expr, slsr_cand_t c)
|
|
1972 {
|
|
1973 tree add_expr, mem_ref, acc_type = TREE_TYPE (*expr);
|
|
1974 unsigned HOST_WIDE_INT misalign;
|
|
1975 unsigned align;
|
|
1976
|
|
1977 /* Ensure the memory reference carries the minimum alignment
|
|
1978 requirement for the data type. See PR58041. */
|
|
1979 get_object_alignment_1 (*expr, &align, &misalign);
|
|
1980 if (misalign != 0)
|
|
1981 align = least_bit_hwi (misalign);
|
|
1982 if (align < TYPE_ALIGN (acc_type))
|
|
1983 acc_type = build_aligned_type (acc_type, align);
|
|
1984
|
|
1985 add_expr = fold_build2 (POINTER_PLUS_EXPR, c->cand_type,
|
|
1986 c->base_expr, c->stride);
|
|
1987 mem_ref = fold_build2 (MEM_REF, acc_type, add_expr,
|
|
1988 wide_int_to_tree (c->cand_type, c->index));
|
|
1989
|
|
1990 /* Gimplify the base addressing expression for the new MEM_REF tree. */
|
|
1991 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
|
|
1992 TREE_OPERAND (mem_ref, 0)
|
|
1993 = force_gimple_operand_gsi (&gsi, TREE_OPERAND (mem_ref, 0),
|
|
1994 /*simple_p=*/true, NULL,
|
|
1995 /*before=*/true, GSI_SAME_STMT);
|
|
1996 copy_ref_info (mem_ref, *expr);
|
|
1997 *expr = mem_ref;
|
|
1998 update_stmt (c->cand_stmt);
|
|
1999 }
|
|
2000
|
145
|
2001 /* Return true if CAND_REF candidate C is a valid memory reference. */
|
|
2002
|
|
2003 static bool
|
|
2004 valid_mem_ref_cand_p (slsr_cand_t c)
|
|
2005 {
|
|
2006 if (TREE_CODE (TREE_OPERAND (c->stride, 1)) != INTEGER_CST)
|
|
2007 return false;
|
|
2008
|
|
2009 struct mem_address addr
|
|
2010 = { NULL_TREE, c->base_expr, TREE_OPERAND (c->stride, 0),
|
|
2011 TREE_OPERAND (c->stride, 1), wide_int_to_tree (sizetype, c->index) };
|
|
2012
|
|
2013 return
|
|
2014 valid_mem_ref_p (TYPE_MODE (c->cand_type), TYPE_ADDR_SPACE (c->cand_type),
|
|
2015 &addr);
|
|
2016 }
|
|
2017
|
111
|
2018 /* Replace CAND_REF candidate C, each sibling of candidate C, and each
|
|
2019 dependent of candidate C with an equivalent strength-reduced data
|
|
2020 reference. */
|
|
2021
|
|
2022 static void
|
|
2023 replace_refs (slsr_cand_t c)
|
|
2024 {
|
145
|
2025 /* Replacing a chain of only 2 candidates which are valid memory references
|
|
2026 is generally counter-productive because you cannot recoup the additional
|
|
2027 calculation added in front of them. */
|
|
2028 if (c->basis == 0
|
|
2029 && c->dependent
|
|
2030 && !lookup_cand (c->dependent)->dependent
|
|
2031 && valid_mem_ref_cand_p (c)
|
|
2032 && valid_mem_ref_cand_p (lookup_cand (c->dependent)))
|
|
2033 return;
|
|
2034
|
111
|
2035 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
2036 {
|
|
2037 fputs ("Replacing reference: ", dump_file);
|
|
2038 print_gimple_stmt (dump_file, c->cand_stmt, 0);
|
|
2039 }
|
|
2040
|
|
2041 if (gimple_vdef (c->cand_stmt))
|
|
2042 {
|
|
2043 tree *lhs = gimple_assign_lhs_ptr (c->cand_stmt);
|
|
2044 replace_ref (lhs, c);
|
|
2045 }
|
|
2046 else
|
|
2047 {
|
|
2048 tree *rhs = gimple_assign_rhs1_ptr (c->cand_stmt);
|
|
2049 replace_ref (rhs, c);
|
|
2050 }
|
|
2051
|
|
2052 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
2053 {
|
|
2054 fputs ("With: ", dump_file);
|
|
2055 print_gimple_stmt (dump_file, c->cand_stmt, 0);
|
|
2056 fputs ("\n", dump_file);
|
|
2057 }
|
|
2058
|
|
2059 if (c->sibling)
|
|
2060 replace_refs (lookup_cand (c->sibling));
|
|
2061
|
|
2062 if (c->dependent)
|
|
2063 replace_refs (lookup_cand (c->dependent));
|
|
2064 }
|
|
2065
|
|
2066 /* Return TRUE if candidate C is dependent upon a PHI. */
|
|
2067
|
|
2068 static bool
|
|
2069 phi_dependent_cand_p (slsr_cand_t c)
|
|
2070 {
|
|
2071 /* A candidate is not necessarily dependent upon a PHI just because
|
|
2072 it has a phi definition for its base name. It may have a basis
|
|
2073 that relies upon the same phi definition, in which case the PHI
|
|
2074 is irrelevant to this candidate. */
|
|
2075 return (c->def_phi
|
|
2076 && c->basis
|
|
2077 && lookup_cand (c->basis)->def_phi != c->def_phi);
|
|
2078 }
|
|
2079
|
|
2080 /* Calculate the increment required for candidate C relative to
|
|
2081 its basis. */
|
|
2082
|
|
2083 static widest_int
|
|
2084 cand_increment (slsr_cand_t c)
|
|
2085 {
|
|
2086 slsr_cand_t basis;
|
|
2087
|
|
2088 /* If the candidate doesn't have a basis, just return its own
|
|
2089 index. This is useful in record_increments to help us find
|
|
2090 an existing initializer. Also, if the candidate's basis is
|
|
2091 hidden by a phi, then its own index will be the increment
|
|
2092 from the newly introduced phi basis. */
|
|
2093 if (!c->basis || phi_dependent_cand_p (c))
|
|
2094 return c->index;
|
|
2095
|
|
2096 basis = lookup_cand (c->basis);
|
|
2097 gcc_assert (operand_equal_p (c->base_expr, basis->base_expr, 0));
|
|
2098 return c->index - basis->index;
|
|
2099 }
|
|
2100
|
|
2101 /* Calculate the increment required for candidate C relative to
|
|
2102 its basis. If we aren't going to generate pointer arithmetic
|
|
2103 for this candidate, return the absolute value of that increment
|
|
2104 instead. */
|
|
2105
|
|
2106 static inline widest_int
|
|
2107 cand_abs_increment (slsr_cand_t c)
|
|
2108 {
|
|
2109 widest_int increment = cand_increment (c);
|
|
2110
|
|
2111 if (!address_arithmetic_p && wi::neg_p (increment))
|
|
2112 increment = -increment;
|
|
2113
|
|
2114 return increment;
|
|
2115 }
|
|
2116
|
|
2117 /* Return TRUE iff candidate C has already been replaced under
|
|
2118 another interpretation. */
|
|
2119
|
|
2120 static inline bool
|
|
2121 cand_already_replaced (slsr_cand_t c)
|
|
2122 {
|
|
2123 return (gimple_bb (c->cand_stmt) == 0);
|
|
2124 }
|
|
2125
|
|
2126 /* Common logic used by replace_unconditional_candidate and
|
|
2127 replace_conditional_candidate. */
|
|
2128
|
|
2129 static void
|
|
2130 replace_mult_candidate (slsr_cand_t c, tree basis_name, widest_int bump)
|
|
2131 {
|
|
2132 tree target_type = TREE_TYPE (gimple_assign_lhs (c->cand_stmt));
|
|
2133 enum tree_code cand_code = gimple_assign_rhs_code (c->cand_stmt);
|
|
2134
|
|
2135 /* It is not useful to replace casts, copies, negates, or adds of
|
|
2136 an SSA name and a constant. */
|
|
2137 if (cand_code == SSA_NAME
|
|
2138 || CONVERT_EXPR_CODE_P (cand_code)
|
|
2139 || cand_code == PLUS_EXPR
|
|
2140 || cand_code == POINTER_PLUS_EXPR
|
|
2141 || cand_code == MINUS_EXPR
|
|
2142 || cand_code == NEGATE_EXPR)
|
|
2143 return;
|
|
2144
|
|
2145 enum tree_code code = PLUS_EXPR;
|
|
2146 tree bump_tree;
|
|
2147 gimple *stmt_to_print = NULL;
|
|
2148
|
|
2149 if (wi::neg_p (bump))
|
|
2150 {
|
|
2151 code = MINUS_EXPR;
|
|
2152 bump = -bump;
|
|
2153 }
|
|
2154
|
|
2155 /* It is possible that the resulting bump doesn't fit in target_type.
|
|
2156 Abandon the replacement in this case. This does not affect
|
|
2157 siblings or dependents of C. */
|
|
2158 if (bump != wi::ext (bump, TYPE_PRECISION (target_type),
|
|
2159 TYPE_SIGN (target_type)))
|
|
2160 return;
|
|
2161
|
|
2162 bump_tree = wide_int_to_tree (target_type, bump);
|
|
2163
|
|
2164 /* If the basis name and the candidate's LHS have incompatible types,
|
|
2165 introduce a cast. */
|
|
2166 if (!useless_type_conversion_p (target_type, TREE_TYPE (basis_name)))
|
|
2167 basis_name = introduce_cast_before_cand (c, target_type, basis_name);
|
|
2168
|
|
2169 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
2170 {
|
|
2171 fputs ("Replacing: ", dump_file);
|
|
2172 print_gimple_stmt (dump_file, c->cand_stmt, 0);
|
|
2173 }
|
|
2174
|
|
2175 if (bump == 0)
|
|
2176 {
|
|
2177 tree lhs = gimple_assign_lhs (c->cand_stmt);
|
|
2178 gassign *copy_stmt = gimple_build_assign (lhs, basis_name);
|
|
2179 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
|
131
|
2180 slsr_cand_t cc = lookup_cand (c->first_interp);
|
111
|
2181 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
|
|
2182 gsi_replace (&gsi, copy_stmt, false);
|
131
|
2183 while (cc)
|
111
|
2184 {
|
|
2185 cc->cand_stmt = copy_stmt;
|
145
|
2186 cc = lookup_cand (cc->next_interp);
|
111
|
2187 }
|
|
2188 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
2189 stmt_to_print = copy_stmt;
|
|
2190 }
|
|
2191 else
|
|
2192 {
|
|
2193 tree rhs1, rhs2;
|
|
2194 if (cand_code != NEGATE_EXPR) {
|
|
2195 rhs1 = gimple_assign_rhs1 (c->cand_stmt);
|
|
2196 rhs2 = gimple_assign_rhs2 (c->cand_stmt);
|
|
2197 }
|
|
2198 if (cand_code != NEGATE_EXPR
|
|
2199 && ((operand_equal_p (rhs1, basis_name, 0)
|
|
2200 && operand_equal_p (rhs2, bump_tree, 0))
|
|
2201 || (operand_equal_p (rhs1, bump_tree, 0)
|
|
2202 && operand_equal_p (rhs2, basis_name, 0))))
|
|
2203 {
|
|
2204 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
2205 {
|
|
2206 fputs ("(duplicate, not actually replacing)", dump_file);
|
|
2207 stmt_to_print = c->cand_stmt;
|
|
2208 }
|
|
2209 }
|
|
2210 else
|
|
2211 {
|
|
2212 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
|
131
|
2213 slsr_cand_t cc = lookup_cand (c->first_interp);
|
111
|
2214 gimple_assign_set_rhs_with_ops (&gsi, code, basis_name, bump_tree);
|
|
2215 update_stmt (gsi_stmt (gsi));
|
131
|
2216 while (cc)
|
111
|
2217 {
|
|
2218 cc->cand_stmt = gsi_stmt (gsi);
|
145
|
2219 cc = lookup_cand (cc->next_interp);
|
111
|
2220 }
|
|
2221 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
2222 stmt_to_print = gsi_stmt (gsi);
|
|
2223 }
|
|
2224 }
|
|
2225
|
|
2226 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
2227 {
|
|
2228 fputs ("With: ", dump_file);
|
|
2229 print_gimple_stmt (dump_file, stmt_to_print, 0);
|
|
2230 fputs ("\n", dump_file);
|
|
2231 }
|
|
2232 }
|
|
2233
|
|
2234 /* Replace candidate C with an add or subtract. Note that we only
|
|
2235 operate on CAND_MULTs with known strides, so we will never generate
|
|
2236 a POINTER_PLUS_EXPR. Each candidate X = (B + i) * S is replaced by
|
|
2237 X = Y + ((i - i') * S), as described in the module commentary. The
|
|
2238 folded value ((i - i') * S) is referred to here as the "bump." */
|
|
2239
|
|
2240 static void
|
|
2241 replace_unconditional_candidate (slsr_cand_t c)
|
|
2242 {
|
|
2243 slsr_cand_t basis;
|
|
2244
|
|
2245 if (cand_already_replaced (c))
|
|
2246 return;
|
|
2247
|
|
2248 basis = lookup_cand (c->basis);
|
|
2249 widest_int bump = cand_increment (c) * wi::to_widest (c->stride);
|
|
2250
|
|
2251 replace_mult_candidate (c, gimple_assign_lhs (basis->cand_stmt), bump);
|
|
2252 }
|
|
2253
|
|
2254 /* Return the index in the increment vector of the given INCREMENT,
|
|
2255 or -1 if not found. The latter can occur if more than
|
|
2256 MAX_INCR_VEC_LEN increments have been found. */
|
|
2257
|
|
2258 static inline int
|
|
2259 incr_vec_index (const widest_int &increment)
|
|
2260 {
|
|
2261 unsigned i;
|
|
2262
|
|
2263 for (i = 0; i < incr_vec_len && increment != incr_vec[i].incr; i++)
|
|
2264 ;
|
|
2265
|
|
2266 if (i < incr_vec_len)
|
|
2267 return i;
|
|
2268 else
|
|
2269 return -1;
|
|
2270 }
|
|
2271
|
|
2272 /* Create a new statement along edge E to add BASIS_NAME to the product
|
|
2273 of INCREMENT and the stride of candidate C. Create and return a new
|
|
2274 SSA name from *VAR to be used as the LHS of the new statement.
|
|
2275 KNOWN_STRIDE is true iff C's stride is a constant. */
|
|
2276
|
|
2277 static tree
|
|
2278 create_add_on_incoming_edge (slsr_cand_t c, tree basis_name,
|
|
2279 widest_int increment, edge e, location_t loc,
|
|
2280 bool known_stride)
|
|
2281 {
|
|
2282 tree lhs, basis_type;
|
|
2283 gassign *new_stmt, *cast_stmt = NULL;
|
|
2284
|
|
2285 /* If the add candidate along this incoming edge has the same
|
|
2286 index as C's hidden basis, the hidden basis represents this
|
|
2287 edge correctly. */
|
|
2288 if (increment == 0)
|
|
2289 return basis_name;
|
|
2290
|
|
2291 basis_type = TREE_TYPE (basis_name);
|
|
2292 lhs = make_temp_ssa_name (basis_type, NULL, "slsr");
|
|
2293
|
|
2294 /* Occasionally people convert integers to pointers without a
|
|
2295 cast, leading us into trouble if we aren't careful. */
|
|
2296 enum tree_code plus_code
|
|
2297 = POINTER_TYPE_P (basis_type) ? POINTER_PLUS_EXPR : PLUS_EXPR;
|
|
2298
|
|
2299 if (known_stride)
|
|
2300 {
|
|
2301 tree bump_tree;
|
|
2302 enum tree_code code = plus_code;
|
|
2303 widest_int bump = increment * wi::to_widest (c->stride);
|
|
2304 if (wi::neg_p (bump) && !POINTER_TYPE_P (basis_type))
|
|
2305 {
|
|
2306 code = MINUS_EXPR;
|
|
2307 bump = -bump;
|
|
2308 }
|
|
2309
|
|
2310 tree stride_type = POINTER_TYPE_P (basis_type) ? sizetype : basis_type;
|
|
2311 bump_tree = wide_int_to_tree (stride_type, bump);
|
|
2312 new_stmt = gimple_build_assign (lhs, code, basis_name, bump_tree);
|
|
2313 }
|
|
2314 else
|
|
2315 {
|
|
2316 int i;
|
|
2317 bool negate_incr = !POINTER_TYPE_P (basis_type) && wi::neg_p (increment);
|
|
2318 i = incr_vec_index (negate_incr ? -increment : increment);
|
|
2319 gcc_assert (i >= 0);
|
|
2320
|
|
2321 if (incr_vec[i].initializer)
|
|
2322 {
|
|
2323 enum tree_code code = negate_incr ? MINUS_EXPR : plus_code;
|
|
2324 new_stmt = gimple_build_assign (lhs, code, basis_name,
|
|
2325 incr_vec[i].initializer);
|
|
2326 }
|
|
2327 else {
|
|
2328 tree stride;
|
|
2329
|
|
2330 if (!types_compatible_p (TREE_TYPE (c->stride), c->stride_type))
|
|
2331 {
|
|
2332 tree cast_stride = make_temp_ssa_name (c->stride_type, NULL,
|
|
2333 "slsr");
|
|
2334 cast_stmt = gimple_build_assign (cast_stride, NOP_EXPR,
|
|
2335 c->stride);
|
|
2336 stride = cast_stride;
|
|
2337 }
|
|
2338 else
|
|
2339 stride = c->stride;
|
|
2340
|
|
2341 if (increment == 1)
|
|
2342 new_stmt = gimple_build_assign (lhs, plus_code, basis_name, stride);
|
|
2343 else if (increment == -1)
|
|
2344 new_stmt = gimple_build_assign (lhs, MINUS_EXPR, basis_name, stride);
|
|
2345 else
|
|
2346 gcc_unreachable ();
|
|
2347 }
|
|
2348 }
|
|
2349
|
|
2350 if (cast_stmt)
|
|
2351 {
|
|
2352 gimple_set_location (cast_stmt, loc);
|
|
2353 gsi_insert_on_edge (e, cast_stmt);
|
|
2354 }
|
|
2355
|
|
2356 gimple_set_location (new_stmt, loc);
|
|
2357 gsi_insert_on_edge (e, new_stmt);
|
|
2358
|
|
2359 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
2360 {
|
|
2361 if (cast_stmt)
|
|
2362 {
|
|
2363 fprintf (dump_file, "Inserting cast on edge %d->%d: ",
|
|
2364 e->src->index, e->dest->index);
|
|
2365 print_gimple_stmt (dump_file, cast_stmt, 0);
|
|
2366 }
|
|
2367 fprintf (dump_file, "Inserting on edge %d->%d: ", e->src->index,
|
|
2368 e->dest->index);
|
|
2369 print_gimple_stmt (dump_file, new_stmt, 0);
|
|
2370 }
|
|
2371
|
|
2372 return lhs;
|
|
2373 }
|
|
2374
|
|
2375 /* Clear the visited field for a tree of PHI candidates. */
|
|
2376
|
|
2377 static void
|
|
2378 clear_visited (gphi *phi)
|
|
2379 {
|
|
2380 unsigned i;
|
|
2381 slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
|
|
2382
|
|
2383 if (phi_cand->visited)
|
|
2384 {
|
|
2385 phi_cand->visited = 0;
|
|
2386
|
|
2387 for (i = 0; i < gimple_phi_num_args (phi); i++)
|
|
2388 {
|
|
2389 tree arg = gimple_phi_arg_def (phi, i);
|
|
2390 gimple *arg_def = SSA_NAME_DEF_STMT (arg);
|
|
2391 if (gimple_code (arg_def) == GIMPLE_PHI)
|
|
2392 clear_visited (as_a <gphi *> (arg_def));
|
|
2393 }
|
|
2394 }
|
|
2395 }
|
|
2396
|
|
2397 /* Recursive helper function for create_phi_basis. */
|
|
2398
|
|
2399 static tree
|
|
2400 create_phi_basis_1 (slsr_cand_t c, gimple *from_phi, tree basis_name,
|
|
2401 location_t loc, bool known_stride)
|
|
2402 {
|
|
2403 int i;
|
|
2404 tree name, phi_arg;
|
|
2405 gphi *phi;
|
|
2406 slsr_cand_t basis = lookup_cand (c->basis);
|
|
2407 int nargs = gimple_phi_num_args (from_phi);
|
|
2408 basic_block phi_bb = gimple_bb (from_phi);
|
|
2409 slsr_cand_t phi_cand = *stmt_cand_map->get (from_phi);
|
|
2410 auto_vec<tree> phi_args (nargs);
|
|
2411
|
|
2412 if (phi_cand->visited)
|
|
2413 return phi_cand->cached_basis;
|
|
2414 phi_cand->visited = 1;
|
|
2415
|
|
2416 /* Process each argument of the existing phi that represents
|
|
2417 conditionally-executed add candidates. */
|
|
2418 for (i = 0; i < nargs; i++)
|
|
2419 {
|
|
2420 edge e = (*phi_bb->preds)[i];
|
|
2421 tree arg = gimple_phi_arg_def (from_phi, i);
|
|
2422 tree feeding_def;
|
|
2423
|
|
2424 /* If the phi argument is the base name of the CAND_PHI, then
|
|
2425 this incoming arc should use the hidden basis. */
|
|
2426 if (operand_equal_p (arg, phi_cand->base_expr, 0))
|
|
2427 if (basis->index == 0)
|
|
2428 feeding_def = gimple_assign_lhs (basis->cand_stmt);
|
|
2429 else
|
|
2430 {
|
|
2431 widest_int incr = -basis->index;
|
|
2432 feeding_def = create_add_on_incoming_edge (c, basis_name, incr,
|
|
2433 e, loc, known_stride);
|
|
2434 }
|
|
2435 else
|
|
2436 {
|
|
2437 gimple *arg_def = SSA_NAME_DEF_STMT (arg);
|
|
2438
|
|
2439 /* If there is another phi along this incoming edge, we must
|
|
2440 process it in the same fashion to ensure that all basis
|
|
2441 adjustments are made along its incoming edges. */
|
|
2442 if (gimple_code (arg_def) == GIMPLE_PHI)
|
|
2443 feeding_def = create_phi_basis_1 (c, arg_def, basis_name,
|
|
2444 loc, known_stride);
|
|
2445 else
|
|
2446 {
|
|
2447 slsr_cand_t arg_cand = base_cand_from_table (arg);
|
|
2448 widest_int diff = arg_cand->index - basis->index;
|
|
2449 feeding_def = create_add_on_incoming_edge (c, basis_name, diff,
|
|
2450 e, loc, known_stride);
|
|
2451 }
|
|
2452 }
|
|
2453
|
|
2454 /* Because of recursion, we need to save the arguments in a vector
|
|
2455 so we can create the PHI statement all at once. Otherwise the
|
|
2456 storage for the half-created PHI can be reclaimed. */
|
|
2457 phi_args.safe_push (feeding_def);
|
|
2458 }
|
|
2459
|
|
2460 /* Create the new phi basis. */
|
|
2461 name = make_temp_ssa_name (TREE_TYPE (basis_name), NULL, "slsr");
|
|
2462 phi = create_phi_node (name, phi_bb);
|
|
2463 SSA_NAME_DEF_STMT (name) = phi;
|
|
2464
|
|
2465 FOR_EACH_VEC_ELT (phi_args, i, phi_arg)
|
|
2466 {
|
|
2467 edge e = (*phi_bb->preds)[i];
|
|
2468 add_phi_arg (phi, phi_arg, e, loc);
|
|
2469 }
|
|
2470
|
|
2471 update_stmt (phi);
|
|
2472
|
|
2473 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
2474 {
|
|
2475 fputs ("Introducing new phi basis: ", dump_file);
|
|
2476 print_gimple_stmt (dump_file, phi, 0);
|
|
2477 }
|
|
2478
|
|
2479 phi_cand->cached_basis = name;
|
|
2480 return name;
|
|
2481 }
|
|
2482
|
|
2483 /* Given a candidate C with BASIS_NAME being the LHS of C's basis which
|
|
2484 is hidden by the phi node FROM_PHI, create a new phi node in the same
|
|
2485 block as FROM_PHI. The new phi is suitable for use as a basis by C,
|
|
2486 with its phi arguments representing conditional adjustments to the
|
|
2487 hidden basis along conditional incoming paths. Those adjustments are
|
|
2488 made by creating add statements (and sometimes recursively creating
|
|
2489 phis) along those incoming paths. LOC is the location to attach to
|
|
2490 the introduced statements. KNOWN_STRIDE is true iff C's stride is a
|
|
2491 constant. */
|
|
2492
|
|
2493 static tree
|
|
2494 create_phi_basis (slsr_cand_t c, gimple *from_phi, tree basis_name,
|
|
2495 location_t loc, bool known_stride)
|
|
2496 {
|
|
2497 tree retval = create_phi_basis_1 (c, from_phi, basis_name, loc,
|
|
2498 known_stride);
|
|
2499 gcc_assert (retval);
|
|
2500 clear_visited (as_a <gphi *> (from_phi));
|
|
2501 return retval;
|
|
2502 }
|
|
2503
|
|
2504 /* Given a candidate C whose basis is hidden by at least one intervening
|
|
2505 phi, introduce a matching number of new phis to represent its basis
|
|
2506 adjusted by conditional increments along possible incoming paths. Then
|
|
2507 replace C as though it were an unconditional candidate, using the new
|
|
2508 basis. */
|
|
2509
|
|
2510 static void
|
|
2511 replace_conditional_candidate (slsr_cand_t c)
|
|
2512 {
|
|
2513 tree basis_name, name;
|
|
2514 slsr_cand_t basis;
|
|
2515 location_t loc;
|
|
2516
|
|
2517 /* Look up the LHS SSA name from C's basis. This will be the
|
|
2518 RHS1 of the adds we will introduce to create new phi arguments. */
|
|
2519 basis = lookup_cand (c->basis);
|
|
2520 basis_name = gimple_assign_lhs (basis->cand_stmt);
|
|
2521
|
|
2522 /* Create a new phi statement which will represent C's true basis
|
|
2523 after the transformation is complete. */
|
|
2524 loc = gimple_location (c->cand_stmt);
|
|
2525 name = create_phi_basis (c, lookup_cand (c->def_phi)->cand_stmt,
|
|
2526 basis_name, loc, KNOWN_STRIDE);
|
|
2527
|
|
2528 /* Replace C with an add of the new basis phi and a constant. */
|
|
2529 widest_int bump = c->index * wi::to_widest (c->stride);
|
|
2530
|
|
2531 replace_mult_candidate (c, name, bump);
|
|
2532 }
|
|
2533
|
|
2534 /* Recursive helper function for phi_add_costs. SPREAD is a measure of
|
|
2535 how many PHI nodes we have visited at this point in the tree walk. */
|
|
2536
|
|
2537 static int
|
|
2538 phi_add_costs_1 (gimple *phi, slsr_cand_t c, int one_add_cost, int *spread)
|
|
2539 {
|
|
2540 unsigned i;
|
|
2541 int cost = 0;
|
|
2542 slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
|
|
2543
|
|
2544 if (phi_cand->visited)
|
|
2545 return 0;
|
|
2546
|
|
2547 phi_cand->visited = 1;
|
|
2548 (*spread)++;
|
|
2549
|
|
2550 /* If we work our way back to a phi that isn't dominated by the hidden
|
|
2551 basis, this isn't a candidate for replacement. Indicate this by
|
|
2552 returning an unreasonably high cost. It's not easy to detect
|
|
2553 these situations when determining the basis, so we defer the
|
|
2554 decision until now. */
|
|
2555 basic_block phi_bb = gimple_bb (phi);
|
|
2556 slsr_cand_t basis = lookup_cand (c->basis);
|
|
2557 basic_block basis_bb = gimple_bb (basis->cand_stmt);
|
|
2558
|
|
2559 if (phi_bb == basis_bb || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
|
|
2560 return COST_INFINITE;
|
|
2561
|
|
2562 for (i = 0; i < gimple_phi_num_args (phi); i++)
|
|
2563 {
|
|
2564 tree arg = gimple_phi_arg_def (phi, i);
|
|
2565
|
|
2566 if (arg != phi_cand->base_expr)
|
|
2567 {
|
|
2568 gimple *arg_def = SSA_NAME_DEF_STMT (arg);
|
|
2569
|
|
2570 if (gimple_code (arg_def) == GIMPLE_PHI)
|
|
2571 {
|
|
2572 cost += phi_add_costs_1 (arg_def, c, one_add_cost, spread);
|
|
2573
|
|
2574 if (cost >= COST_INFINITE || *spread > MAX_SPREAD)
|
|
2575 return COST_INFINITE;
|
|
2576 }
|
|
2577 else
|
|
2578 {
|
|
2579 slsr_cand_t arg_cand = base_cand_from_table (arg);
|
|
2580
|
|
2581 if (arg_cand->index != c->index)
|
|
2582 cost += one_add_cost;
|
|
2583 }
|
|
2584 }
|
|
2585 }
|
|
2586
|
|
2587 return cost;
|
|
2588 }
|
|
2589
|
|
2590 /* Compute the expected costs of inserting basis adjustments for
|
|
2591 candidate C with phi-definition PHI. The cost of inserting
|
|
2592 one adjustment is given by ONE_ADD_COST. If PHI has arguments
|
|
2593 which are themselves phi results, recursively calculate costs
|
|
2594 for those phis as well. */
|
|
2595
|
|
2596 static int
|
|
2597 phi_add_costs (gimple *phi, slsr_cand_t c, int one_add_cost)
|
|
2598 {
|
|
2599 int spread = 0;
|
|
2600 int retval = phi_add_costs_1 (phi, c, one_add_cost, &spread);
|
|
2601 clear_visited (as_a <gphi *> (phi));
|
|
2602 return retval;
|
|
2603 }
|
|
2604 /* For candidate C, each sibling of candidate C, and each dependent of
|
|
2605 candidate C, determine whether the candidate is dependent upon a
|
|
2606 phi that hides its basis. If not, replace the candidate unconditionally.
|
|
2607 Otherwise, determine whether the cost of introducing compensation code
|
|
2608 for the candidate is offset by the gains from strength reduction. If
|
|
2609 so, replace the candidate and introduce the compensation code. */
|
|
2610
|
|
2611 static void
|
|
2612 replace_uncond_cands_and_profitable_phis (slsr_cand_t c)
|
|
2613 {
|
|
2614 if (phi_dependent_cand_p (c))
|
|
2615 {
|
|
2616 /* A multiply candidate with a stride of 1 is just an artifice
|
|
2617 of a copy or cast; there is no value in replacing it. */
|
|
2618 if (c->kind == CAND_MULT && wi::to_widest (c->stride) != 1)
|
|
2619 {
|
|
2620 /* A candidate dependent upon a phi will replace a multiply by
|
|
2621 a constant with an add, and will insert at most one add for
|
|
2622 each phi argument. Add these costs with the potential dead-code
|
|
2623 savings to determine profitability. */
|
|
2624 bool speed = optimize_bb_for_speed_p (gimple_bb (c->cand_stmt));
|
|
2625 int mult_savings = stmt_cost (c->cand_stmt, speed);
|
|
2626 gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
|
|
2627 tree phi_result = gimple_phi_result (phi);
|
|
2628 int one_add_cost = add_cost (speed,
|
|
2629 TYPE_MODE (TREE_TYPE (phi_result)));
|
|
2630 int add_costs = one_add_cost + phi_add_costs (phi, c, one_add_cost);
|
|
2631 int cost = add_costs - mult_savings - c->dead_savings;
|
|
2632
|
|
2633 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
2634 {
|
|
2635 fprintf (dump_file, " Conditional candidate %d:\n", c->cand_num);
|
|
2636 fprintf (dump_file, " add_costs = %d\n", add_costs);
|
|
2637 fprintf (dump_file, " mult_savings = %d\n", mult_savings);
|
|
2638 fprintf (dump_file, " dead_savings = %d\n", c->dead_savings);
|
|
2639 fprintf (dump_file, " cost = %d\n", cost);
|
|
2640 if (cost <= COST_NEUTRAL)
|
|
2641 fputs (" Replacing...\n", dump_file);
|
|
2642 else
|
|
2643 fputs (" Not replaced.\n", dump_file);
|
|
2644 }
|
|
2645
|
|
2646 if (cost <= COST_NEUTRAL)
|
|
2647 replace_conditional_candidate (c);
|
|
2648 }
|
|
2649 }
|
|
2650 else
|
|
2651 replace_unconditional_candidate (c);
|
|
2652
|
|
2653 if (c->sibling)
|
|
2654 replace_uncond_cands_and_profitable_phis (lookup_cand (c->sibling));
|
|
2655
|
|
2656 if (c->dependent)
|
|
2657 replace_uncond_cands_and_profitable_phis (lookup_cand (c->dependent));
|
|
2658 }
|
|
2659
|
|
2660 /* Count the number of candidates in the tree rooted at C that have
|
|
2661 not already been replaced under other interpretations. */
|
|
2662
|
|
2663 static int
|
|
2664 count_candidates (slsr_cand_t c)
|
|
2665 {
|
|
2666 unsigned count = cand_already_replaced (c) ? 0 : 1;
|
|
2667
|
|
2668 if (c->sibling)
|
|
2669 count += count_candidates (lookup_cand (c->sibling));
|
|
2670
|
|
2671 if (c->dependent)
|
|
2672 count += count_candidates (lookup_cand (c->dependent));
|
|
2673
|
|
2674 return count;
|
|
2675 }
|
|
2676
|
|
2677 /* Increase the count of INCREMENT by one in the increment vector.
|
|
2678 INCREMENT is associated with candidate C. If INCREMENT is to be
|
|
2679 conditionally executed as part of a conditional candidate replacement,
|
|
2680 IS_PHI_ADJUST is true, otherwise false. If an initializer
|
|
2681 T_0 = stride * I is provided by a candidate that dominates all
|
|
2682 candidates with the same increment, also record T_0 for subsequent use. */
|
|
2683
|
|
2684 static void
|
|
2685 record_increment (slsr_cand_t c, widest_int increment, bool is_phi_adjust)
|
|
2686 {
|
|
2687 bool found = false;
|
|
2688 unsigned i;
|
|
2689
|
|
2690 /* Treat increments that differ only in sign as identical so as to
|
|
2691 share initializers, unless we are generating pointer arithmetic. */
|
|
2692 if (!address_arithmetic_p && wi::neg_p (increment))
|
|
2693 increment = -increment;
|
|
2694
|
|
2695 for (i = 0; i < incr_vec_len; i++)
|
|
2696 {
|
|
2697 if (incr_vec[i].incr == increment)
|
|
2698 {
|
|
2699 incr_vec[i].count++;
|
|
2700 found = true;
|
|
2701
|
|
2702 /* If we previously recorded an initializer that doesn't
|
|
2703 dominate this candidate, it's not going to be useful to
|
|
2704 us after all. */
|
|
2705 if (incr_vec[i].initializer
|
|
2706 && !dominated_by_p (CDI_DOMINATORS,
|
|
2707 gimple_bb (c->cand_stmt),
|
|
2708 incr_vec[i].init_bb))
|
|
2709 {
|
|
2710 incr_vec[i].initializer = NULL_TREE;
|
|
2711 incr_vec[i].init_bb = NULL;
|
|
2712 }
|
|
2713
|
|
2714 break;
|
|
2715 }
|
|
2716 }
|
|
2717
|
|
2718 if (!found && incr_vec_len < MAX_INCR_VEC_LEN - 1)
|
|
2719 {
|
|
2720 /* The first time we see an increment, create the entry for it.
|
|
2721 If this is the root candidate which doesn't have a basis, set
|
|
2722 the count to zero. We're only processing it so it can possibly
|
|
2723 provide an initializer for other candidates. */
|
|
2724 incr_vec[incr_vec_len].incr = increment;
|
|
2725 incr_vec[incr_vec_len].count = c->basis || is_phi_adjust ? 1 : 0;
|
|
2726 incr_vec[incr_vec_len].cost = COST_INFINITE;
|
|
2727
|
|
2728 /* Optimistically record the first occurrence of this increment
|
|
2729 as providing an initializer (if it does); we will revise this
|
|
2730 opinion later if it doesn't dominate all other occurrences.
|
|
2731 Exception: increments of 0, 1 never need initializers;
|
|
2732 and phi adjustments don't ever provide initializers. */
|
|
2733 if (c->kind == CAND_ADD
|
|
2734 && !is_phi_adjust
|
|
2735 && c->index == increment
|
|
2736 && (increment > 1 || increment < 0)
|
|
2737 && (gimple_assign_rhs_code (c->cand_stmt) == PLUS_EXPR
|
|
2738 || gimple_assign_rhs_code (c->cand_stmt) == POINTER_PLUS_EXPR))
|
|
2739 {
|
|
2740 tree t0 = NULL_TREE;
|
|
2741 tree rhs1 = gimple_assign_rhs1 (c->cand_stmt);
|
|
2742 tree rhs2 = gimple_assign_rhs2 (c->cand_stmt);
|
|
2743 if (operand_equal_p (rhs1, c->base_expr, 0))
|
|
2744 t0 = rhs2;
|
|
2745 else if (operand_equal_p (rhs2, c->base_expr, 0))
|
|
2746 t0 = rhs1;
|
|
2747 if (t0
|
|
2748 && SSA_NAME_DEF_STMT (t0)
|
|
2749 && gimple_bb (SSA_NAME_DEF_STMT (t0)))
|
|
2750 {
|
|
2751 incr_vec[incr_vec_len].initializer = t0;
|
|
2752 incr_vec[incr_vec_len++].init_bb
|
|
2753 = gimple_bb (SSA_NAME_DEF_STMT (t0));
|
|
2754 }
|
|
2755 else
|
|
2756 {
|
|
2757 incr_vec[incr_vec_len].initializer = NULL_TREE;
|
|
2758 incr_vec[incr_vec_len++].init_bb = NULL;
|
|
2759 }
|
|
2760 }
|
|
2761 else
|
|
2762 {
|
|
2763 incr_vec[incr_vec_len].initializer = NULL_TREE;
|
|
2764 incr_vec[incr_vec_len++].init_bb = NULL;
|
|
2765 }
|
|
2766 }
|
|
2767 }
|
|
2768
|
|
2769 /* Recursive helper function for record_phi_increments. */
|
|
2770
|
|
2771 static void
|
|
2772 record_phi_increments_1 (slsr_cand_t basis, gimple *phi)
|
|
2773 {
|
|
2774 unsigned i;
|
|
2775 slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
|
|
2776
|
|
2777 if (phi_cand->visited)
|
|
2778 return;
|
|
2779 phi_cand->visited = 1;
|
|
2780
|
|
2781 for (i = 0; i < gimple_phi_num_args (phi); i++)
|
|
2782 {
|
|
2783 tree arg = gimple_phi_arg_def (phi, i);
|
131
|
2784 gimple *arg_def = SSA_NAME_DEF_STMT (arg);
|
|
2785
|
|
2786 if (gimple_code (arg_def) == GIMPLE_PHI)
|
|
2787 record_phi_increments_1 (basis, arg_def);
|
|
2788 else
|
111
|
2789 {
|
131
|
2790 widest_int diff;
|
|
2791
|
|
2792 if (operand_equal_p (arg, phi_cand->base_expr, 0))
|
|
2793 {
|
|
2794 diff = -basis->index;
|
|
2795 record_increment (phi_cand, diff, PHI_ADJUST);
|
|
2796 }
|
111
|
2797 else
|
|
2798 {
|
|
2799 slsr_cand_t arg_cand = base_cand_from_table (arg);
|
131
|
2800 diff = arg_cand->index - basis->index;
|
111
|
2801 record_increment (arg_cand, diff, PHI_ADJUST);
|
|
2802 }
|
|
2803 }
|
|
2804 }
|
|
2805 }
|
|
2806
|
|
2807 /* Given phi statement PHI that hides a candidate from its BASIS, find
|
|
2808 the increments along each incoming arc (recursively handling additional
|
|
2809 phis that may be present) and record them. These increments are the
|
|
2810 difference in index between the index-adjusting statements and the
|
|
2811 index of the basis. */
|
|
2812
|
|
2813 static void
|
|
2814 record_phi_increments (slsr_cand_t basis, gimple *phi)
|
|
2815 {
|
|
2816 record_phi_increments_1 (basis, phi);
|
|
2817 clear_visited (as_a <gphi *> (phi));
|
|
2818 }
|
|
2819
|
|
2820 /* Determine how many times each unique increment occurs in the set
|
|
2821 of candidates rooted at C's parent, recording the data in the
|
|
2822 increment vector. For each unique increment I, if an initializer
|
|
2823 T_0 = stride * I is provided by a candidate that dominates all
|
|
2824 candidates with the same increment, also record T_0 for subsequent
|
|
2825 use. */
|
|
2826
|
|
2827 static void
|
|
2828 record_increments (slsr_cand_t c)
|
|
2829 {
|
|
2830 if (!cand_already_replaced (c))
|
|
2831 {
|
|
2832 if (!phi_dependent_cand_p (c))
|
|
2833 record_increment (c, cand_increment (c), NOT_PHI_ADJUST);
|
|
2834 else
|
|
2835 {
|
|
2836 /* A candidate with a basis hidden by a phi will have one
|
|
2837 increment for its relationship to the index represented by
|
|
2838 the phi, and potentially additional increments along each
|
|
2839 incoming edge. For the root of the dependency tree (which
|
|
2840 has no basis), process just the initial index in case it has
|
|
2841 an initializer that can be used by subsequent candidates. */
|
|
2842 record_increment (c, c->index, NOT_PHI_ADJUST);
|
|
2843
|
|
2844 if (c->basis)
|
|
2845 record_phi_increments (lookup_cand (c->basis),
|
|
2846 lookup_cand (c->def_phi)->cand_stmt);
|
|
2847 }
|
|
2848 }
|
|
2849
|
|
2850 if (c->sibling)
|
|
2851 record_increments (lookup_cand (c->sibling));
|
|
2852
|
|
2853 if (c->dependent)
|
|
2854 record_increments (lookup_cand (c->dependent));
|
|
2855 }
|
|
2856
|
|
2857 /* Recursive helper function for phi_incr_cost. */
|
|
2858
|
|
2859 static int
|
|
2860 phi_incr_cost_1 (slsr_cand_t c, const widest_int &incr, gimple *phi,
|
|
2861 int *savings)
|
|
2862 {
|
|
2863 unsigned i;
|
|
2864 int cost = 0;
|
|
2865 slsr_cand_t basis = lookup_cand (c->basis);
|
|
2866 slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
|
|
2867
|
|
2868 if (phi_cand->visited)
|
|
2869 return 0;
|
|
2870 phi_cand->visited = 1;
|
|
2871
|
|
2872 for (i = 0; i < gimple_phi_num_args (phi); i++)
|
|
2873 {
|
|
2874 tree arg = gimple_phi_arg_def (phi, i);
|
131
|
2875 gimple *arg_def = SSA_NAME_DEF_STMT (arg);
|
|
2876
|
|
2877 if (gimple_code (arg_def) == GIMPLE_PHI)
|
|
2878 {
|
|
2879 int feeding_savings = 0;
|
|
2880 tree feeding_var = gimple_phi_result (arg_def);
|
|
2881 cost += phi_incr_cost_1 (c, incr, arg_def, &feeding_savings);
|
|
2882 if (uses_consumed_by_stmt (feeding_var, phi))
|
|
2883 *savings += feeding_savings;
|
|
2884 }
|
|
2885 else
|
111
|
2886 {
|
131
|
2887 widest_int diff;
|
|
2888 slsr_cand_t arg_cand;
|
|
2889
|
|
2890 /* When the PHI argument is just a pass-through to the base
|
|
2891 expression of the hidden basis, the difference is zero minus
|
|
2892 the index of the basis. There is no potential savings by
|
|
2893 eliminating a statement in this case. */
|
|
2894 if (operand_equal_p (arg, phi_cand->base_expr, 0))
|
111
|
2895 {
|
131
|
2896 arg_cand = (slsr_cand_t)NULL;
|
|
2897 diff = -basis->index;
|
111
|
2898 }
|
|
2899 else
|
|
2900 {
|
131
|
2901 arg_cand = base_cand_from_table (arg);
|
|
2902 diff = arg_cand->index - basis->index;
|
|
2903 }
|
|
2904
|
|
2905 if (incr == diff)
|
|
2906 {
|
|
2907 tree basis_lhs = gimple_assign_lhs (basis->cand_stmt);
|
|
2908 cost += add_cost (true, TYPE_MODE (TREE_TYPE (basis_lhs)));
|
|
2909 if (arg_cand)
|
111
|
2910 {
|
|
2911 tree lhs = gimple_assign_lhs (arg_cand->cand_stmt);
|
|
2912 if (uses_consumed_by_stmt (lhs, phi))
|
|
2913 *savings += stmt_cost (arg_cand->cand_stmt, true);
|
|
2914 }
|
|
2915 }
|
|
2916 }
|
|
2917 }
|
|
2918
|
|
2919 return cost;
|
|
2920 }
|
|
2921
|
|
2922 /* Add up and return the costs of introducing add statements that
|
|
2923 require the increment INCR on behalf of candidate C and phi
|
|
2924 statement PHI. Accumulate into *SAVINGS the potential savings
|
|
2925 from removing existing statements that feed PHI and have no other
|
|
2926 uses. */
|
|
2927
|
|
2928 static int
|
|
2929 phi_incr_cost (slsr_cand_t c, const widest_int &incr, gimple *phi,
|
|
2930 int *savings)
|
|
2931 {
|
|
2932 int retval = phi_incr_cost_1 (c, incr, phi, savings);
|
|
2933 clear_visited (as_a <gphi *> (phi));
|
|
2934 return retval;
|
|
2935 }
|
|
2936
|
|
2937 /* Return the first candidate in the tree rooted at C that has not
|
|
2938 already been replaced, favoring siblings over dependents. */
|
|
2939
|
|
2940 static slsr_cand_t
|
|
2941 unreplaced_cand_in_tree (slsr_cand_t c)
|
|
2942 {
|
|
2943 if (!cand_already_replaced (c))
|
|
2944 return c;
|
|
2945
|
|
2946 if (c->sibling)
|
|
2947 {
|
|
2948 slsr_cand_t sib = unreplaced_cand_in_tree (lookup_cand (c->sibling));
|
|
2949 if (sib)
|
|
2950 return sib;
|
|
2951 }
|
|
2952
|
|
2953 if (c->dependent)
|
|
2954 {
|
|
2955 slsr_cand_t dep = unreplaced_cand_in_tree (lookup_cand (c->dependent));
|
|
2956 if (dep)
|
|
2957 return dep;
|
|
2958 }
|
|
2959
|
|
2960 return NULL;
|
|
2961 }
|
|
2962
|
|
2963 /* Return TRUE if the candidates in the tree rooted at C should be
|
|
2964 optimized for speed, else FALSE. We estimate this based on the block
|
|
2965 containing the most dominant candidate in the tree that has not yet
|
|
2966 been replaced. */
|
|
2967
|
|
2968 static bool
|
|
2969 optimize_cands_for_speed_p (slsr_cand_t c)
|
|
2970 {
|
|
2971 slsr_cand_t c2 = unreplaced_cand_in_tree (c);
|
|
2972 gcc_assert (c2);
|
|
2973 return optimize_bb_for_speed_p (gimple_bb (c2->cand_stmt));
|
|
2974 }
|
|
2975
|
|
2976 /* Add COST_IN to the lowest cost of any dependent path starting at
|
|
2977 candidate C or any of its siblings, counting only candidates along
|
|
2978 such paths with increment INCR. Assume that replacing a candidate
|
|
2979 reduces cost by REPL_SAVINGS. Also account for savings from any
|
|
2980 statements that would go dead. If COUNT_PHIS is true, include
|
|
2981 costs of introducing feeding statements for conditional candidates. */
|
|
2982
|
|
2983 static int
|
|
2984 lowest_cost_path (int cost_in, int repl_savings, slsr_cand_t c,
|
|
2985 const widest_int &incr, bool count_phis)
|
|
2986 {
|
|
2987 int local_cost, sib_cost, savings = 0;
|
|
2988 widest_int cand_incr = cand_abs_increment (c);
|
|
2989
|
|
2990 if (cand_already_replaced (c))
|
|
2991 local_cost = cost_in;
|
|
2992 else if (incr == cand_incr)
|
|
2993 local_cost = cost_in - repl_savings - c->dead_savings;
|
|
2994 else
|
|
2995 local_cost = cost_in - c->dead_savings;
|
|
2996
|
|
2997 if (count_phis
|
|
2998 && phi_dependent_cand_p (c)
|
|
2999 && !cand_already_replaced (c))
|
|
3000 {
|
|
3001 gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
|
|
3002 local_cost += phi_incr_cost (c, incr, phi, &savings);
|
|
3003
|
|
3004 if (uses_consumed_by_stmt (gimple_phi_result (phi), c->cand_stmt))
|
|
3005 local_cost -= savings;
|
|
3006 }
|
|
3007
|
|
3008 if (c->dependent)
|
|
3009 local_cost = lowest_cost_path (local_cost, repl_savings,
|
|
3010 lookup_cand (c->dependent), incr,
|
|
3011 count_phis);
|
|
3012
|
|
3013 if (c->sibling)
|
|
3014 {
|
|
3015 sib_cost = lowest_cost_path (cost_in, repl_savings,
|
|
3016 lookup_cand (c->sibling), incr,
|
|
3017 count_phis);
|
|
3018 local_cost = MIN (local_cost, sib_cost);
|
|
3019 }
|
|
3020
|
|
3021 return local_cost;
|
|
3022 }
|
|
3023
|
|
3024 /* Compute the total savings that would accrue from all replacements
|
|
3025 in the candidate tree rooted at C, counting only candidates with
|
|
3026 increment INCR. Assume that replacing a candidate reduces cost
|
|
3027 by REPL_SAVINGS. Also account for savings from statements that
|
|
3028 would go dead. */
|
|
3029
|
|
3030 static int
|
|
3031 total_savings (int repl_savings, slsr_cand_t c, const widest_int &incr,
|
|
3032 bool count_phis)
|
|
3033 {
|
|
3034 int savings = 0;
|
|
3035 widest_int cand_incr = cand_abs_increment (c);
|
|
3036
|
|
3037 if (incr == cand_incr && !cand_already_replaced (c))
|
|
3038 savings += repl_savings + c->dead_savings;
|
|
3039
|
|
3040 if (count_phis
|
|
3041 && phi_dependent_cand_p (c)
|
|
3042 && !cand_already_replaced (c))
|
|
3043 {
|
|
3044 int phi_savings = 0;
|
|
3045 gimple *phi = lookup_cand (c->def_phi)->cand_stmt;
|
|
3046 savings -= phi_incr_cost (c, incr, phi, &phi_savings);
|
|
3047
|
|
3048 if (uses_consumed_by_stmt (gimple_phi_result (phi), c->cand_stmt))
|
|
3049 savings += phi_savings;
|
|
3050 }
|
|
3051
|
|
3052 if (c->dependent)
|
|
3053 savings += total_savings (repl_savings, lookup_cand (c->dependent), incr,
|
|
3054 count_phis);
|
|
3055
|
|
3056 if (c->sibling)
|
|
3057 savings += total_savings (repl_savings, lookup_cand (c->sibling), incr,
|
|
3058 count_phis);
|
|
3059
|
|
3060 return savings;
|
|
3061 }
|
|
3062
|
|
3063 /* Use target-specific costs to determine and record which increments
|
|
3064 in the current candidate tree are profitable to replace, assuming
|
|
3065 MODE and SPEED. FIRST_DEP is the first dependent of the root of
|
|
3066 the candidate tree.
|
|
3067
|
|
3068 One slight limitation here is that we don't account for the possible
|
|
3069 introduction of casts in some cases. See replace_one_candidate for
|
|
3070 the cases where these are introduced. This should probably be cleaned
|
|
3071 up sometime. */
|
|
3072
|
|
3073 static void
|
|
3074 analyze_increments (slsr_cand_t first_dep, machine_mode mode, bool speed)
|
|
3075 {
|
|
3076 unsigned i;
|
|
3077
|
|
3078 for (i = 0; i < incr_vec_len; i++)
|
|
3079 {
|
|
3080 HOST_WIDE_INT incr = incr_vec[i].incr.to_shwi ();
|
|
3081
|
|
3082 /* If somehow this increment is bigger than a HWI, we won't
|
|
3083 be optimizing candidates that use it. And if the increment
|
|
3084 has a count of zero, nothing will be done with it. */
|
|
3085 if (!wi::fits_shwi_p (incr_vec[i].incr) || !incr_vec[i].count)
|
|
3086 incr_vec[i].cost = COST_INFINITE;
|
|
3087
|
|
3088 /* Increments of 0, 1, and -1 are always profitable to replace,
|
|
3089 because they always replace a multiply or add with an add or
|
|
3090 copy, and may cause one or more existing instructions to go
|
|
3091 dead. Exception: -1 can't be assumed to be profitable for
|
|
3092 pointer addition. */
|
|
3093 else if (incr == 0
|
|
3094 || incr == 1
|
|
3095 || (incr == -1
|
|
3096 && !POINTER_TYPE_P (first_dep->cand_type)))
|
|
3097 incr_vec[i].cost = COST_NEUTRAL;
|
|
3098
|
|
3099 /* If we need to add an initializer, give up if a cast from the
|
|
3100 candidate's type to its stride's type can lose precision.
|
|
3101 Note that this already takes into account that the stride may
|
|
3102 have been cast to a wider type, in which case this test won't
|
|
3103 fire. Example:
|
|
3104
|
|
3105 short int _1;
|
|
3106 _2 = (int) _1;
|
|
3107 _3 = _2 * 10;
|
|
3108 _4 = x + _3; ADD: x + (10 * (int)_1) : int
|
|
3109 _5 = _2 * 15;
|
|
3110 _6 = x + _5; ADD: x + (15 * (int)_1) : int
|
|
3111
|
|
3112 Although the stride was a short int initially, the stride
|
|
3113 used in the analysis has been widened to an int, and such
|
|
3114 widening will be done in the initializer as well. */
|
|
3115 else if (!incr_vec[i].initializer
|
|
3116 && TREE_CODE (first_dep->stride) != INTEGER_CST
|
|
3117 && !legal_cast_p_1 (first_dep->stride_type,
|
|
3118 TREE_TYPE (gimple_assign_lhs
|
|
3119 (first_dep->cand_stmt))))
|
|
3120 incr_vec[i].cost = COST_INFINITE;
|
|
3121
|
|
3122 /* If we need to add an initializer, make sure we don't introduce
|
|
3123 a multiply by a pointer type, which can happen in certain cast
|
|
3124 scenarios. */
|
|
3125 else if (!incr_vec[i].initializer
|
|
3126 && TREE_CODE (first_dep->stride) != INTEGER_CST
|
|
3127 && POINTER_TYPE_P (first_dep->stride_type))
|
|
3128 incr_vec[i].cost = COST_INFINITE;
|
|
3129
|
|
3130 /* For any other increment, if this is a multiply candidate, we
|
|
3131 must introduce a temporary T and initialize it with
|
|
3132 T_0 = stride * increment. When optimizing for speed, walk the
|
|
3133 candidate tree to calculate the best cost reduction along any
|
|
3134 path; if it offsets the fixed cost of inserting the initializer,
|
|
3135 replacing the increment is profitable. When optimizing for
|
|
3136 size, instead calculate the total cost reduction from replacing
|
|
3137 all candidates with this increment. */
|
|
3138 else if (first_dep->kind == CAND_MULT)
|
|
3139 {
|
|
3140 int cost = mult_by_coeff_cost (incr, mode, speed);
|
131
|
3141 int repl_savings;
|
|
3142
|
|
3143 if (tree_fits_shwi_p (first_dep->stride))
|
|
3144 {
|
|
3145 HOST_WIDE_INT hwi_stride = tree_to_shwi (first_dep->stride);
|
|
3146 repl_savings = mult_by_coeff_cost (hwi_stride, mode, speed);
|
|
3147 }
|
|
3148 else
|
|
3149 repl_savings = mul_cost (speed, mode);
|
|
3150 repl_savings -= add_cost (speed, mode);
|
|
3151
|
111
|
3152 if (speed)
|
|
3153 cost = lowest_cost_path (cost, repl_savings, first_dep,
|
|
3154 incr_vec[i].incr, COUNT_PHIS);
|
|
3155 else
|
|
3156 cost -= total_savings (repl_savings, first_dep, incr_vec[i].incr,
|
|
3157 COUNT_PHIS);
|
|
3158
|
|
3159 incr_vec[i].cost = cost;
|
|
3160 }
|
|
3161
|
|
3162 /* If this is an add candidate, the initializer may already
|
|
3163 exist, so only calculate the cost of the initializer if it
|
|
3164 doesn't. We are replacing one add with another here, so the
|
|
3165 known replacement savings is zero. We will account for removal
|
|
3166 of dead instructions in lowest_cost_path or total_savings. */
|
|
3167 else
|
|
3168 {
|
|
3169 int cost = 0;
|
|
3170 if (!incr_vec[i].initializer)
|
|
3171 cost = mult_by_coeff_cost (incr, mode, speed);
|
|
3172
|
|
3173 if (speed)
|
|
3174 cost = lowest_cost_path (cost, 0, first_dep, incr_vec[i].incr,
|
|
3175 DONT_COUNT_PHIS);
|
|
3176 else
|
|
3177 cost -= total_savings (0, first_dep, incr_vec[i].incr,
|
|
3178 DONT_COUNT_PHIS);
|
|
3179
|
|
3180 incr_vec[i].cost = cost;
|
|
3181 }
|
|
3182 }
|
|
3183 }
|
|
3184
|
|
3185 /* Return the nearest common dominator of BB1 and BB2. If the blocks
|
|
3186 are identical, return the earlier of C1 and C2 in *WHERE. Otherwise,
|
|
3187 if the NCD matches BB1, return C1 in *WHERE; if the NCD matches BB2,
|
|
3188 return C2 in *WHERE; and if the NCD matches neither, return NULL in
|
|
3189 *WHERE. Note: It is possible for one of C1 and C2 to be NULL. */
|
|
3190
|
|
3191 static basic_block
|
|
3192 ncd_for_two_cands (basic_block bb1, basic_block bb2,
|
|
3193 slsr_cand_t c1, slsr_cand_t c2, slsr_cand_t *where)
|
|
3194 {
|
|
3195 basic_block ncd;
|
|
3196
|
|
3197 if (!bb1)
|
|
3198 {
|
|
3199 *where = c2;
|
|
3200 return bb2;
|
|
3201 }
|
|
3202
|
|
3203 if (!bb2)
|
|
3204 {
|
|
3205 *where = c1;
|
|
3206 return bb1;
|
|
3207 }
|
|
3208
|
|
3209 ncd = nearest_common_dominator (CDI_DOMINATORS, bb1, bb2);
|
|
3210
|
|
3211 /* If both candidates are in the same block, the earlier
|
|
3212 candidate wins. */
|
|
3213 if (bb1 == ncd && bb2 == ncd)
|
|
3214 {
|
|
3215 if (!c1 || (c2 && c2->cand_num < c1->cand_num))
|
|
3216 *where = c2;
|
|
3217 else
|
|
3218 *where = c1;
|
|
3219 }
|
|
3220
|
|
3221 /* Otherwise, if one of them produced a candidate in the
|
|
3222 dominator, that one wins. */
|
|
3223 else if (bb1 == ncd)
|
|
3224 *where = c1;
|
|
3225
|
|
3226 else if (bb2 == ncd)
|
|
3227 *where = c2;
|
|
3228
|
|
3229 /* If neither matches the dominator, neither wins. */
|
|
3230 else
|
|
3231 *where = NULL;
|
|
3232
|
|
3233 return ncd;
|
|
3234 }
|
|
3235
|
|
3236 /* Consider all candidates that feed PHI. Find the nearest common
|
|
3237 dominator of those candidates requiring the given increment INCR.
|
|
3238 Further find and return the nearest common dominator of this result
|
|
3239 with block NCD. If the returned block contains one or more of the
|
|
3240 candidates, return the earliest candidate in the block in *WHERE. */
|
|
3241
|
|
3242 static basic_block
|
|
3243 ncd_with_phi (slsr_cand_t c, const widest_int &incr, gphi *phi,
|
|
3244 basic_block ncd, slsr_cand_t *where)
|
|
3245 {
|
|
3246 unsigned i;
|
|
3247 slsr_cand_t basis = lookup_cand (c->basis);
|
|
3248 slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
|
|
3249
|
|
3250 for (i = 0; i < gimple_phi_num_args (phi); i++)
|
|
3251 {
|
|
3252 tree arg = gimple_phi_arg_def (phi, i);
|
131
|
3253 gimple *arg_def = SSA_NAME_DEF_STMT (arg);
|
|
3254
|
|
3255 if (gimple_code (arg_def) == GIMPLE_PHI)
|
|
3256 ncd = ncd_with_phi (c, incr, as_a <gphi *> (arg_def), ncd, where);
|
|
3257 else
|
111
|
3258 {
|
131
|
3259 widest_int diff;
|
|
3260
|
|
3261 if (operand_equal_p (arg, phi_cand->base_expr, 0))
|
|
3262 diff = -basis->index;
|
|
3263 else
|
111
|
3264 {
|
|
3265 slsr_cand_t arg_cand = base_cand_from_table (arg);
|
131
|
3266 diff = arg_cand->index - basis->index;
|
111
|
3267 }
|
131
|
3268
|
|
3269 basic_block pred = gimple_phi_arg_edge (phi, i)->src;
|
|
3270
|
|
3271 if ((incr == diff) || (!address_arithmetic_p && incr == -diff))
|
|
3272 ncd = ncd_for_two_cands (ncd, pred, *where, NULL, where);
|
111
|
3273 }
|
|
3274 }
|
|
3275
|
|
3276 return ncd;
|
|
3277 }
|
|
3278
|
|
3279 /* Consider the candidate C together with any candidates that feed
|
|
3280 C's phi dependence (if any). Find and return the nearest common
|
|
3281 dominator of those candidates requiring the given increment INCR.
|
|
3282 If the returned block contains one or more of the candidates,
|
|
3283 return the earliest candidate in the block in *WHERE. */
|
|
3284
|
|
3285 static basic_block
|
|
3286 ncd_of_cand_and_phis (slsr_cand_t c, const widest_int &incr, slsr_cand_t *where)
|
|
3287 {
|
|
3288 basic_block ncd = NULL;
|
|
3289
|
|
3290 if (cand_abs_increment (c) == incr)
|
|
3291 {
|
|
3292 ncd = gimple_bb (c->cand_stmt);
|
|
3293 *where = c;
|
|
3294 }
|
|
3295
|
|
3296 if (phi_dependent_cand_p (c))
|
|
3297 ncd = ncd_with_phi (c, incr,
|
|
3298 as_a <gphi *> (lookup_cand (c->def_phi)->cand_stmt),
|
|
3299 ncd, where);
|
|
3300
|
|
3301 return ncd;
|
|
3302 }
|
|
3303
|
|
3304 /* Consider all candidates in the tree rooted at C for which INCR
|
|
3305 represents the required increment of C relative to its basis.
|
|
3306 Find and return the basic block that most nearly dominates all
|
|
3307 such candidates. If the returned block contains one or more of
|
|
3308 the candidates, return the earliest candidate in the block in
|
|
3309 *WHERE. */
|
|
3310
|
|
3311 static basic_block
|
|
3312 nearest_common_dominator_for_cands (slsr_cand_t c, const widest_int &incr,
|
|
3313 slsr_cand_t *where)
|
|
3314 {
|
|
3315 basic_block sib_ncd = NULL, dep_ncd = NULL, this_ncd = NULL, ncd;
|
|
3316 slsr_cand_t sib_where = NULL, dep_where = NULL, this_where = NULL, new_where;
|
|
3317
|
|
3318 /* First find the NCD of all siblings and dependents. */
|
|
3319 if (c->sibling)
|
|
3320 sib_ncd = nearest_common_dominator_for_cands (lookup_cand (c->sibling),
|
|
3321 incr, &sib_where);
|
|
3322 if (c->dependent)
|
|
3323 dep_ncd = nearest_common_dominator_for_cands (lookup_cand (c->dependent),
|
|
3324 incr, &dep_where);
|
|
3325 if (!sib_ncd && !dep_ncd)
|
|
3326 {
|
|
3327 new_where = NULL;
|
|
3328 ncd = NULL;
|
|
3329 }
|
|
3330 else if (sib_ncd && !dep_ncd)
|
|
3331 {
|
|
3332 new_where = sib_where;
|
|
3333 ncd = sib_ncd;
|
|
3334 }
|
|
3335 else if (dep_ncd && !sib_ncd)
|
|
3336 {
|
|
3337 new_where = dep_where;
|
|
3338 ncd = dep_ncd;
|
|
3339 }
|
|
3340 else
|
|
3341 ncd = ncd_for_two_cands (sib_ncd, dep_ncd, sib_where,
|
|
3342 dep_where, &new_where);
|
|
3343
|
|
3344 /* If the candidate's increment doesn't match the one we're interested
|
|
3345 in (and nor do any increments for feeding defs of a phi-dependence),
|
|
3346 then the result depends only on siblings and dependents. */
|
|
3347 this_ncd = ncd_of_cand_and_phis (c, incr, &this_where);
|
|
3348
|
|
3349 if (!this_ncd || cand_already_replaced (c))
|
|
3350 {
|
|
3351 *where = new_where;
|
|
3352 return ncd;
|
|
3353 }
|
|
3354
|
|
3355 /* Otherwise, compare this candidate with the result from all siblings
|
|
3356 and dependents. */
|
|
3357 ncd = ncd_for_two_cands (ncd, this_ncd, new_where, this_where, where);
|
|
3358
|
|
3359 return ncd;
|
|
3360 }
|
|
3361
|
|
3362 /* Return TRUE if the increment indexed by INDEX is profitable to replace. */
|
|
3363
|
|
3364 static inline bool
|
|
3365 profitable_increment_p (unsigned index)
|
|
3366 {
|
|
3367 return (incr_vec[index].cost <= COST_NEUTRAL);
|
|
3368 }
|
|
3369
|
|
3370 /* For each profitable increment in the increment vector not equal to
|
|
3371 0 or 1 (or -1, for non-pointer arithmetic), find the nearest common
|
|
3372 dominator of all statements in the candidate chain rooted at C
|
|
3373 that require that increment, and insert an initializer
|
|
3374 T_0 = stride * increment at that location. Record T_0 with the
|
|
3375 increment record. */
|
|
3376
|
|
3377 static void
|
|
3378 insert_initializers (slsr_cand_t c)
|
|
3379 {
|
|
3380 unsigned i;
|
|
3381
|
|
3382 for (i = 0; i < incr_vec_len; i++)
|
|
3383 {
|
|
3384 basic_block bb;
|
|
3385 slsr_cand_t where = NULL;
|
|
3386 gassign *init_stmt;
|
|
3387 gassign *cast_stmt = NULL;
|
|
3388 tree new_name, incr_tree, init_stride;
|
|
3389 widest_int incr = incr_vec[i].incr;
|
|
3390
|
|
3391 if (!profitable_increment_p (i)
|
|
3392 || incr == 1
|
|
3393 || (incr == -1
|
|
3394 && (!POINTER_TYPE_P (lookup_cand (c->basis)->cand_type)))
|
|
3395 || incr == 0)
|
|
3396 continue;
|
|
3397
|
|
3398 /* We may have already identified an existing initializer that
|
|
3399 will suffice. */
|
|
3400 if (incr_vec[i].initializer)
|
|
3401 {
|
|
3402 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
3403 {
|
|
3404 fputs ("Using existing initializer: ", dump_file);
|
|
3405 print_gimple_stmt (dump_file,
|
|
3406 SSA_NAME_DEF_STMT (incr_vec[i].initializer),
|
131
|
3407 0, TDF_NONE);
|
111
|
3408 }
|
|
3409 continue;
|
|
3410 }
|
|
3411
|
|
3412 /* Find the block that most closely dominates all candidates
|
|
3413 with this increment. If there is at least one candidate in
|
|
3414 that block, the earliest one will be returned in WHERE. */
|
|
3415 bb = nearest_common_dominator_for_cands (c, incr, &where);
|
|
3416
|
|
3417 /* If the NCD is not dominated by the block containing the
|
|
3418 definition of the stride, we can't legally insert a
|
|
3419 single initializer. Mark the increment as unprofitable
|
|
3420 so we don't make any replacements. FIXME: Multiple
|
|
3421 initializers could be placed with more analysis. */
|
|
3422 gimple *stride_def = SSA_NAME_DEF_STMT (c->stride);
|
|
3423 basic_block stride_bb = gimple_bb (stride_def);
|
|
3424
|
|
3425 if (stride_bb && !dominated_by_p (CDI_DOMINATORS, bb, stride_bb))
|
|
3426 {
|
|
3427 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
3428 fprintf (dump_file,
|
|
3429 "Initializer #%d cannot be legally placed\n", i);
|
|
3430 incr_vec[i].cost = COST_INFINITE;
|
|
3431 continue;
|
|
3432 }
|
|
3433
|
|
3434 /* If the nominal stride has a different type than the recorded
|
|
3435 stride type, build a cast from the nominal stride to that type. */
|
|
3436 if (!types_compatible_p (TREE_TYPE (c->stride), c->stride_type))
|
|
3437 {
|
|
3438 init_stride = make_temp_ssa_name (c->stride_type, NULL, "slsr");
|
|
3439 cast_stmt = gimple_build_assign (init_stride, NOP_EXPR, c->stride);
|
|
3440 }
|
|
3441 else
|
|
3442 init_stride = c->stride;
|
|
3443
|
|
3444 /* Create a new SSA name to hold the initializer's value. */
|
|
3445 new_name = make_temp_ssa_name (c->stride_type, NULL, "slsr");
|
|
3446 incr_vec[i].initializer = new_name;
|
|
3447
|
|
3448 /* Create the initializer and insert it in the latest possible
|
|
3449 dominating position. */
|
|
3450 incr_tree = wide_int_to_tree (c->stride_type, incr);
|
|
3451 init_stmt = gimple_build_assign (new_name, MULT_EXPR,
|
|
3452 init_stride, incr_tree);
|
|
3453 if (where)
|
|
3454 {
|
|
3455 gimple_stmt_iterator gsi = gsi_for_stmt (where->cand_stmt);
|
|
3456 location_t loc = gimple_location (where->cand_stmt);
|
|
3457
|
|
3458 if (cast_stmt)
|
|
3459 {
|
|
3460 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
|
|
3461 gimple_set_location (cast_stmt, loc);
|
|
3462 }
|
|
3463
|
|
3464 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
|
|
3465 gimple_set_location (init_stmt, loc);
|
|
3466 }
|
|
3467 else
|
|
3468 {
|
|
3469 gimple_stmt_iterator gsi = gsi_last_bb (bb);
|
|
3470 gimple *basis_stmt = lookup_cand (c->basis)->cand_stmt;
|
|
3471 location_t loc = gimple_location (basis_stmt);
|
|
3472
|
|
3473 if (!gsi_end_p (gsi) && stmt_ends_bb_p (gsi_stmt (gsi)))
|
|
3474 {
|
|
3475 if (cast_stmt)
|
|
3476 {
|
|
3477 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
|
|
3478 gimple_set_location (cast_stmt, loc);
|
|
3479 }
|
|
3480 gsi_insert_before (&gsi, init_stmt, GSI_SAME_STMT);
|
|
3481 }
|
|
3482 else
|
|
3483 {
|
|
3484 if (cast_stmt)
|
|
3485 {
|
|
3486 gsi_insert_after (&gsi, cast_stmt, GSI_NEW_STMT);
|
|
3487 gimple_set_location (cast_stmt, loc);
|
|
3488 }
|
131
|
3489 gsi_insert_after (&gsi, init_stmt, GSI_NEW_STMT);
|
111
|
3490 }
|
|
3491
|
|
3492 gimple_set_location (init_stmt, gimple_location (basis_stmt));
|
|
3493 }
|
|
3494
|
|
3495 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
3496 {
|
|
3497 if (cast_stmt)
|
|
3498 {
|
|
3499 fputs ("Inserting stride cast: ", dump_file);
|
|
3500 print_gimple_stmt (dump_file, cast_stmt, 0);
|
|
3501 }
|
|
3502 fputs ("Inserting initializer: ", dump_file);
|
|
3503 print_gimple_stmt (dump_file, init_stmt, 0);
|
|
3504 }
|
|
3505 }
|
|
3506 }
|
|
3507
|
|
3508 /* Recursive helper function for all_phi_incrs_profitable. */
|
|
3509
|
|
3510 static bool
|
|
3511 all_phi_incrs_profitable_1 (slsr_cand_t c, gphi *phi, int *spread)
|
|
3512 {
|
|
3513 unsigned i;
|
|
3514 slsr_cand_t basis = lookup_cand (c->basis);
|
|
3515 slsr_cand_t phi_cand = *stmt_cand_map->get (phi);
|
|
3516
|
|
3517 if (phi_cand->visited)
|
|
3518 return true;
|
|
3519
|
|
3520 phi_cand->visited = 1;
|
|
3521 (*spread)++;
|
|
3522
|
|
3523 /* If the basis doesn't dominate the PHI (including when the PHI is
|
|
3524 in the same block as the basis), we won't be able to create a PHI
|
|
3525 using the basis here. */
|
|
3526 basic_block basis_bb = gimple_bb (basis->cand_stmt);
|
|
3527 basic_block phi_bb = gimple_bb (phi);
|
|
3528
|
|
3529 if (phi_bb == basis_bb
|
|
3530 || !dominated_by_p (CDI_DOMINATORS, phi_bb, basis_bb))
|
|
3531 return false;
|
|
3532
|
|
3533 for (i = 0; i < gimple_phi_num_args (phi); i++)
|
|
3534 {
|
|
3535 /* If the PHI arg resides in a block not dominated by the basis,
|
|
3536 we won't be able to create a PHI using the basis here. */
|
|
3537 basic_block pred_bb = gimple_phi_arg_edge (phi, i)->src;
|
|
3538
|
|
3539 if (!dominated_by_p (CDI_DOMINATORS, pred_bb, basis_bb))
|
|
3540 return false;
|
|
3541
|
|
3542 tree arg = gimple_phi_arg_def (phi, i);
|
131
|
3543 gimple *arg_def = SSA_NAME_DEF_STMT (arg);
|
|
3544
|
|
3545 if (gimple_code (arg_def) == GIMPLE_PHI)
|
111
|
3546 {
|
131
|
3547 if (!all_phi_incrs_profitable_1 (c, as_a <gphi *> (arg_def), spread)
|
|
3548 || *spread > MAX_SPREAD)
|
|
3549 return false;
|
|
3550 }
|
|
3551 else
|
|
3552 {
|
|
3553 int j;
|
|
3554 widest_int increment;
|
|
3555
|
|
3556 if (operand_equal_p (arg, phi_cand->base_expr, 0))
|
|
3557 increment = -basis->index;
|
111
|
3558 else
|
|
3559 {
|
|
3560 slsr_cand_t arg_cand = base_cand_from_table (arg);
|
131
|
3561 increment = arg_cand->index - basis->index;
|
111
|
3562 }
|
131
|
3563
|
|
3564 if (!address_arithmetic_p && wi::neg_p (increment))
|
|
3565 increment = -increment;
|
|
3566
|
|
3567 j = incr_vec_index (increment);
|
|
3568
|
|
3569 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
3570 {
|
|
3571 fprintf (dump_file, " Conditional candidate %d, phi: ",
|
|
3572 c->cand_num);
|
|
3573 print_gimple_stmt (dump_file, phi, 0);
|
|
3574 fputs (" increment: ", dump_file);
|
|
3575 print_decs (increment, dump_file);
|
|
3576 if (j < 0)
|
|
3577 fprintf (dump_file,
|
|
3578 "\n Not replaced; incr_vec overflow.\n");
|
|
3579 else {
|
|
3580 fprintf (dump_file, "\n cost: %d\n", incr_vec[j].cost);
|
|
3581 if (profitable_increment_p (j))
|
|
3582 fputs (" Replacing...\n", dump_file);
|
|
3583 else
|
|
3584 fputs (" Not replaced.\n", dump_file);
|
|
3585 }
|
|
3586 }
|
|
3587
|
|
3588 if (j < 0 || !profitable_increment_p (j))
|
|
3589 return false;
|
111
|
3590 }
|
|
3591 }
|
|
3592
|
|
3593 return true;
|
|
3594 }
|
|
3595
|
|
3596 /* Return TRUE iff all required increments for candidates feeding PHI
|
|
3597 are profitable (and legal!) to replace on behalf of candidate C. */
|
|
3598
|
|
3599 static bool
|
|
3600 all_phi_incrs_profitable (slsr_cand_t c, gphi *phi)
|
|
3601 {
|
|
3602 int spread = 0;
|
|
3603 bool retval = all_phi_incrs_profitable_1 (c, phi, &spread);
|
|
3604 clear_visited (phi);
|
|
3605 return retval;
|
|
3606 }
|
|
3607
|
|
3608 /* Create a NOP_EXPR that copies FROM_EXPR into a new SSA name of
|
|
3609 type TO_TYPE, and insert it in front of the statement represented
|
|
3610 by candidate C. Use *NEW_VAR to create the new SSA name. Return
|
|
3611 the new SSA name. */
|
|
3612
|
|
3613 static tree
|
|
3614 introduce_cast_before_cand (slsr_cand_t c, tree to_type, tree from_expr)
|
|
3615 {
|
|
3616 tree cast_lhs;
|
|
3617 gassign *cast_stmt;
|
|
3618 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
|
|
3619
|
|
3620 cast_lhs = make_temp_ssa_name (to_type, NULL, "slsr");
|
|
3621 cast_stmt = gimple_build_assign (cast_lhs, NOP_EXPR, from_expr);
|
|
3622 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
|
|
3623 gsi_insert_before (&gsi, cast_stmt, GSI_SAME_STMT);
|
|
3624
|
|
3625 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
3626 {
|
|
3627 fputs (" Inserting: ", dump_file);
|
|
3628 print_gimple_stmt (dump_file, cast_stmt, 0);
|
|
3629 }
|
|
3630
|
|
3631 return cast_lhs;
|
|
3632 }
|
|
3633
|
|
3634 /* Replace the RHS of the statement represented by candidate C with
|
|
3635 NEW_CODE, NEW_RHS1, and NEW_RHS2, provided that to do so doesn't
|
|
3636 leave C unchanged or just interchange its operands. The original
|
|
3637 operation and operands are in OLD_CODE, OLD_RHS1, and OLD_RHS2.
|
|
3638 If the replacement was made and we are doing a details dump,
|
|
3639 return the revised statement, else NULL. */
|
|
3640
|
|
3641 static gimple *
|
|
3642 replace_rhs_if_not_dup (enum tree_code new_code, tree new_rhs1, tree new_rhs2,
|
|
3643 enum tree_code old_code, tree old_rhs1, tree old_rhs2,
|
|
3644 slsr_cand_t c)
|
|
3645 {
|
|
3646 if (new_code != old_code
|
|
3647 || ((!operand_equal_p (new_rhs1, old_rhs1, 0)
|
|
3648 || !operand_equal_p (new_rhs2, old_rhs2, 0))
|
|
3649 && (!operand_equal_p (new_rhs1, old_rhs2, 0)
|
|
3650 || !operand_equal_p (new_rhs2, old_rhs1, 0))))
|
|
3651 {
|
|
3652 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
|
131
|
3653 slsr_cand_t cc = lookup_cand (c->first_interp);
|
111
|
3654 gimple_assign_set_rhs_with_ops (&gsi, new_code, new_rhs1, new_rhs2);
|
|
3655 update_stmt (gsi_stmt (gsi));
|
131
|
3656 while (cc)
|
111
|
3657 {
|
|
3658 cc->cand_stmt = gsi_stmt (gsi);
|
145
|
3659 cc = lookup_cand (cc->next_interp);
|
111
|
3660 }
|
|
3661
|
|
3662 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
3663 return gsi_stmt (gsi);
|
|
3664 }
|
|
3665
|
|
3666 else if (dump_file && (dump_flags & TDF_DETAILS))
|
|
3667 fputs (" (duplicate, not actually replacing)\n", dump_file);
|
|
3668
|
|
3669 return NULL;
|
|
3670 }
|
|
3671
|
|
3672 /* Strength-reduce the statement represented by candidate C by replacing
|
|
3673 it with an equivalent addition or subtraction. I is the index into
|
|
3674 the increment vector identifying C's increment. NEW_VAR is used to
|
|
3675 create a new SSA name if a cast needs to be introduced. BASIS_NAME
|
|
3676 is the rhs1 to use in creating the add/subtract. */
|
|
3677
|
|
3678 static void
|
|
3679 replace_one_candidate (slsr_cand_t c, unsigned i, tree basis_name)
|
|
3680 {
|
|
3681 gimple *stmt_to_print = NULL;
|
|
3682 tree orig_rhs1, orig_rhs2;
|
|
3683 tree rhs2;
|
|
3684 enum tree_code orig_code, repl_code;
|
|
3685 widest_int cand_incr;
|
|
3686
|
|
3687 orig_code = gimple_assign_rhs_code (c->cand_stmt);
|
|
3688 orig_rhs1 = gimple_assign_rhs1 (c->cand_stmt);
|
|
3689 orig_rhs2 = gimple_assign_rhs2 (c->cand_stmt);
|
|
3690 cand_incr = cand_increment (c);
|
|
3691
|
131
|
3692 /* If orig_rhs2 is NULL, we have already replaced this in situ with
|
|
3693 a copy statement under another interpretation. */
|
|
3694 if (!orig_rhs2)
|
|
3695 return;
|
|
3696
|
111
|
3697 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
3698 {
|
|
3699 fputs ("Replacing: ", dump_file);
|
|
3700 print_gimple_stmt (dump_file, c->cand_stmt, 0);
|
|
3701 stmt_to_print = c->cand_stmt;
|
|
3702 }
|
|
3703
|
|
3704 if (address_arithmetic_p)
|
|
3705 repl_code = POINTER_PLUS_EXPR;
|
|
3706 else
|
|
3707 repl_code = PLUS_EXPR;
|
|
3708
|
|
3709 /* If the increment has an initializer T_0, replace the candidate
|
|
3710 statement with an add of the basis name and the initializer. */
|
|
3711 if (incr_vec[i].initializer)
|
|
3712 {
|
|
3713 tree init_type = TREE_TYPE (incr_vec[i].initializer);
|
|
3714 tree orig_type = TREE_TYPE (orig_rhs2);
|
|
3715
|
|
3716 if (types_compatible_p (orig_type, init_type))
|
|
3717 rhs2 = incr_vec[i].initializer;
|
|
3718 else
|
|
3719 rhs2 = introduce_cast_before_cand (c, orig_type,
|
|
3720 incr_vec[i].initializer);
|
|
3721
|
|
3722 if (incr_vec[i].incr != cand_incr)
|
|
3723 {
|
|
3724 gcc_assert (repl_code == PLUS_EXPR);
|
|
3725 repl_code = MINUS_EXPR;
|
|
3726 }
|
|
3727
|
|
3728 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
|
|
3729 orig_code, orig_rhs1, orig_rhs2,
|
|
3730 c);
|
|
3731 }
|
|
3732
|
|
3733 /* Otherwise, the increment is one of -1, 0, and 1. Replace
|
|
3734 with a subtract of the stride from the basis name, a copy
|
|
3735 from the basis name, or an add of the stride to the basis
|
|
3736 name, respectively. It may be necessary to introduce a
|
|
3737 cast (or reuse an existing cast). */
|
|
3738 else if (cand_incr == 1)
|
|
3739 {
|
|
3740 tree stride_type = TREE_TYPE (c->stride);
|
|
3741 tree orig_type = TREE_TYPE (orig_rhs2);
|
|
3742
|
|
3743 if (types_compatible_p (orig_type, stride_type))
|
|
3744 rhs2 = c->stride;
|
|
3745 else
|
|
3746 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
|
|
3747
|
|
3748 stmt_to_print = replace_rhs_if_not_dup (repl_code, basis_name, rhs2,
|
|
3749 orig_code, orig_rhs1, orig_rhs2,
|
|
3750 c);
|
|
3751 }
|
|
3752
|
|
3753 else if (cand_incr == -1)
|
|
3754 {
|
|
3755 tree stride_type = TREE_TYPE (c->stride);
|
|
3756 tree orig_type = TREE_TYPE (orig_rhs2);
|
|
3757 gcc_assert (repl_code != POINTER_PLUS_EXPR);
|
|
3758
|
|
3759 if (types_compatible_p (orig_type, stride_type))
|
|
3760 rhs2 = c->stride;
|
|
3761 else
|
|
3762 rhs2 = introduce_cast_before_cand (c, orig_type, c->stride);
|
|
3763
|
|
3764 if (orig_code != MINUS_EXPR
|
|
3765 || !operand_equal_p (basis_name, orig_rhs1, 0)
|
|
3766 || !operand_equal_p (rhs2, orig_rhs2, 0))
|
|
3767 {
|
|
3768 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
|
131
|
3769 slsr_cand_t cc = lookup_cand (c->first_interp);
|
111
|
3770 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, basis_name, rhs2);
|
|
3771 update_stmt (gsi_stmt (gsi));
|
131
|
3772 while (cc)
|
111
|
3773 {
|
|
3774 cc->cand_stmt = gsi_stmt (gsi);
|
145
|
3775 cc = lookup_cand (cc->next_interp);
|
111
|
3776 }
|
|
3777
|
|
3778 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
3779 stmt_to_print = gsi_stmt (gsi);
|
|
3780 }
|
|
3781 else if (dump_file && (dump_flags & TDF_DETAILS))
|
|
3782 fputs (" (duplicate, not actually replacing)\n", dump_file);
|
|
3783 }
|
|
3784
|
|
3785 else if (cand_incr == 0)
|
|
3786 {
|
|
3787 tree lhs = gimple_assign_lhs (c->cand_stmt);
|
|
3788 tree lhs_type = TREE_TYPE (lhs);
|
|
3789 tree basis_type = TREE_TYPE (basis_name);
|
|
3790
|
|
3791 if (types_compatible_p (lhs_type, basis_type))
|
|
3792 {
|
|
3793 gassign *copy_stmt = gimple_build_assign (lhs, basis_name);
|
|
3794 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
|
131
|
3795 slsr_cand_t cc = lookup_cand (c->first_interp);
|
111
|
3796 gimple_set_location (copy_stmt, gimple_location (c->cand_stmt));
|
|
3797 gsi_replace (&gsi, copy_stmt, false);
|
131
|
3798 while (cc)
|
111
|
3799 {
|
|
3800 cc->cand_stmt = copy_stmt;
|
145
|
3801 cc = lookup_cand (cc->next_interp);
|
111
|
3802 }
|
|
3803
|
|
3804 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
3805 stmt_to_print = copy_stmt;
|
|
3806 }
|
|
3807 else
|
|
3808 {
|
|
3809 gimple_stmt_iterator gsi = gsi_for_stmt (c->cand_stmt);
|
|
3810 gassign *cast_stmt = gimple_build_assign (lhs, NOP_EXPR, basis_name);
|
131
|
3811 slsr_cand_t cc = lookup_cand (c->first_interp);
|
111
|
3812 gimple_set_location (cast_stmt, gimple_location (c->cand_stmt));
|
|
3813 gsi_replace (&gsi, cast_stmt, false);
|
131
|
3814 while (cc)
|
111
|
3815 {
|
|
3816 cc->cand_stmt = cast_stmt;
|
145
|
3817 cc = lookup_cand (cc->next_interp);
|
111
|
3818 }
|
|
3819
|
|
3820 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
3821 stmt_to_print = cast_stmt;
|
|
3822 }
|
|
3823 }
|
|
3824 else
|
|
3825 gcc_unreachable ();
|
|
3826
|
|
3827 if (dump_file && (dump_flags & TDF_DETAILS) && stmt_to_print)
|
|
3828 {
|
|
3829 fputs ("With: ", dump_file);
|
|
3830 print_gimple_stmt (dump_file, stmt_to_print, 0);
|
|
3831 fputs ("\n", dump_file);
|
|
3832 }
|
|
3833 }
|
|
3834
|
|
3835 /* For each candidate in the tree rooted at C, replace it with
|
|
3836 an increment if such has been shown to be profitable. */
|
|
3837
|
|
3838 static void
|
|
3839 replace_profitable_candidates (slsr_cand_t c)
|
|
3840 {
|
|
3841 if (!cand_already_replaced (c))
|
|
3842 {
|
|
3843 widest_int increment = cand_abs_increment (c);
|
|
3844 enum tree_code orig_code = gimple_assign_rhs_code (c->cand_stmt);
|
|
3845 int i;
|
|
3846
|
|
3847 i = incr_vec_index (increment);
|
|
3848
|
|
3849 /* Only process profitable increments. Nothing useful can be done
|
|
3850 to a cast or copy. */
|
|
3851 if (i >= 0
|
|
3852 && profitable_increment_p (i)
|
|
3853 && orig_code != SSA_NAME
|
|
3854 && !CONVERT_EXPR_CODE_P (orig_code))
|
|
3855 {
|
|
3856 if (phi_dependent_cand_p (c))
|
|
3857 {
|
|
3858 gphi *phi = as_a <gphi *> (lookup_cand (c->def_phi)->cand_stmt);
|
|
3859
|
|
3860 if (all_phi_incrs_profitable (c, phi))
|
|
3861 {
|
|
3862 /* Look up the LHS SSA name from C's basis. This will be
|
|
3863 the RHS1 of the adds we will introduce to create new
|
|
3864 phi arguments. */
|
|
3865 slsr_cand_t basis = lookup_cand (c->basis);
|
|
3866 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
|
|
3867
|
|
3868 /* Create a new phi statement that will represent C's true
|
|
3869 basis after the transformation is complete. */
|
|
3870 location_t loc = gimple_location (c->cand_stmt);
|
|
3871 tree name = create_phi_basis (c, phi, basis_name,
|
|
3872 loc, UNKNOWN_STRIDE);
|
|
3873
|
|
3874 /* Replace C with an add of the new basis phi and the
|
|
3875 increment. */
|
|
3876 replace_one_candidate (c, i, name);
|
|
3877 }
|
|
3878 }
|
|
3879 else
|
|
3880 {
|
|
3881 slsr_cand_t basis = lookup_cand (c->basis);
|
|
3882 tree basis_name = gimple_assign_lhs (basis->cand_stmt);
|
|
3883 replace_one_candidate (c, i, basis_name);
|
|
3884 }
|
|
3885 }
|
|
3886 }
|
|
3887
|
|
3888 if (c->sibling)
|
|
3889 replace_profitable_candidates (lookup_cand (c->sibling));
|
|
3890
|
|
3891 if (c->dependent)
|
|
3892 replace_profitable_candidates (lookup_cand (c->dependent));
|
|
3893 }
|
|
3894
|
|
3895 /* Analyze costs of related candidates in the candidate vector,
|
|
3896 and make beneficial replacements. */
|
|
3897
|
|
3898 static void
|
|
3899 analyze_candidates_and_replace (void)
|
|
3900 {
|
|
3901 unsigned i;
|
|
3902 slsr_cand_t c;
|
|
3903
|
|
3904 /* Each candidate that has a null basis and a non-null
|
|
3905 dependent is the root of a tree of related statements.
|
|
3906 Analyze each tree to determine a subset of those
|
145
|
3907 statements that can be replaced with maximum benefit.
|
|
3908
|
|
3909 Note the first NULL element is skipped. */
|
|
3910 FOR_EACH_VEC_ELT_FROM (cand_vec, i, c, 1)
|
111
|
3911 {
|
|
3912 slsr_cand_t first_dep;
|
|
3913
|
|
3914 if (c->basis != 0 || c->dependent == 0)
|
|
3915 continue;
|
|
3916
|
|
3917 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
3918 fprintf (dump_file, "\nProcessing dependency tree rooted at %d.\n",
|
|
3919 c->cand_num);
|
|
3920
|
|
3921 first_dep = lookup_cand (c->dependent);
|
|
3922
|
|
3923 /* If this is a chain of CAND_REFs, unconditionally replace
|
|
3924 each of them with a strength-reduced data reference. */
|
|
3925 if (c->kind == CAND_REF)
|
|
3926 replace_refs (c);
|
|
3927
|
|
3928 /* If the common stride of all related candidates is a known
|
|
3929 constant, each candidate without a phi-dependence can be
|
|
3930 profitably replaced. Each replaces a multiply by a single
|
|
3931 add, with the possibility that a feeding add also goes dead.
|
|
3932 A candidate with a phi-dependence is replaced only if the
|
|
3933 compensation code it requires is offset by the strength
|
|
3934 reduction savings. */
|
|
3935 else if (TREE_CODE (c->stride) == INTEGER_CST)
|
|
3936 replace_uncond_cands_and_profitable_phis (first_dep);
|
|
3937
|
|
3938 /* When the stride is an SSA name, it may still be profitable
|
|
3939 to replace some or all of the dependent candidates, depending
|
|
3940 on whether the introduced increments can be reused, or are
|
|
3941 less expensive to calculate than the replaced statements. */
|
|
3942 else
|
|
3943 {
|
|
3944 machine_mode mode;
|
|
3945 bool speed;
|
|
3946
|
|
3947 /* Determine whether we'll be generating pointer arithmetic
|
|
3948 when replacing candidates. */
|
|
3949 address_arithmetic_p = (c->kind == CAND_ADD
|
|
3950 && POINTER_TYPE_P (c->cand_type));
|
|
3951
|
|
3952 /* If all candidates have already been replaced under other
|
|
3953 interpretations, nothing remains to be done. */
|
|
3954 if (!count_candidates (c))
|
|
3955 continue;
|
|
3956
|
|
3957 /* Construct an array of increments for this candidate chain. */
|
|
3958 incr_vec = XNEWVEC (incr_info, MAX_INCR_VEC_LEN);
|
|
3959 incr_vec_len = 0;
|
|
3960 record_increments (c);
|
|
3961
|
|
3962 /* Determine which increments are profitable to replace. */
|
|
3963 mode = TYPE_MODE (TREE_TYPE (gimple_assign_lhs (c->cand_stmt)));
|
|
3964 speed = optimize_cands_for_speed_p (c);
|
|
3965 analyze_increments (first_dep, mode, speed);
|
|
3966
|
|
3967 /* Insert initializers of the form T_0 = stride * increment
|
|
3968 for use in profitable replacements. */
|
|
3969 insert_initializers (first_dep);
|
|
3970 dump_incr_vec ();
|
|
3971
|
|
3972 /* Perform the replacements. */
|
|
3973 replace_profitable_candidates (first_dep);
|
|
3974 free (incr_vec);
|
|
3975 }
|
|
3976 }
|
|
3977
|
|
3978 /* For conditional candidates, we may have uncommitted insertions
|
|
3979 on edges to clean up. */
|
|
3980 gsi_commit_edge_inserts ();
|
|
3981 }
|
|
3982
|
|
3983 namespace {
|
|
3984
|
|
3985 const pass_data pass_data_strength_reduction =
|
|
3986 {
|
|
3987 GIMPLE_PASS, /* type */
|
|
3988 "slsr", /* name */
|
|
3989 OPTGROUP_NONE, /* optinfo_flags */
|
|
3990 TV_GIMPLE_SLSR, /* tv_id */
|
|
3991 ( PROP_cfg | PROP_ssa ), /* properties_required */
|
|
3992 0, /* properties_provided */
|
|
3993 0, /* properties_destroyed */
|
|
3994 0, /* todo_flags_start */
|
|
3995 0, /* todo_flags_finish */
|
|
3996 };
|
|
3997
|
|
3998 class pass_strength_reduction : public gimple_opt_pass
|
|
3999 {
|
|
4000 public:
|
|
4001 pass_strength_reduction (gcc::context *ctxt)
|
|
4002 : gimple_opt_pass (pass_data_strength_reduction, ctxt)
|
|
4003 {}
|
|
4004
|
|
4005 /* opt_pass methods: */
|
|
4006 virtual bool gate (function *) { return flag_tree_slsr; }
|
|
4007 virtual unsigned int execute (function *);
|
|
4008
|
|
4009 }; // class pass_strength_reduction
|
|
4010
|
|
4011 unsigned
|
|
4012 pass_strength_reduction::execute (function *fun)
|
|
4013 {
|
|
4014 /* Create the obstack where candidates will reside. */
|
|
4015 gcc_obstack_init (&cand_obstack);
|
|
4016
|
145
|
4017 /* Allocate the candidate vector and initialize the first NULL element. */
|
111
|
4018 cand_vec.create (128);
|
145
|
4019 cand_vec.safe_push (NULL);
|
111
|
4020
|
|
4021 /* Allocate the mapping from statements to candidate indices. */
|
|
4022 stmt_cand_map = new hash_map<gimple *, slsr_cand_t>;
|
|
4023
|
|
4024 /* Create the obstack where candidate chains will reside. */
|
|
4025 gcc_obstack_init (&chain_obstack);
|
|
4026
|
|
4027 /* Allocate the mapping from base expressions to candidate chains. */
|
|
4028 base_cand_map = new hash_table<cand_chain_hasher> (500);
|
|
4029
|
|
4030 /* Allocate the mapping from bases to alternative bases. */
|
|
4031 alt_base_map = new hash_map<tree, tree>;
|
|
4032
|
|
4033 /* Initialize the loop optimizer. We need to detect flow across
|
|
4034 back edges, and this gives us dominator information as well. */
|
|
4035 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
|
|
4036
|
|
4037 /* Walk the CFG in predominator order looking for strength reduction
|
|
4038 candidates. */
|
|
4039 find_candidates_dom_walker (CDI_DOMINATORS)
|
|
4040 .walk (fun->cfg->x_entry_block_ptr);
|
|
4041
|
|
4042 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
4043 {
|
|
4044 dump_cand_vec ();
|
|
4045 dump_cand_chains ();
|
|
4046 }
|
|
4047
|
|
4048 delete alt_base_map;
|
|
4049 free_affine_expand_cache (&name_expansions);
|
|
4050
|
|
4051 /* Analyze costs and make appropriate replacements. */
|
|
4052 analyze_candidates_and_replace ();
|
|
4053
|
|
4054 loop_optimizer_finalize ();
|
|
4055 delete base_cand_map;
|
|
4056 base_cand_map = NULL;
|
|
4057 obstack_free (&chain_obstack, NULL);
|
|
4058 delete stmt_cand_map;
|
|
4059 cand_vec.release ();
|
|
4060 obstack_free (&cand_obstack, NULL);
|
|
4061
|
|
4062 return 0;
|
|
4063 }
|
|
4064
|
|
4065 } // anon namespace
|
|
4066
|
|
4067 gimple_opt_pass *
|
|
4068 make_pass_strength_reduction (gcc::context *ctxt)
|
|
4069 {
|
|
4070 return new pass_strength_reduction (ctxt);
|
|
4071 }
|