Mercurial > hg > CbC > CbC_gcc
comparison gcc/gimple-loop-versioning.cc @ 145:1830386684a0
gcc-9.2.0
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 11:34:05 +0900 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
131:84e7813d76e9 | 145:1830386684a0 |
---|---|
1 /* Loop versioning pass. | |
2 Copyright (C) 2018-2020 Free Software Foundation, Inc. | |
3 | |
4 This file is part of GCC. | |
5 | |
6 GCC is free software; you can redistribute it and/or modify it | |
7 under the terms of the GNU General Public License as published by the | |
8 Free Software Foundation; either version 3, or (at your option) any | |
9 later version. | |
10 | |
11 GCC is distributed in the hope that it will be useful, but WITHOUT | |
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
14 for more details. | |
15 | |
16 You should have received a copy of the GNU General Public License | |
17 along with GCC; see the file COPYING3. If not see | |
18 <http://www.gnu.org/licenses/>. */ | |
19 | |
20 #include "config.h" | |
21 #include "system.h" | |
22 #include "coretypes.h" | |
23 #include "backend.h" | |
24 #include "tree.h" | |
25 #include "gimple.h" | |
26 #include "gimple-iterator.h" | |
27 #include "tree-pass.h" | |
28 #include "gimplify-me.h" | |
29 #include "cfgloop.h" | |
30 #include "tree-ssa-loop.h" | |
31 #include "ssa.h" | |
32 #include "tree-scalar-evolution.h" | |
33 #include "tree-chrec.h" | |
34 #include "tree-ssa-loop-ivopts.h" | |
35 #include "fold-const.h" | |
36 #include "tree-ssa-propagate.h" | |
37 #include "tree-inline.h" | |
38 #include "domwalk.h" | |
39 #include "alloc-pool.h" | |
40 #include "vr-values.h" | |
41 #include "gimple-ssa-evrp-analyze.h" | |
42 #include "tree-vectorizer.h" | |
43 #include "omp-general.h" | |
44 #include "predict.h" | |
45 #include "tree-into-ssa.h" | |
46 | |
47 namespace { | |
48 | |
49 /* This pass looks for loops that could be simplified if certain loop | |
50 invariant conditions were true. It is effectively a form of loop | |
51 splitting in which the pass produces the split conditions itself, | |
52 instead of using ones that are already present in the IL. | |
53 | |
54 Versioning for when strides are 1 | |
55 --------------------------------- | |
56 | |
57 At the moment the only thing the pass looks for are memory references | |
58 like: | |
59 | |
60 for (auto i : ...) | |
61 ...x[i * stride]... | |
62 | |
63 It considers changing such loops to: | |
64 | |
65 if (stride == 1) | |
66 for (auto i : ...) [A] | |
67 ...x[i]... | |
68 else | |
69 for (auto i : ...) [B] | |
70 ...x[i * stride]... | |
71 | |
72 This can have several benefits: | |
73 | |
74 (1) [A] is often easier or cheaper to vectorize than [B]. | |
75 | |
76 (2) The scalar code in [A] is simpler than the scalar code in [B] | |
77 (if the loops cannot be vectorized or need an epilogue loop). | |
78 | |
79 (3) We might recognize [A] as a pattern, such as a memcpy or memset. | |
80 | |
81 (4) [A] has simpler address evolutions, which can help other passes | |
82 like loop interchange. | |
83 | |
84 The optimization is particularly useful for assumed-shape arrays in | |
85 Fortran, where the stride of the innermost dimension depends on the | |
86 array descriptor but is often equal to 1 in practice. For example: | |
87 | |
88 subroutine f1(x) | |
89 real :: x(:) | |
90 x(:) = 100 | |
91 end subroutine f1 | |
92 | |
93 generates the equivalent of: | |
94 | |
95 raw_stride = *x.dim[0].stride; | |
96 stride = raw_stride != 0 ? raw_stride : 1; | |
97 x_base = *x.data; | |
98 ... | |
99 tmp1 = stride * S; | |
100 tmp2 = tmp1 - stride; | |
101 *x_base[tmp2] = 1.0e+2; | |
102 | |
103 but in the common case that stride == 1, the last three statements | |
104 simplify to: | |
105 | |
106 tmp3 = S + -1; | |
107 *x_base[tmp3] = 1.0e+2; | |
108 | |
109 The optimization is in principle very simple. The difficult parts are: | |
110 | |
111 (a) deciding which parts of a general address calculation correspond | |
112 to the inner dimension of an array, since this usually isn't explicit | |
113 in the IL, and for C often isn't even explicit in the source code | |
114 | |
115 (b) estimating when the transformation is worthwhile | |
116 | |
117 Structure | |
118 --------- | |
119 | |
120 The pass has four phases: | |
121 | |
122 (1) Walk through the statements looking for and recording potential | |
123 versioning opportunities. Stop if there are none. | |
124 | |
125 (2) Use context-sensitive range information to see whether any versioning | |
126 conditions are impossible in practice. Remove them if so, and stop | |
127 if no opportunities remain. | |
128 | |
129 (We do this only after (1) to keep compile time down when no | |
130 versioning opportunities exist.) | |
131 | |
132 (3) Apply the cost model. Decide which versioning opportunities are | |
133 worthwhile and at which nesting level they should be applied. | |
134 | |
135 (4) Attempt to version all the loops selected by (3), so that: | |
136 | |
137 for (...) | |
138 ... | |
139 | |
140 becomes: | |
141 | |
142 if (!cond) | |
143 for (...) // Original loop | |
144 ... | |
145 else | |
146 for (...) // New loop | |
147 ... | |
148 | |
149 Use the version condition COND to simplify the new loop. */ | |
150 | |
151 /* Enumerates the likelihood that a particular value indexes the inner | |
152 dimension of an array. */ | |
153 enum inner_likelihood { | |
154 INNER_UNLIKELY, | |
155 INNER_DONT_KNOW, | |
156 INNER_LIKELY | |
157 }; | |
158 | |
159 /* Information about one term of an address_info. */ | |
160 struct address_term_info | |
161 { | |
162 /* The value of the term is EXPR * MULTIPLIER. */ | |
163 tree expr; | |
164 unsigned HOST_WIDE_INT multiplier; | |
165 | |
166 /* The stride applied by EXPR in each iteration of some unrecorded loop, | |
167 or null if no stride has been identified. */ | |
168 tree stride; | |
169 | |
170 /* Enumerates the likelihood that EXPR indexes the inner dimension | |
171 of an array. */ | |
172 enum inner_likelihood inner_likelihood; | |
173 | |
174 /* True if STRIDE == 1 is a versioning opportunity when considered | |
175 in isolation. */ | |
176 bool versioning_opportunity_p; | |
177 }; | |
178 | |
179 /* Information about an address calculation, and the range of constant | |
180 offsets applied to it. */ | |
181 class address_info | |
182 { | |
183 public: | |
184 static const unsigned int MAX_TERMS = 8; | |
185 | |
186 /* One statement that calculates the address. If multiple statements | |
187 share the same address, we only record the first. */ | |
188 gimple *stmt; | |
189 | |
190 /* The loop containing STMT (cached for convenience). If multiple | |
191 statements share the same address, they all belong to this loop. */ | |
192 class loop *loop; | |
193 | |
194 /* A decomposition of the calculation into a sum of terms plus an | |
195 optional base. When BASE is provided, it is never an SSA name. | |
196 Once initialization is complete, all members of TERMs are SSA names. */ | |
197 tree base; | |
198 auto_vec<address_term_info, MAX_TERMS> terms; | |
199 | |
200 /* All bytes accessed from the address fall in the offset range | |
201 [MIN_OFFSET, MAX_OFFSET). */ | |
202 HOST_WIDE_INT min_offset, max_offset; | |
203 }; | |
204 | |
205 /* Stores addresses based on their base and terms (ignoring the offsets). */ | |
206 struct address_info_hasher : nofree_ptr_hash <address_info> | |
207 { | |
208 static hashval_t hash (const address_info *); | |
209 static bool equal (const address_info *, const address_info *); | |
210 }; | |
211 | |
212 /* Information about the versioning we'd like to apply to a loop. */ | |
213 class loop_info | |
214 { | |
215 public: | |
216 bool worth_versioning_p () const; | |
217 | |
218 /* True if we've decided not to version this loop. The remaining | |
219 fields are meaningless if so. */ | |
220 bool rejected_p; | |
221 | |
222 /* True if at least one subloop of this loop benefits from versioning. */ | |
223 bool subloops_benefit_p; | |
224 | |
225 /* An estimate of the total number of instructions in the loop, | |
226 excluding those in subloops that benefit from versioning. */ | |
227 unsigned int num_insns; | |
228 | |
229 /* The outermost loop that can handle all the version checks | |
230 described below. */ | |
231 class loop *outermost; | |
232 | |
233 /* The first entry in the list of blocks that belong to this loop | |
234 (and not to subloops). m_next_block_in_loop provides the chain | |
235 pointers for the list. */ | |
236 basic_block block_list; | |
237 | |
238 /* We'd like to version the loop for the case in which these SSA names | |
239 (keyed off their SSA_NAME_VERSION) are all equal to 1 at runtime. */ | |
240 bitmap_head unity_names; | |
241 | |
242 /* If versioning succeeds, this points the version of the loop that | |
243 assumes the version conditions holds. */ | |
244 class loop *optimized_loop; | |
245 }; | |
246 | |
247 /* The main pass structure. */ | |
248 class loop_versioning | |
249 { | |
250 public: | |
251 loop_versioning (function *); | |
252 ~loop_versioning (); | |
253 unsigned int run (); | |
254 | |
255 private: | |
256 /* Used to walk the dominator tree to find loop versioning conditions | |
257 that are always false. */ | |
258 class lv_dom_walker : public dom_walker | |
259 { | |
260 public: | |
261 lv_dom_walker (loop_versioning &); | |
262 | |
263 edge before_dom_children (basic_block) FINAL OVERRIDE; | |
264 void after_dom_children (basic_block) FINAL OVERRIDE; | |
265 | |
266 private: | |
267 /* The parent pass. */ | |
268 loop_versioning &m_lv; | |
269 | |
270 /* Used to build context-dependent range information. */ | |
271 evrp_range_analyzer m_range_analyzer; | |
272 }; | |
273 | |
274 /* Used to simplify statements based on conditions that are established | |
275 by the version checks. */ | |
276 class name_prop : public substitute_and_fold_engine | |
277 { | |
278 public: | |
279 name_prop (loop_info &li) : m_li (li) {} | |
280 tree get_value (tree) FINAL OVERRIDE; | |
281 | |
282 private: | |
283 /* Information about the versioning we've performed on the loop. */ | |
284 loop_info &m_li; | |
285 }; | |
286 | |
287 loop_info &get_loop_info (class loop *loop) { return m_loops[loop->num]; } | |
288 | |
289 unsigned int max_insns_for_loop (class loop *); | |
290 bool expensive_stmt_p (gimple *); | |
291 | |
292 void version_for_unity (gimple *, tree); | |
293 bool acceptable_multiplier_p (tree, unsigned HOST_WIDE_INT, | |
294 unsigned HOST_WIDE_INT * = 0); | |
295 bool acceptable_type_p (tree, unsigned HOST_WIDE_INT *); | |
296 bool multiply_term_by (address_term_info &, tree); | |
297 inner_likelihood get_inner_likelihood (tree, unsigned HOST_WIDE_INT); | |
298 void dump_inner_likelihood (address_info &, address_term_info &); | |
299 void analyze_stride (address_info &, address_term_info &, | |
300 tree, class loop *); | |
301 bool find_per_loop_multiplication (address_info &, address_term_info &); | |
302 bool analyze_term_using_scevs (address_info &, address_term_info &); | |
303 void analyze_arbitrary_term (address_info &, address_term_info &); | |
304 void analyze_address_fragment (address_info &); | |
305 void record_address_fragment (gimple *, unsigned HOST_WIDE_INT, | |
306 tree, unsigned HOST_WIDE_INT, HOST_WIDE_INT); | |
307 void analyze_expr (gimple *, tree); | |
308 bool analyze_block (basic_block); | |
309 bool analyze_blocks (); | |
310 | |
311 void prune_loop_conditions (class loop *, vr_values *); | |
312 bool prune_conditions (); | |
313 | |
314 void merge_loop_info (class loop *, class loop *); | |
315 void add_loop_to_queue (class loop *); | |
316 bool decide_whether_loop_is_versionable (class loop *); | |
317 bool make_versioning_decisions (); | |
318 | |
319 bool version_loop (class loop *); | |
320 void implement_versioning_decisions (); | |
321 | |
322 /* The function we're optimizing. */ | |
323 function *m_fn; | |
324 | |
325 /* The obstack to use for all pass-specific bitmaps. */ | |
326 bitmap_obstack m_bitmap_obstack; | |
327 | |
328 /* An obstack to use for general allocation. */ | |
329 obstack m_obstack; | |
330 | |
331 /* The number of loops in the function. */ | |
332 unsigned int m_nloops; | |
333 | |
334 /* The total number of loop version conditions we've found. */ | |
335 unsigned int m_num_conditions; | |
336 | |
337 /* Assume that an address fragment of the form i * stride * scale | |
338 (for variable stride and constant scale) will not benefit from | |
339 versioning for stride == 1 when scale is greater than this value. */ | |
340 unsigned HOST_WIDE_INT m_maximum_scale; | |
341 | |
342 /* Information about each loop. */ | |
343 auto_vec<loop_info> m_loops; | |
344 | |
345 /* Used to form a linked list of blocks that belong to a loop, | |
346 started by loop_info::block_list. */ | |
347 auto_vec<basic_block> m_next_block_in_loop; | |
348 | |
349 /* The list of loops that we've decided to version. */ | |
350 auto_vec<class loop *> m_loops_to_version; | |
351 | |
352 /* A table of addresses in the current loop, keyed off their values | |
353 but not their offsets. */ | |
354 hash_table <address_info_hasher> m_address_table; | |
355 | |
356 /* A list of all addresses in M_ADDRESS_TABLE, in a predictable order. */ | |
357 auto_vec <address_info *, 32> m_address_list; | |
358 }; | |
359 | |
360 /* If EXPR is an SSA name and not a default definition, return the | |
361 defining statement, otherwise return null. */ | |
362 | |
363 static gimple * | |
364 maybe_get_stmt (tree expr) | |
365 { | |
366 if (TREE_CODE (expr) == SSA_NAME && !SSA_NAME_IS_DEFAULT_DEF (expr)) | |
367 return SSA_NAME_DEF_STMT (expr); | |
368 return NULL; | |
369 } | |
370 | |
371 /* Like maybe_get_stmt, but also return null if the defining | |
372 statement isn't an assignment. */ | |
373 | |
374 static gassign * | |
375 maybe_get_assign (tree expr) | |
376 { | |
377 return safe_dyn_cast <gassign *> (maybe_get_stmt (expr)); | |
378 } | |
379 | |
380 /* Return true if this pass should look through a cast of expression FROM | |
381 to type TYPE when analyzing pieces of an address. */ | |
382 | |
383 static bool | |
384 look_through_cast_p (tree type, tree from) | |
385 { | |
386 return (INTEGRAL_TYPE_P (TREE_TYPE (from)) == INTEGRAL_TYPE_P (type) | |
387 && POINTER_TYPE_P (TREE_TYPE (from)) == POINTER_TYPE_P (type)); | |
388 } | |
389 | |
390 /* Strip all conversions of integers or pointers from EXPR, regardless | |
391 of whether the conversions are nops. This is useful in the context | |
392 of this pass because we're not trying to fold or simulate the | |
393 expression; we just want to see how it's structured. */ | |
394 | |
395 static tree | |
396 strip_casts (tree expr) | |
397 { | |
398 const unsigned int MAX_NITERS = 4; | |
399 | |
400 tree type = TREE_TYPE (expr); | |
401 while (CONVERT_EXPR_P (expr) | |
402 && look_through_cast_p (type, TREE_OPERAND (expr, 0))) | |
403 expr = TREE_OPERAND (expr, 0); | |
404 | |
405 for (unsigned int niters = 0; niters < MAX_NITERS; ++niters) | |
406 { | |
407 gassign *assign = maybe_get_assign (expr); | |
408 if (assign | |
409 && CONVERT_EXPR_CODE_P (gimple_assign_rhs_code (assign)) | |
410 && look_through_cast_p (type, gimple_assign_rhs1 (assign))) | |
411 expr = gimple_assign_rhs1 (assign); | |
412 else | |
413 break; | |
414 } | |
415 return expr; | |
416 } | |
417 | |
418 /* Compare two address_term_infos in the same address_info. */ | |
419 | |
420 static int | |
421 compare_address_terms (const void *a_uncast, const void *b_uncast) | |
422 { | |
423 const address_term_info *a = (const address_term_info *) a_uncast; | |
424 const address_term_info *b = (const address_term_info *) b_uncast; | |
425 | |
426 if (a->expr != b->expr) | |
427 return SSA_NAME_VERSION (a->expr) < SSA_NAME_VERSION (b->expr) ? -1 : 1; | |
428 | |
429 if (a->multiplier != b->multiplier) | |
430 return a->multiplier < b->multiplier ? -1 : 1; | |
431 | |
432 return 0; | |
433 } | |
434 | |
435 /* Dump ADDRESS using flags FLAGS. */ | |
436 | |
437 static void | |
438 dump_address_info (dump_flags_t flags, address_info &address) | |
439 { | |
440 if (address.base) | |
441 dump_printf (flags, "%T + ", address.base); | |
442 for (unsigned int i = 0; i < address.terms.length (); ++i) | |
443 { | |
444 if (i != 0) | |
445 dump_printf (flags, " + "); | |
446 dump_printf (flags, "%T", address.terms[i].expr); | |
447 if (address.terms[i].multiplier != 1) | |
448 dump_printf (flags, " * %wd", address.terms[i].multiplier); | |
449 } | |
450 dump_printf (flags, " + [%wd, %wd]", | |
451 address.min_offset, address.max_offset - 1); | |
452 } | |
453 | |
454 /* Hash an address_info based on its base and terms. */ | |
455 | |
456 hashval_t | |
457 address_info_hasher::hash (const address_info *info) | |
458 { | |
459 inchash::hash hash; | |
460 hash.add_int (info->base ? TREE_CODE (info->base) : 0); | |
461 hash.add_int (info->terms.length ()); | |
462 for (unsigned int i = 0; i < info->terms.length (); ++i) | |
463 { | |
464 hash.add_int (SSA_NAME_VERSION (info->terms[i].expr)); | |
465 hash.add_hwi (info->terms[i].multiplier); | |
466 } | |
467 return hash.end (); | |
468 } | |
469 | |
470 /* Return true if two address_infos have equal bases and terms. Other | |
471 properties might be different (such as the statement or constant | |
472 offset range). */ | |
473 | |
474 bool | |
475 address_info_hasher::equal (const address_info *a, const address_info *b) | |
476 { | |
477 if (a->base != b->base | |
478 && (!a->base || !b->base || !operand_equal_p (a->base, b->base, 0))) | |
479 return false; | |
480 | |
481 if (a->terms.length () != b->terms.length ()) | |
482 return false; | |
483 | |
484 for (unsigned int i = 0; i < a->terms.length (); ++i) | |
485 if (a->terms[i].expr != b->terms[i].expr | |
486 || a->terms[i].multiplier != b->terms[i].multiplier) | |
487 return false; | |
488 | |
489 return true; | |
490 } | |
491 | |
492 /* Return true if we want to version the loop, i.e. if we have a | |
493 specific reason for doing so and no specific reason not to. */ | |
494 | |
495 bool | |
496 loop_info::worth_versioning_p () const | |
497 { | |
498 return (!rejected_p | |
499 && (!bitmap_empty_p (&unity_names) || subloops_benefit_p)); | |
500 } | |
501 | |
502 loop_versioning::lv_dom_walker::lv_dom_walker (loop_versioning &lv) | |
503 : dom_walker (CDI_DOMINATORS), m_lv (lv), m_range_analyzer (false) | |
504 { | |
505 } | |
506 | |
507 /* Process BB before processing the blocks it dominates. */ | |
508 | |
509 edge | |
510 loop_versioning::lv_dom_walker::before_dom_children (basic_block bb) | |
511 { | |
512 m_range_analyzer.enter (bb); | |
513 | |
514 if (bb == bb->loop_father->header) | |
515 m_lv.prune_loop_conditions (bb->loop_father, | |
516 m_range_analyzer.get_vr_values ()); | |
517 | |
518 for (gimple_stmt_iterator si = gsi_start_bb (bb); !gsi_end_p (si); | |
519 gsi_next (&si)) | |
520 m_range_analyzer.record_ranges_from_stmt (gsi_stmt (si), false); | |
521 | |
522 return NULL; | |
523 } | |
524 | |
525 /* Process BB after processing the blocks it dominates. */ | |
526 | |
527 void | |
528 loop_versioning::lv_dom_walker::after_dom_children (basic_block bb) | |
529 { | |
530 m_range_analyzer.leave (bb); | |
531 } | |
532 | |
533 /* Decide whether to replace VAL with a new value in a versioned loop. | |
534 Return the new value if so, otherwise return null. */ | |
535 | |
536 tree | |
537 loop_versioning::name_prop::get_value (tree val) | |
538 { | |
539 if (TREE_CODE (val) == SSA_NAME | |
540 && bitmap_bit_p (&m_li.unity_names, SSA_NAME_VERSION (val))) | |
541 return build_one_cst (TREE_TYPE (val)); | |
542 return NULL_TREE; | |
543 } | |
544 | |
545 /* Initialize the structure to optimize FN. */ | |
546 | |
547 loop_versioning::loop_versioning (function *fn) | |
548 : m_fn (fn), | |
549 m_nloops (number_of_loops (fn)), | |
550 m_num_conditions (0), | |
551 m_address_table (31) | |
552 { | |
553 bitmap_obstack_initialize (&m_bitmap_obstack); | |
554 gcc_obstack_init (&m_obstack); | |
555 | |
556 /* Initialize the loop information. */ | |
557 m_loops.safe_grow_cleared (m_nloops); | |
558 for (unsigned int i = 0; i < m_nloops; ++i) | |
559 { | |
560 m_loops[i].outermost = get_loop (m_fn, 0); | |
561 bitmap_initialize (&m_loops[i].unity_names, &m_bitmap_obstack); | |
562 } | |
563 | |
564 /* Initialize the list of blocks that belong to each loop. */ | |
565 unsigned int nbbs = last_basic_block_for_fn (fn); | |
566 m_next_block_in_loop.safe_grow (nbbs); | |
567 basic_block bb; | |
568 FOR_EACH_BB_FN (bb, fn) | |
569 { | |
570 loop_info &li = get_loop_info (bb->loop_father); | |
571 m_next_block_in_loop[bb->index] = li.block_list; | |
572 li.block_list = bb; | |
573 } | |
574 | |
575 /* MAX_FIXED_MODE_SIZE should be a reasonable maximum scale for | |
576 unvectorizable code, since it is the largest size that can be | |
577 handled efficiently by scalar code. omp_max_vf calculates the | |
578 maximum number of bytes in a vector, when such a value is relevant | |
579 to loop optimization. */ | |
580 m_maximum_scale = estimated_poly_value (omp_max_vf ()); | |
581 m_maximum_scale = MAX (m_maximum_scale, MAX_FIXED_MODE_SIZE); | |
582 } | |
583 | |
584 loop_versioning::~loop_versioning () | |
585 { | |
586 bitmap_obstack_release (&m_bitmap_obstack); | |
587 obstack_free (&m_obstack, NULL); | |
588 } | |
589 | |
590 /* Return the maximum number of instructions allowed in LOOP before | |
591 it becomes too big for versioning. | |
592 | |
593 There are separate limits for inner and outer loops. The limit for | |
594 inner loops applies only to loops that benefit directly from versioning. | |
595 The limit for outer loops applies to all code in the outer loop and | |
596 its subloops that *doesn't* benefit directly from versioning; such code | |
597 would be "taken along for the ride". The idea is that if the cost of | |
598 the latter is small, it is better to version outer loops rather than | |
599 inner loops, both to reduce the number of repeated checks and to enable | |
600 more of the loop nest to be optimized as a natural nest (e.g. by loop | |
601 interchange or outer-loop vectorization). */ | |
602 | |
603 unsigned int | |
604 loop_versioning::max_insns_for_loop (class loop *loop) | |
605 { | |
606 return (loop->inner | |
607 ? param_loop_versioning_max_outer_insns | |
608 : param_loop_versioning_max_inner_insns); | |
609 } | |
610 | |
611 /* Return true if for cost reasons we should avoid versioning any loop | |
612 that contains STMT. | |
613 | |
614 Note that we don't need to check whether versioning is invalid for | |
615 correctness reasons, since the versioning process does that for us. | |
616 The conditions involved are too rare to be worth duplicating here. */ | |
617 | |
618 bool | |
619 loop_versioning::expensive_stmt_p (gimple *stmt) | |
620 { | |
621 if (gcall *call = dyn_cast <gcall *> (stmt)) | |
622 /* Assume for now that the time spent in an "expensive" call would | |
623 overwhelm any saving from versioning. */ | |
624 return !gimple_inexpensive_call_p (call); | |
625 return false; | |
626 } | |
627 | |
628 /* Record that we want to version the loop that contains STMT for the | |
629 case in which SSA name NAME is equal to 1. We already know that NAME | |
630 is invariant in the loop. */ | |
631 | |
632 void | |
633 loop_versioning::version_for_unity (gimple *stmt, tree name) | |
634 { | |
635 class loop *loop = loop_containing_stmt (stmt); | |
636 loop_info &li = get_loop_info (loop); | |
637 | |
638 if (bitmap_set_bit (&li.unity_names, SSA_NAME_VERSION (name))) | |
639 { | |
640 /* This is the first time we've wanted to version LOOP for NAME. | |
641 Keep track of the outermost loop that can handle all versioning | |
642 checks in LI. */ | |
643 class loop *outermost | |
644 = outermost_invariant_loop_for_expr (loop, name); | |
645 if (loop_depth (li.outermost) < loop_depth (outermost)) | |
646 li.outermost = outermost; | |
647 | |
648 if (dump_enabled_p ()) | |
649 { | |
650 dump_printf_loc (MSG_NOTE, stmt, "want to version containing loop" | |
651 " for when %T == 1", name); | |
652 if (outermost == loop) | |
653 dump_printf (MSG_NOTE, "; cannot hoist check further"); | |
654 else | |
655 { | |
656 dump_printf (MSG_NOTE, "; could implement the check at loop" | |
657 " depth %d", loop_depth (outermost)); | |
658 if (loop_depth (li.outermost) > loop_depth (outermost)) | |
659 dump_printf (MSG_NOTE, ", but other checks only allow" | |
660 " a depth of %d", loop_depth (li.outermost)); | |
661 } | |
662 dump_printf (MSG_NOTE, "\n"); | |
663 } | |
664 | |
665 m_num_conditions += 1; | |
666 } | |
667 else | |
668 { | |
669 /* This is a duplicate request. */ | |
670 if (dump_enabled_p ()) | |
671 dump_printf_loc (MSG_NOTE, stmt, "already asked to version containing" | |
672 " loop for when %T == 1\n", name); | |
673 } | |
674 } | |
675 | |
676 /* Return true if OP1_TREE is constant and if in principle it is worth | |
677 versioning an address fragment of the form: | |
678 | |
679 i * OP1_TREE * OP2 * stride | |
680 | |
681 for the case in which stride == 1. This in practice means testing | |
682 whether: | |
683 | |
684 OP1_TREE * OP2 <= M_MAXIMUM_SCALE. | |
685 | |
686 If RESULT is nonnull, store OP1_TREE * OP2 there when returning true. */ | |
687 | |
688 bool | |
689 loop_versioning::acceptable_multiplier_p (tree op1_tree, | |
690 unsigned HOST_WIDE_INT op2, | |
691 unsigned HOST_WIDE_INT *result) | |
692 { | |
693 if (tree_fits_uhwi_p (op1_tree)) | |
694 { | |
695 unsigned HOST_WIDE_INT op1 = tree_to_uhwi (op1_tree); | |
696 /* The first part checks for overflow. */ | |
697 if (op1 * op2 >= op2 && op1 * op2 <= m_maximum_scale) | |
698 { | |
699 if (result) | |
700 *result = op1 * op2; | |
701 return true; | |
702 } | |
703 } | |
704 return false; | |
705 } | |
706 | |
707 /* Return true if it is worth using loop versioning on a memory access | |
708 of type TYPE. Store the size of the access in *SIZE if so. */ | |
709 | |
710 bool | |
711 loop_versioning::acceptable_type_p (tree type, unsigned HOST_WIDE_INT *size) | |
712 { | |
713 return (TYPE_SIZE_UNIT (type) | |
714 && acceptable_multiplier_p (TYPE_SIZE_UNIT (type), 1, size)); | |
715 } | |
716 | |
717 /* See whether OP is constant and whether we can multiply TERM by that | |
718 constant without exceeding M_MAXIMUM_SCALE. Return true and update | |
719 TERM if so. */ | |
720 | |
721 bool | |
722 loop_versioning::multiply_term_by (address_term_info &term, tree op) | |
723 { | |
724 return acceptable_multiplier_p (op, term.multiplier, &term.multiplier); | |
725 } | |
726 | |
727 /* Decide whether an address fragment of the form STRIDE * MULTIPLIER | |
728 is likely to be indexing an innermost dimension, returning the result | |
729 as an INNER_* probability. */ | |
730 | |
731 inner_likelihood | |
732 loop_versioning::get_inner_likelihood (tree stride, | |
733 unsigned HOST_WIDE_INT multiplier) | |
734 { | |
735 const unsigned int MAX_NITERS = 8; | |
736 | |
737 /* Iterate over possible values of STRIDE. Return INNER_LIKELY if at | |
738 least one of those values is likely to be for the innermost dimension. | |
739 Record in UNLIKELY_P if at least one of those values is unlikely to be | |
740 for the innermost dimension. | |
741 | |
742 E.g. for: | |
743 | |
744 stride = cond ? a * b : 1 | |
745 | |
746 we should treat STRIDE as being a likely inner dimension, since | |
747 we know that it is 1 under at least some circumstances. (See the | |
748 Fortran example below.) However: | |
749 | |
750 stride = a * b | |
751 | |
752 on its own is unlikely to be for the innermost dimension, since | |
753 that would require both a and b to be 1 at runtime. */ | |
754 bool unlikely_p = false; | |
755 tree worklist[MAX_NITERS]; | |
756 unsigned int length = 0; | |
757 worklist[length++] = stride; | |
758 for (unsigned int i = 0; i < length; ++i) | |
759 { | |
760 tree expr = worklist[i]; | |
761 | |
762 if (CONSTANT_CLASS_P (expr)) | |
763 { | |
764 /* See if EXPR * MULTIPLIER would be consistent with an individual | |
765 access or a small grouped access. */ | |
766 if (acceptable_multiplier_p (expr, multiplier)) | |
767 return INNER_LIKELY; | |
768 else | |
769 unlikely_p = true; | |
770 } | |
771 else if (gimple *stmt = maybe_get_stmt (expr)) | |
772 { | |
773 /* If EXPR is set by a PHI node, queue its arguments in case | |
774 we find one that is consistent with an inner dimension. | |
775 | |
776 An important instance of this is the Fortran handling of array | |
777 descriptors, which calculates the stride of the inner dimension | |
778 using a PHI equivalent of: | |
779 | |
780 raw_stride = a.dim[0].stride; | |
781 stride = raw_stride != 0 ? raw_stride : 1; | |
782 | |
783 (Strides for outer dimensions do not treat 0 specially.) */ | |
784 if (gphi *phi = dyn_cast <gphi *> (stmt)) | |
785 { | |
786 unsigned int nargs = gimple_phi_num_args (phi); | |
787 for (unsigned int j = 0; j < nargs && length < MAX_NITERS; ++j) | |
788 worklist[length++] = strip_casts (gimple_phi_arg_def (phi, j)); | |
789 } | |
790 /* If the value is set by an assignment, expect it to be read | |
791 from memory (such as an array descriptor) rather than be | |
792 calculated. */ | |
793 else if (gassign *assign = dyn_cast <gassign *> (stmt)) | |
794 { | |
795 if (!gimple_assign_load_p (assign)) | |
796 unlikely_p = true; | |
797 } | |
798 /* Things like calls don't really tell us anything. */ | |
799 } | |
800 } | |
801 | |
802 /* We didn't find any possible values of STRIDE that were likely to be | |
803 for the innermost dimension. If we found one that was actively | |
804 unlikely to be for the innermost dimension, assume that that applies | |
805 to STRIDE too. */ | |
806 return unlikely_p ? INNER_UNLIKELY : INNER_DONT_KNOW; | |
807 } | |
808 | |
809 /* Dump the likelihood that TERM's stride is for the innermost dimension. | |
810 ADDRESS is the address that contains TERM. */ | |
811 | |
812 void | |
813 loop_versioning::dump_inner_likelihood (address_info &address, | |
814 address_term_info &term) | |
815 { | |
816 if (term.inner_likelihood == INNER_LIKELY) | |
817 dump_printf_loc (MSG_NOTE, address.stmt, "%T is likely to be the" | |
818 " innermost dimension\n", term.stride); | |
819 else if (term.inner_likelihood == INNER_UNLIKELY) | |
820 dump_printf_loc (MSG_NOTE, address.stmt, "%T is probably not the" | |
821 " innermost dimension\n", term.stride); | |
822 else | |
823 dump_printf_loc (MSG_NOTE, address.stmt, "cannot tell whether %T" | |
824 " is the innermost dimension\n", term.stride); | |
825 } | |
826 | |
827 /* The caller has identified that STRIDE is the stride of interest | |
828 in TERM, and that the stride is applied in OP_LOOP. Record this | |
829 information in TERM, deciding whether STRIDE is likely to be for | |
830 the innermost dimension of an array and whether it represents a | |
831 versioning opportunity. ADDRESS is the address that contains TERM. */ | |
832 | |
833 void | |
834 loop_versioning::analyze_stride (address_info &address, | |
835 address_term_info &term, | |
836 tree stride, class loop *op_loop) | |
837 { | |
838 term.stride = stride; | |
839 | |
840 term.inner_likelihood = get_inner_likelihood (stride, term.multiplier); | |
841 if (dump_enabled_p ()) | |
842 dump_inner_likelihood (address, term); | |
843 | |
844 /* To be a versioning opportunity we require: | |
845 | |
846 - The multiplier applied by TERM is equal to the access size, | |
847 so that when STRIDE is 1, the accesses in successive loop | |
848 iterations are consecutive. | |
849 | |
850 This is deliberately conservative. We could relax it to handle | |
851 other cases (such as those with gaps between iterations) if we | |
852 find any real testcases for which it's useful. | |
853 | |
854 - the stride is applied in the same loop as STMT rather than | |
855 in an outer loop. Although versioning for strides applied in | |
856 outer loops could help in some cases -- such as enabling | |
857 more loop interchange -- the savings are much lower than for | |
858 inner loops. | |
859 | |
860 - the stride is an SSA name that is invariant in STMT's loop, | |
861 since otherwise versioning isn't possible. */ | |
862 unsigned HOST_WIDE_INT access_size = address.max_offset - address.min_offset; | |
863 if (term.multiplier == access_size | |
864 && address.loop == op_loop | |
865 && TREE_CODE (stride) == SSA_NAME | |
866 && expr_invariant_in_loop_p (address.loop, stride)) | |
867 { | |
868 term.versioning_opportunity_p = true; | |
869 if (dump_enabled_p ()) | |
870 dump_printf_loc (MSG_NOTE, address.stmt, "%T == 1 is a versioning" | |
871 " opportunity\n", stride); | |
872 } | |
873 } | |
874 | |
875 /* See whether address term TERM (which belongs to ADDRESS) is the result | |
876 of multiplying a varying SSA name by a loop-invariant SSA name. | |
877 Return true and update TERM if so. | |
878 | |
879 This handles both cases that SCEV might handle, such as: | |
880 | |
881 for (int i = 0; i < n; ++i) | |
882 res += a[i * stride]; | |
883 | |
884 and ones in which the term varies arbitrarily between iterations, such as: | |
885 | |
886 for (int i = 0; i < n; ++i) | |
887 res += a[index[i] * stride]; */ | |
888 | |
889 bool | |
890 loop_versioning::find_per_loop_multiplication (address_info &address, | |
891 address_term_info &term) | |
892 { | |
893 gassign *mult = maybe_get_assign (term.expr); | |
894 if (!mult || gimple_assign_rhs_code (mult) != MULT_EXPR) | |
895 return false; | |
896 | |
897 class loop *mult_loop = loop_containing_stmt (mult); | |
898 if (!loop_outer (mult_loop)) | |
899 return false; | |
900 | |
901 tree op1 = strip_casts (gimple_assign_rhs1 (mult)); | |
902 tree op2 = strip_casts (gimple_assign_rhs2 (mult)); | |
903 if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME) | |
904 return false; | |
905 | |
906 bool invariant1_p = expr_invariant_in_loop_p (mult_loop, op1); | |
907 bool invariant2_p = expr_invariant_in_loop_p (mult_loop, op2); | |
908 if (invariant1_p == invariant2_p) | |
909 return false; | |
910 | |
911 /* Make sure that the loop invariant is OP2 rather than OP1. */ | |
912 if (invariant1_p) | |
913 std::swap (op1, op2); | |
914 | |
915 if (dump_enabled_p ()) | |
916 dump_printf_loc (MSG_NOTE, address.stmt, "address term %T = varying %T" | |
917 " * loop-invariant %T\n", term.expr, op1, op2); | |
918 analyze_stride (address, term, op2, mult_loop); | |
919 return true; | |
920 } | |
921 | |
922 /* Try to use scalar evolutions to find an address stride for TERM, | |
923 which belongs to ADDRESS. Return true and update TERM if so. | |
924 | |
925 Here we are interested in any evolution information we can find, | |
926 not just evolutions wrt ADDRESS->LOOP. For example, if we find that | |
927 an outer loop obviously iterates over the inner dimension of an array, | |
928 that information can help us eliminate worthless versioning opportunities | |
929 in inner loops. */ | |
930 | |
931 bool | |
932 loop_versioning::analyze_term_using_scevs (address_info &address, | |
933 address_term_info &term) | |
934 { | |
935 gimple *setter = maybe_get_stmt (term.expr); | |
936 if (!setter) | |
937 return false; | |
938 | |
939 class loop *wrt_loop = loop_containing_stmt (setter); | |
940 if (!loop_outer (wrt_loop)) | |
941 return false; | |
942 | |
943 tree chrec = strip_casts (analyze_scalar_evolution (wrt_loop, term.expr)); | |
944 if (TREE_CODE (chrec) == POLYNOMIAL_CHREC) | |
945 { | |
946 if (dump_enabled_p ()) | |
947 dump_printf_loc (MSG_NOTE, address.stmt, | |
948 "address term %T = %T\n", term.expr, chrec); | |
949 | |
950 /* Peel casts and accumulate constant multiplications, up to the | |
951 limit allowed by M_MAXIMUM_SCALE. */ | |
952 tree stride = strip_casts (CHREC_RIGHT (chrec)); | |
953 while (TREE_CODE (stride) == MULT_EXPR | |
954 && multiply_term_by (term, TREE_OPERAND (stride, 1))) | |
955 stride = strip_casts (TREE_OPERAND (stride, 0)); | |
956 | |
957 gassign *assign; | |
958 while ((assign = maybe_get_assign (stride)) | |
959 && gimple_assign_rhs_code (assign) == MULT_EXPR | |
960 && multiply_term_by (term, gimple_assign_rhs2 (assign))) | |
961 { | |
962 if (dump_enabled_p ()) | |
963 dump_printf_loc (MSG_NOTE, address.stmt, | |
964 "looking through %G", assign); | |
965 stride = strip_casts (gimple_assign_rhs1 (assign)); | |
966 } | |
967 | |
968 analyze_stride (address, term, stride, get_chrec_loop (chrec)); | |
969 return true; | |
970 } | |
971 | |
972 return false; | |
973 } | |
974 | |
975 /* Address term TERM is an arbitrary term that provides no versioning | |
976 opportunities. Analyze it to see whether it contains any likely | |
977 inner strides, so that we don't mistakenly version for other | |
978 (less likely) candidates. | |
979 | |
980 This copes with invariant innermost indices such as: | |
981 | |
982 x(i, :) = 100 | |
983 | |
984 where the "i" component of the address is invariant in the loop | |
985 but provides the real inner stride. | |
986 | |
987 ADDRESS is the address that contains TERM. */ | |
988 | |
989 void | |
990 loop_versioning::analyze_arbitrary_term (address_info &address, | |
991 address_term_info &term) | |
992 | |
993 { | |
994 /* A multiplication offers two potential strides. Pick the one that | |
995 is most likely to be an innermost stride. */ | |
996 tree expr = term.expr, alt = NULL_TREE; | |
997 gassign *mult = maybe_get_assign (expr); | |
998 if (mult && gimple_assign_rhs_code (mult) == MULT_EXPR) | |
999 { | |
1000 expr = strip_casts (gimple_assign_rhs1 (mult)); | |
1001 alt = strip_casts (gimple_assign_rhs2 (mult)); | |
1002 } | |
1003 term.stride = expr; | |
1004 term.inner_likelihood = get_inner_likelihood (expr, term.multiplier); | |
1005 if (alt) | |
1006 { | |
1007 inner_likelihood alt_l = get_inner_likelihood (alt, term.multiplier); | |
1008 if (alt_l > term.inner_likelihood) | |
1009 { | |
1010 term.stride = alt; | |
1011 term.inner_likelihood = alt_l; | |
1012 } | |
1013 } | |
1014 if (dump_enabled_p ()) | |
1015 dump_inner_likelihood (address, term); | |
1016 } | |
1017 | |
1018 /* Try to identify loop strides in ADDRESS and try to choose realistic | |
1019 versioning opportunities based on these strides. | |
1020 | |
1021 The main difficulty here isn't finding strides that could be used | |
1022 in a version check (that's pretty easy). The problem instead is to | |
1023 avoid versioning for some stride S that is unlikely ever to be 1 at | |
1024 runtime. Versioning for S == 1 on its own would lead to unnecessary | |
1025 code bloat, while adding S == 1 to more realistic version conditions | |
1026 would lose the optimisation opportunity offered by those other conditions. | |
1027 | |
1028 For example, versioning for a stride of 1 in the Fortran code: | |
1029 | |
1030 integer :: a(:,:) | |
1031 a(1,:) = 1 | |
1032 | |
1033 is not usually a good idea, since the assignment is iterating over | |
1034 an outer dimension and is relatively unlikely to have a stride of 1. | |
1035 (It isn't impossible, since the inner dimension might be 1, or the | |
1036 array might be transposed.) Similarly, in: | |
1037 | |
1038 integer :: a(:,:), b(:,:) | |
1039 b(:,1) = a(1,:) | |
1040 | |
1041 b(:,1) is relatively likely to have a stride of 1 while a(1,:) isn't. | |
1042 Versioning for when both strides are 1 would lose most of the benefit | |
1043 of versioning for b's access. | |
1044 | |
1045 The approach we take is as follows: | |
1046 | |
1047 - Analyze each term to see whether it has an identifiable stride, | |
1048 regardless of which loop applies the stride. | |
1049 | |
1050 - Evaluate the likelihood that each such stride is for the innermost | |
1051 dimension of an array, on the scale "likely", "don't know" or "unlikely". | |
1052 | |
1053 - If there is a single "likely" innermost stride, and that stride is | |
1054 applied in the loop that contains STMT, version the loop for when the | |
1055 stride is 1. This deals with the cases in which we're fairly | |
1056 confident of doing the right thing, such as the b(:,1) reference above. | |
1057 | |
1058 - If there are no "likely" innermost strides, and the loop that contains | |
1059 STMT uses a stride that we rated as "don't know", version for when | |
1060 that stride is 1. This is principally used for C code such as: | |
1061 | |
1062 for (int i = 0; i < n; ++i) | |
1063 a[i * x] = ...; | |
1064 | |
1065 and: | |
1066 | |
1067 for (int j = 0; j < n; ++j) | |
1068 for (int i = 0; i < n; ++i) | |
1069 a[i * x + j * y] = ...; | |
1070 | |
1071 where nothing in the way "x" and "y" are set gives a hint as to | |
1072 whether "i" iterates over the innermost dimension of the array. | |
1073 In these situations it seems reasonable to assume the the | |
1074 programmer has nested the loops appropriately (although of course | |
1075 there are examples like GEMM in which this assumption doesn't hold | |
1076 for all accesses in the loop). | |
1077 | |
1078 This case is also useful for the Fortran equivalent of the | |
1079 above C code. */ | |
1080 | |
1081 void | |
1082 loop_versioning::analyze_address_fragment (address_info &address) | |
1083 { | |
1084 if (dump_enabled_p ()) | |
1085 { | |
1086 dump_printf_loc (MSG_NOTE, address.stmt, "analyzing address fragment "); | |
1087 dump_address_info (MSG_NOTE, address); | |
1088 dump_printf (MSG_NOTE, "\n"); | |
1089 } | |
1090 | |
1091 /* Analyze each component of the sum to see whether it involves an | |
1092 apparent stride. | |
1093 | |
1094 There is an overlap between the addresses that | |
1095 find_per_loop_multiplication and analyze_term_using_scevs can handle, | |
1096 but the former is much cheaper than SCEV analysis, so try it first. */ | |
1097 for (unsigned int i = 0; i < address.terms.length (); ++i) | |
1098 if (!find_per_loop_multiplication (address, address.terms[i]) | |
1099 && !analyze_term_using_scevs (address, address.terms[i]) | |
1100 && !POINTER_TYPE_P (TREE_TYPE (address.terms[i].expr))) | |
1101 analyze_arbitrary_term (address, address.terms[i]); | |
1102 | |
1103 /* Check for strides that are likely to be for the innermost dimension. | |
1104 | |
1105 1. If there is a single likely inner stride, if it is an SSA name, | |
1106 and if it is worth versioning the loop for when the SSA name | |
1107 equals 1, record that we want to do so. | |
1108 | |
1109 2. Otherwise, if there any likely inner strides, bail out. This means | |
1110 one of: | |
1111 | |
1112 (a) There are multiple likely inner strides. This suggests we're | |
1113 confused and be can't be confident of doing the right thing. | |
1114 | |
1115 (b) There is a single likely inner stride and it is a constant | |
1116 rather than an SSA name. This can mean either that the access | |
1117 is a natural one without any variable strides, such as: | |
1118 | |
1119 for (int i = 0; i < n; ++i) | |
1120 a[i] += 1; | |
1121 | |
1122 or that a variable stride is applied to an outer dimension, | |
1123 such as: | |
1124 | |
1125 for (int i = 0; i < n; ++i) | |
1126 for (int j = 0; j < n; ++j) | |
1127 a[j * stride][i] += 1; | |
1128 | |
1129 (c) There is a single likely inner stride, and it is an SSA name, | |
1130 but it isn't a worthwhile versioning opportunity. This usually | |
1131 means that the variable stride is applied by an outer loop, | |
1132 such as: | |
1133 | |
1134 for (int i = 0; i < n; ++i) | |
1135 for (int j = 0; j < n; ++j) | |
1136 a[j][i * stride] += 1; | |
1137 | |
1138 or (using an example with a more natural loop nesting): | |
1139 | |
1140 for (int i = 0; i < n; ++i) | |
1141 for (int j = 0; j < n; ++j) | |
1142 a[i][j] += b[i * stride]; | |
1143 | |
1144 in cases where b[i * stride] cannot (yet) be hoisted for | |
1145 aliasing reasons. | |
1146 | |
1147 3. If there are no likely inner strides, fall through to the next | |
1148 set of checks. | |
1149 | |
1150 Pointer equality is enough to check for uniqueness in (1), since we | |
1151 only care about SSA names. */ | |
1152 tree chosen_stride = NULL_TREE; | |
1153 tree version_stride = NULL_TREE; | |
1154 for (unsigned int i = 0; i < address.terms.length (); ++i) | |
1155 if (chosen_stride != address.terms[i].stride | |
1156 && address.terms[i].inner_likelihood == INNER_LIKELY) | |
1157 { | |
1158 if (chosen_stride) | |
1159 return; | |
1160 chosen_stride = address.terms[i].stride; | |
1161 if (address.terms[i].versioning_opportunity_p) | |
1162 version_stride = chosen_stride; | |
1163 } | |
1164 | |
1165 /* If there are no likely inner strides, see if there is a single | |
1166 versioning opportunity for a stride that was rated as INNER_DONT_KNOW. | |
1167 See the comment above the function for the cases that this code | |
1168 handles. */ | |
1169 if (!chosen_stride) | |
1170 for (unsigned int i = 0; i < address.terms.length (); ++i) | |
1171 if (version_stride != address.terms[i].stride | |
1172 && address.terms[i].inner_likelihood == INNER_DONT_KNOW | |
1173 && address.terms[i].versioning_opportunity_p) | |
1174 { | |
1175 if (version_stride) | |
1176 return; | |
1177 version_stride = address.terms[i].stride; | |
1178 } | |
1179 | |
1180 if (version_stride) | |
1181 version_for_unity (address.stmt, version_stride); | |
1182 } | |
1183 | |
1184 /* Treat EXPR * MULTIPLIER + OFFSET as a fragment of an address that addresses | |
1185 TYPE_SIZE bytes and record this address fragment for later processing. | |
1186 STMT is the statement that contains the address. */ | |
1187 | |
1188 void | |
1189 loop_versioning::record_address_fragment (gimple *stmt, | |
1190 unsigned HOST_WIDE_INT type_size, | |
1191 tree expr, | |
1192 unsigned HOST_WIDE_INT multiplier, | |
1193 HOST_WIDE_INT offset) | |
1194 { | |
1195 /* We're only interested in computed values. */ | |
1196 if (TREE_CODE (expr) != SSA_NAME) | |
1197 return; | |
1198 | |
1199 /* Quick exit if no part of the address is calculated in STMT's loop, | |
1200 since such addresses have no versioning opportunities. */ | |
1201 class loop *loop = loop_containing_stmt (stmt); | |
1202 if (expr_invariant_in_loop_p (loop, expr)) | |
1203 return; | |
1204 | |
1205 /* Set up an address_info for EXPR * MULTIPLIER. */ | |
1206 address_info *address = XOBNEW (&m_obstack, address_info); | |
1207 new (address) address_info; | |
1208 address->stmt = stmt; | |
1209 address->loop = loop; | |
1210 address->base = NULL_TREE; | |
1211 address->terms.quick_grow (1); | |
1212 address->terms[0].expr = expr; | |
1213 address->terms[0].multiplier = multiplier; | |
1214 address->terms[0].stride = NULL_TREE; | |
1215 address->terms[0].inner_likelihood = INNER_UNLIKELY; | |
1216 address->terms[0].versioning_opportunity_p = false; | |
1217 address->min_offset = offset; | |
1218 | |
1219 /* Peel apart the expression into a sum of address_terms, where each | |
1220 term is multiplied by a constant. Treat a + b and a - b the same, | |
1221 since it doesn't matter for our purposes whether an address is | |
1222 increasing or decreasing. Distribute (a + b) * constant into | |
1223 a * constant + b * constant. | |
1224 | |
1225 We don't care which loop each term belongs to, since we want to | |
1226 examine as many candidate strides as possible when determining | |
1227 which is likely to be for the innermost dimension. We therefore | |
1228 don't limit the search to statements in STMT's loop. */ | |
1229 for (unsigned int i = 0; i < address->terms.length (); ) | |
1230 { | |
1231 if (gassign *assign = maybe_get_assign (address->terms[i].expr)) | |
1232 { | |
1233 tree_code code = gimple_assign_rhs_code (assign); | |
1234 if (code == PLUS_EXPR | |
1235 || code == POINTER_PLUS_EXPR | |
1236 || code == MINUS_EXPR) | |
1237 { | |
1238 tree op1 = gimple_assign_rhs1 (assign); | |
1239 tree op2 = gimple_assign_rhs2 (assign); | |
1240 if (TREE_CODE (op2) == INTEGER_CST) | |
1241 { | |
1242 address->terms[i].expr = strip_casts (op1); | |
1243 /* This is heuristic only, so don't worry about truncation | |
1244 or overflow. */ | |
1245 address->min_offset += (TREE_INT_CST_LOW (op2) | |
1246 * address->terms[i].multiplier); | |
1247 continue; | |
1248 } | |
1249 else if (address->terms.length () < address_info::MAX_TERMS) | |
1250 { | |
1251 unsigned int j = address->terms.length (); | |
1252 address->terms.quick_push (address->terms[i]); | |
1253 address->terms[i].expr = strip_casts (op1); | |
1254 address->terms[j].expr = strip_casts (op2); | |
1255 continue; | |
1256 } | |
1257 } | |
1258 if (code == MULT_EXPR) | |
1259 { | |
1260 tree op1 = gimple_assign_rhs1 (assign); | |
1261 tree op2 = gimple_assign_rhs2 (assign); | |
1262 if (multiply_term_by (address->terms[i], op2)) | |
1263 { | |
1264 address->terms[i].expr = strip_casts (op1); | |
1265 continue; | |
1266 } | |
1267 } | |
1268 if (CONVERT_EXPR_CODE_P (code)) | |
1269 { | |
1270 tree op1 = gimple_assign_rhs1 (assign); | |
1271 address->terms[i].expr = strip_casts (op1); | |
1272 continue; | |
1273 } | |
1274 } | |
1275 i += 1; | |
1276 } | |
1277 | |
1278 /* Peel off any symbolic pointer. */ | |
1279 if (TREE_CODE (address->terms[0].expr) != SSA_NAME | |
1280 && address->terms[0].multiplier == 1) | |
1281 { | |
1282 if (address->terms.length () == 1) | |
1283 { | |
1284 obstack_free (&m_obstack, address); | |
1285 return; | |
1286 } | |
1287 address->base = address->terms[0].expr; | |
1288 address->terms.ordered_remove (0); | |
1289 } | |
1290 | |
1291 /* Require all remaining terms to be SSA names. (This could be false | |
1292 for unfolded statements, but they aren't worth dealing with.) */ | |
1293 for (unsigned int i = 0; i < address->terms.length (); ++i) | |
1294 if (TREE_CODE (address->terms[i].expr) != SSA_NAME) | |
1295 { | |
1296 obstack_free (&m_obstack, address); | |
1297 return; | |
1298 } | |
1299 | |
1300 /* The loop above set MIN_OFFSET based on the first byte of the | |
1301 referenced data. Calculate the end + 1. */ | |
1302 address->max_offset = address->min_offset + type_size; | |
1303 | |
1304 /* Put the terms into a canonical order for the hash table lookup below. */ | |
1305 address->terms.qsort (compare_address_terms); | |
1306 | |
1307 if (dump_enabled_p ()) | |
1308 { | |
1309 dump_printf_loc (MSG_NOTE, stmt, "recording address fragment %T", expr); | |
1310 if (multiplier != 1) | |
1311 dump_printf (MSG_NOTE, " * %wd", multiplier); | |
1312 dump_printf (MSG_NOTE, " = "); | |
1313 dump_address_info (MSG_NOTE, *address); | |
1314 dump_printf (MSG_NOTE, "\n"); | |
1315 } | |
1316 | |
1317 /* Pool address information with the same terms (but potentially | |
1318 different offsets). */ | |
1319 address_info **slot = m_address_table.find_slot (address, INSERT); | |
1320 if (address_info *old_address = *slot) | |
1321 { | |
1322 /* We've already seen an address with the same terms. Extend the | |
1323 offset range to account for this access. Doing this can paper | |
1324 over gaps, such as in: | |
1325 | |
1326 a[i * stride * 4] + a[i * stride * 4 + 3]; | |
1327 | |
1328 where nothing references "+ 1" or "+ 2". However, the vectorizer | |
1329 handles such gapped accesses without problems, so it's not worth | |
1330 trying to exclude them. */ | |
1331 if (old_address->min_offset > address->min_offset) | |
1332 old_address->min_offset = address->min_offset; | |
1333 if (old_address->max_offset < address->max_offset) | |
1334 old_address->max_offset = address->max_offset; | |
1335 obstack_free (&m_obstack, address); | |
1336 } | |
1337 else | |
1338 { | |
1339 /* This is the first time we've seen an address with these terms. */ | |
1340 *slot = address; | |
1341 m_address_list.safe_push (address); | |
1342 } | |
1343 } | |
1344 | |
1345 /* Analyze expression EXPR, which occurs in STMT. */ | |
1346 | |
1347 void | |
1348 loop_versioning::analyze_expr (gimple *stmt, tree expr) | |
1349 { | |
1350 unsigned HOST_WIDE_INT type_size; | |
1351 | |
1352 while (handled_component_p (expr)) | |
1353 { | |
1354 /* See whether we can use versioning to avoid a multiplication | |
1355 in an array index. */ | |
1356 if (TREE_CODE (expr) == ARRAY_REF | |
1357 && acceptable_type_p (TREE_TYPE (expr), &type_size)) | |
1358 record_address_fragment (stmt, type_size, | |
1359 TREE_OPERAND (expr, 1), type_size, 0); | |
1360 expr = TREE_OPERAND (expr, 0); | |
1361 } | |
1362 | |
1363 /* See whether we can use versioning to avoid a multiplication | |
1364 in the pointer calculation of a MEM_REF. */ | |
1365 if (TREE_CODE (expr) == MEM_REF | |
1366 && acceptable_type_p (TREE_TYPE (expr), &type_size)) | |
1367 record_address_fragment (stmt, type_size, TREE_OPERAND (expr, 0), 1, | |
1368 /* This is heuristic only, so don't worry | |
1369 about truncation or overflow. */ | |
1370 TREE_INT_CST_LOW (TREE_OPERAND (expr, 1))); | |
1371 | |
1372 /* These would be easy to handle if they existed at this stage. */ | |
1373 gcc_checking_assert (TREE_CODE (expr) != TARGET_MEM_REF); | |
1374 } | |
1375 | |
1376 /* Analyze all the statements in BB looking for useful version checks. | |
1377 Return true on success, false if something prevents the block from | |
1378 being versioned. */ | |
1379 | |
1380 bool | |
1381 loop_versioning::analyze_block (basic_block bb) | |
1382 { | |
1383 class loop *loop = bb->loop_father; | |
1384 loop_info &li = get_loop_info (loop); | |
1385 for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi); | |
1386 gsi_next (&gsi)) | |
1387 { | |
1388 gimple *stmt = gsi_stmt (gsi); | |
1389 if (is_gimple_debug (stmt)) | |
1390 continue; | |
1391 | |
1392 if (expensive_stmt_p (stmt)) | |
1393 { | |
1394 if (dump_enabled_p ()) | |
1395 dump_printf_loc (MSG_NOTE, stmt, "expensive statement" | |
1396 " prevents versioning: %G", stmt); | |
1397 return false; | |
1398 } | |
1399 | |
1400 /* Only look for direct versioning opportunities in inner loops | |
1401 since the benefit tends to be much smaller for outer loops. */ | |
1402 if (!loop->inner) | |
1403 { | |
1404 unsigned int nops = gimple_num_ops (stmt); | |
1405 for (unsigned int i = 0; i < nops; ++i) | |
1406 if (tree op = gimple_op (stmt, i)) | |
1407 analyze_expr (stmt, op); | |
1408 } | |
1409 | |
1410 /* The point of the instruction limit is to prevent excessive | |
1411 code growth, so this is a size-based estimate even though | |
1412 the optimization is aimed at speed. */ | |
1413 li.num_insns += estimate_num_insns (stmt, &eni_size_weights); | |
1414 } | |
1415 | |
1416 return true; | |
1417 } | |
1418 | |
1419 /* Analyze all the blocks in the function, looking for useful version checks. | |
1420 Return true if we found one. */ | |
1421 | |
1422 bool | |
1423 loop_versioning::analyze_blocks () | |
1424 { | |
1425 AUTO_DUMP_SCOPE ("analyze_blocks", | |
1426 dump_user_location_t::from_function_decl (m_fn->decl)); | |
1427 | |
1428 /* For now we don't try to version the whole function, although | |
1429 versioning at that level could be useful in some cases. */ | |
1430 get_loop_info (get_loop (m_fn, 0)).rejected_p = true; | |
1431 | |
1432 class loop *loop; | |
1433 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) | |
1434 { | |
1435 loop_info &linfo = get_loop_info (loop); | |
1436 | |
1437 /* Ignore cold loops. */ | |
1438 if (!optimize_loop_for_speed_p (loop)) | |
1439 linfo.rejected_p = true; | |
1440 | |
1441 /* See whether an inner loop prevents versioning of this loop. */ | |
1442 if (!linfo.rejected_p) | |
1443 for (class loop *inner = loop->inner; inner; inner = inner->next) | |
1444 if (get_loop_info (inner).rejected_p) | |
1445 { | |
1446 linfo.rejected_p = true; | |
1447 break; | |
1448 } | |
1449 | |
1450 /* If versioning the loop is still a possibility, examine the | |
1451 statements in the loop to look for versioning opportunities. */ | |
1452 if (!linfo.rejected_p) | |
1453 { | |
1454 void *start_point = obstack_alloc (&m_obstack, 0); | |
1455 | |
1456 for (basic_block bb = linfo.block_list; bb; | |
1457 bb = m_next_block_in_loop[bb->index]) | |
1458 if (!analyze_block (bb)) | |
1459 { | |
1460 linfo.rejected_p = true; | |
1461 break; | |
1462 } | |
1463 | |
1464 if (!linfo.rejected_p) | |
1465 { | |
1466 /* Process any queued address fragments, now that we have | |
1467 complete grouping information. */ | |
1468 address_info *address; | |
1469 unsigned int i; | |
1470 FOR_EACH_VEC_ELT (m_address_list, i, address) | |
1471 analyze_address_fragment (*address); | |
1472 } | |
1473 | |
1474 m_address_table.empty (); | |
1475 m_address_list.truncate (0); | |
1476 obstack_free (&m_obstack, start_point); | |
1477 } | |
1478 } | |
1479 | |
1480 return m_num_conditions != 0; | |
1481 } | |
1482 | |
1483 /* Use the ranges in VRS to remove impossible versioning conditions from | |
1484 LOOP. */ | |
1485 | |
1486 void | |
1487 loop_versioning::prune_loop_conditions (class loop *loop, vr_values *vrs) | |
1488 { | |
1489 loop_info &li = get_loop_info (loop); | |
1490 | |
1491 int to_remove = -1; | |
1492 bitmap_iterator bi; | |
1493 unsigned int i; | |
1494 EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi) | |
1495 { | |
1496 tree name = ssa_name (i); | |
1497 const value_range_equiv *vr = vrs->get_value_range (name); | |
1498 if (vr && !vr->may_contain_p (build_one_cst (TREE_TYPE (name)))) | |
1499 { | |
1500 if (dump_enabled_p ()) | |
1501 dump_printf_loc (MSG_NOTE, find_loop_location (loop), | |
1502 "%T can never be 1 in this loop\n", name); | |
1503 | |
1504 if (to_remove >= 0) | |
1505 bitmap_clear_bit (&li.unity_names, to_remove); | |
1506 to_remove = i; | |
1507 m_num_conditions -= 1; | |
1508 } | |
1509 } | |
1510 if (to_remove >= 0) | |
1511 bitmap_clear_bit (&li.unity_names, to_remove); | |
1512 } | |
1513 | |
1514 /* Remove any scheduled loop version conditions that will never be true. | |
1515 Return true if any remain. */ | |
1516 | |
1517 bool | |
1518 loop_versioning::prune_conditions () | |
1519 { | |
1520 AUTO_DUMP_SCOPE ("prune_loop_conditions", | |
1521 dump_user_location_t::from_function_decl (m_fn->decl)); | |
1522 | |
1523 calculate_dominance_info (CDI_DOMINATORS); | |
1524 lv_dom_walker dom_walker (*this); | |
1525 dom_walker.walk (ENTRY_BLOCK_PTR_FOR_FN (m_fn)); | |
1526 return m_num_conditions != 0; | |
1527 } | |
1528 | |
1529 /* Merge the version checks for INNER into immediately-enclosing loop | |
1530 OUTER. */ | |
1531 | |
1532 void | |
1533 loop_versioning::merge_loop_info (class loop *outer, class loop *inner) | |
1534 { | |
1535 loop_info &inner_li = get_loop_info (inner); | |
1536 loop_info &outer_li = get_loop_info (outer); | |
1537 | |
1538 if (dump_enabled_p ()) | |
1539 { | |
1540 bitmap_iterator bi; | |
1541 unsigned int i; | |
1542 EXECUTE_IF_SET_IN_BITMAP (&inner_li.unity_names, 0, i, bi) | |
1543 if (!bitmap_bit_p (&outer_li.unity_names, i)) | |
1544 dump_printf_loc (MSG_NOTE, find_loop_location (inner), | |
1545 "hoisting check that %T == 1 to outer loop\n", | |
1546 ssa_name (i)); | |
1547 } | |
1548 | |
1549 bitmap_ior_into (&outer_li.unity_names, &inner_li.unity_names); | |
1550 if (loop_depth (outer_li.outermost) < loop_depth (inner_li.outermost)) | |
1551 outer_li.outermost = inner_li.outermost; | |
1552 } | |
1553 | |
1554 /* Add LOOP to the queue of loops to version. */ | |
1555 | |
1556 void | |
1557 loop_versioning::add_loop_to_queue (class loop *loop) | |
1558 { | |
1559 loop_info &li = get_loop_info (loop); | |
1560 | |
1561 if (dump_enabled_p ()) | |
1562 dump_printf_loc (MSG_NOTE, find_loop_location (loop), | |
1563 "queuing this loop for versioning\n"); | |
1564 m_loops_to_version.safe_push (loop); | |
1565 | |
1566 /* Don't try to version superloops. */ | |
1567 li.rejected_p = true; | |
1568 } | |
1569 | |
1570 /* Decide whether the cost model would allow us to version LOOP, | |
1571 either directly or as part of a parent loop, and return true if so. | |
1572 This does not imply that the loop is actually worth versioning in its | |
1573 own right, just that it would be valid to version it if something | |
1574 benefited. | |
1575 | |
1576 We have already made this decision for all inner loops of LOOP. */ | |
1577 | |
1578 bool | |
1579 loop_versioning::decide_whether_loop_is_versionable (class loop *loop) | |
1580 { | |
1581 loop_info &li = get_loop_info (loop); | |
1582 | |
1583 if (li.rejected_p) | |
1584 return false; | |
1585 | |
1586 /* Examine the decisions made for inner loops. */ | |
1587 for (class loop *inner = loop->inner; inner; inner = inner->next) | |
1588 { | |
1589 loop_info &inner_li = get_loop_info (inner); | |
1590 if (inner_li.rejected_p) | |
1591 { | |
1592 if (dump_enabled_p ()) | |
1593 dump_printf_loc (MSG_NOTE, find_loop_location (loop), | |
1594 "not versioning this loop because one of its" | |
1595 " inner loops should not be versioned\n"); | |
1596 return false; | |
1597 } | |
1598 | |
1599 if (inner_li.worth_versioning_p ()) | |
1600 li.subloops_benefit_p = true; | |
1601 | |
1602 /* Accumulate the number of instructions from subloops that are not | |
1603 the innermost, or that don't benefit from versioning. Only the | |
1604 instructions from innermost loops that benefit from versioning | |
1605 should be weighed against loop-versioning-max-inner-insns; | |
1606 everything else should be weighed against | |
1607 loop-versioning-max-outer-insns. */ | |
1608 if (!inner_li.worth_versioning_p () || inner->inner) | |
1609 { | |
1610 if (dump_enabled_p ()) | |
1611 dump_printf_loc (MSG_NOTE, find_loop_location (loop), | |
1612 "counting %d instructions from this loop" | |
1613 " against its parent loop\n", inner_li.num_insns); | |
1614 li.num_insns += inner_li.num_insns; | |
1615 } | |
1616 } | |
1617 | |
1618 /* Enforce the size limits. */ | |
1619 if (li.worth_versioning_p ()) | |
1620 { | |
1621 unsigned int max_num_insns = max_insns_for_loop (loop); | |
1622 if (dump_enabled_p ()) | |
1623 dump_printf_loc (MSG_NOTE, find_loop_location (loop), | |
1624 "this loop has %d instructions, against" | |
1625 " a versioning limit of %d\n", | |
1626 li.num_insns, max_num_insns); | |
1627 if (li.num_insns > max_num_insns) | |
1628 { | |
1629 if (dump_enabled_p ()) | |
1630 dump_printf_loc (MSG_MISSED_OPTIMIZATION | |
1631 | MSG_PRIORITY_USER_FACING, | |
1632 find_loop_location (loop), | |
1633 "this loop is too big to version"); | |
1634 return false; | |
1635 } | |
1636 } | |
1637 | |
1638 /* Hoist all version checks from subloops to this loop. */ | |
1639 for (class loop *subloop = loop->inner; subloop; subloop = subloop->next) | |
1640 merge_loop_info (loop, subloop); | |
1641 | |
1642 return true; | |
1643 } | |
1644 | |
1645 /* Decide which loops to version and add them to the versioning queue. | |
1646 Return true if there are any loops to version. */ | |
1647 | |
1648 bool | |
1649 loop_versioning::make_versioning_decisions () | |
1650 { | |
1651 AUTO_DUMP_SCOPE ("make_versioning_decisions", | |
1652 dump_user_location_t::from_function_decl (m_fn->decl)); | |
1653 | |
1654 class loop *loop; | |
1655 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) | |
1656 { | |
1657 loop_info &linfo = get_loop_info (loop); | |
1658 if (decide_whether_loop_is_versionable (loop)) | |
1659 { | |
1660 /* Commit to versioning LOOP directly if we can't hoist the | |
1661 version checks any further. */ | |
1662 if (linfo.worth_versioning_p () | |
1663 && (loop_depth (loop) == 1 || linfo.outermost == loop)) | |
1664 add_loop_to_queue (loop); | |
1665 } | |
1666 else | |
1667 { | |
1668 /* We can't version this loop, so individually version any | |
1669 subloops that would benefit and haven't been versioned yet. */ | |
1670 linfo.rejected_p = true; | |
1671 for (class loop *subloop = loop->inner; subloop; | |
1672 subloop = subloop->next) | |
1673 if (get_loop_info (subloop).worth_versioning_p ()) | |
1674 add_loop_to_queue (subloop); | |
1675 } | |
1676 } | |
1677 | |
1678 return !m_loops_to_version.is_empty (); | |
1679 } | |
1680 | |
1681 /* Attempt to implement loop versioning for LOOP, using the information | |
1682 cached in the associated loop_info. Return true on success. */ | |
1683 | |
1684 bool | |
1685 loop_versioning::version_loop (class loop *loop) | |
1686 { | |
1687 loop_info &li = get_loop_info (loop); | |
1688 | |
1689 /* Build up a condition that selects the original loop instead of | |
1690 the simplified loop. */ | |
1691 tree cond = boolean_false_node; | |
1692 bitmap_iterator bi; | |
1693 unsigned int i; | |
1694 EXECUTE_IF_SET_IN_BITMAP (&li.unity_names, 0, i, bi) | |
1695 { | |
1696 tree name = ssa_name (i); | |
1697 tree ne_one = fold_build2 (NE_EXPR, boolean_type_node, name, | |
1698 build_one_cst (TREE_TYPE (name))); | |
1699 cond = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, cond, ne_one); | |
1700 } | |
1701 | |
1702 /* Convert the condition into a suitable gcond. */ | |
1703 gimple_seq stmts = NULL; | |
1704 cond = force_gimple_operand_1 (cond, &stmts, is_gimple_condexpr, NULL_TREE); | |
1705 | |
1706 /* Version the loop. */ | |
1707 initialize_original_copy_tables (); | |
1708 basic_block cond_bb; | |
1709 li.optimized_loop = loop_version (loop, cond, &cond_bb, | |
1710 profile_probability::unlikely (), | |
1711 profile_probability::likely (), | |
1712 profile_probability::unlikely (), | |
1713 profile_probability::likely (), true); | |
1714 free_original_copy_tables (); | |
1715 if (!li.optimized_loop) | |
1716 { | |
1717 if (dump_enabled_p ()) | |
1718 dump_printf_loc (MSG_MISSED_OPTIMIZATION, find_loop_location (loop), | |
1719 "tried but failed to version this loop for when" | |
1720 " certain strides are 1\n"); | |
1721 return false; | |
1722 } | |
1723 | |
1724 if (dump_enabled_p ()) | |
1725 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, find_loop_location (loop), | |
1726 "versioned this loop for when certain strides are 1\n"); | |
1727 | |
1728 /* Insert the statements that feed COND. */ | |
1729 if (stmts) | |
1730 { | |
1731 gimple_stmt_iterator gsi = gsi_last_bb (cond_bb); | |
1732 gsi_insert_seq_before (&gsi, stmts, GSI_SAME_STMT); | |
1733 } | |
1734 | |
1735 return true; | |
1736 } | |
1737 | |
1738 /* Attempt to version all loops in the versioning queue. */ | |
1739 | |
1740 void | |
1741 loop_versioning::implement_versioning_decisions () | |
1742 { | |
1743 /* No AUTO_DUMP_SCOPE here since all messages are top-level and | |
1744 user-facing at this point. */ | |
1745 | |
1746 bool any_succeeded_p = false; | |
1747 class loop *loop; | |
1748 unsigned int i; | |
1749 FOR_EACH_VEC_ELT (m_loops_to_version, i, loop) | |
1750 if (version_loop (loop)) | |
1751 any_succeeded_p = true; | |
1752 if (!any_succeeded_p) | |
1753 return; | |
1754 | |
1755 update_ssa (TODO_update_ssa); | |
1756 | |
1757 /* Simplify the new loop, which is used when COND is false. */ | |
1758 FOR_EACH_VEC_ELT (m_loops_to_version, i, loop) | |
1759 { | |
1760 loop_info &linfo = get_loop_info (loop); | |
1761 if (linfo.optimized_loop) | |
1762 name_prop (linfo).substitute_and_fold (linfo.optimized_loop->header); | |
1763 } | |
1764 } | |
1765 | |
1766 /* Run the pass and return a set of TODO_* flags. */ | |
1767 | |
1768 unsigned int | |
1769 loop_versioning::run () | |
1770 { | |
1771 gcc_assert (scev_initialized_p ()); | |
1772 | |
1773 if (analyze_blocks () | |
1774 && prune_conditions () | |
1775 && make_versioning_decisions ()) | |
1776 implement_versioning_decisions (); | |
1777 | |
1778 return 0; | |
1779 } | |
1780 | |
1781 /* Loop versioning pass. */ | |
1782 | |
1783 const pass_data pass_data_loop_versioning = | |
1784 { | |
1785 GIMPLE_PASS, /* type */ | |
1786 "lversion", /* name */ | |
1787 OPTGROUP_LOOP, /* optinfo_flags */ | |
1788 TV_LOOP_VERSIONING, /* tv_id */ | |
1789 PROP_cfg, /* properties_required */ | |
1790 0, /* properties_provided */ | |
1791 0, /* properties_destroyed */ | |
1792 0, /* todo_flags_start */ | |
1793 0, /* todo_flags_finish */ | |
1794 }; | |
1795 | |
1796 class pass_loop_versioning : public gimple_opt_pass | |
1797 { | |
1798 public: | |
1799 pass_loop_versioning (gcc::context *ctxt) | |
1800 : gimple_opt_pass (pass_data_loop_versioning, ctxt) | |
1801 {} | |
1802 | |
1803 /* opt_pass methods: */ | |
1804 virtual bool gate (function *) { return flag_version_loops_for_strides; } | |
1805 virtual unsigned int execute (function *); | |
1806 }; | |
1807 | |
1808 unsigned int | |
1809 pass_loop_versioning::execute (function *fn) | |
1810 { | |
1811 if (number_of_loops (fn) <= 1) | |
1812 return 0; | |
1813 | |
1814 return loop_versioning (fn).run (); | |
1815 } | |
1816 | |
1817 } // anon namespace | |
1818 | |
1819 gimple_opt_pass * | |
1820 make_pass_loop_versioning (gcc::context *ctxt) | |
1821 { | |
1822 return new pass_loop_versioning (ctxt); | |
1823 } |