131
|
1 /* Loop interchange.
|
|
2 Copyright (C) 2017-2018 Free Software Foundation, Inc.
|
|
3 Contributed by ARM Ltd.
|
|
4
|
|
5 This file is part of GCC.
|
|
6
|
|
7 GCC is free software; you can redistribute it and/or modify it
|
|
8 under the terms of the GNU General Public License as published by the
|
|
9 Free Software Foundation; either version 3, or (at your option) any
|
|
10 later version.
|
|
11
|
|
12 GCC is distributed in the hope that it will be useful, but WITHOUT
|
|
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
15 for more details.
|
|
16
|
|
17 You should have received a copy of the GNU General Public License
|
|
18 along with GCC; see the file COPYING3. If not see
|
|
19 <http://www.gnu.org/licenses/>. */
|
|
20
|
|
21 #include "config.h"
|
|
22 #include "system.h"
|
|
23 #include "coretypes.h"
|
|
24 #include "backend.h"
|
|
25 #include "is-a.h"
|
|
26 #include "tree.h"
|
|
27 #include "gimple.h"
|
|
28 #include "tree-pass.h"
|
|
29 #include "ssa.h"
|
|
30 #include "gimple-pretty-print.h"
|
|
31 #include "fold-const.h"
|
|
32 #include "gimplify.h"
|
|
33 #include "gimple-iterator.h"
|
|
34 #include "gimplify-me.h"
|
|
35 #include "cfgloop.h"
|
|
36 #include "params.h"
|
|
37 #include "tree-ssa.h"
|
|
38 #include "tree-scalar-evolution.h"
|
|
39 #include "tree-ssa-loop-manip.h"
|
|
40 #include "tree-ssa-loop-niter.h"
|
|
41 #include "tree-ssa-loop-ivopts.h"
|
|
42 #include "tree-ssa-dce.h"
|
|
43 #include "tree-data-ref.h"
|
|
44 #include "tree-vectorizer.h"
|
|
45
|
|
46 /* This pass performs loop interchange: for example, the loop nest
|
|
47
|
|
48 for (int j = 0; j < N; j++)
|
|
49 for (int k = 0; k < N; k++)
|
|
50 for (int i = 0; i < N; i++)
|
|
51 c[i][j] = c[i][j] + a[i][k]*b[k][j];
|
|
52
|
|
53 is transformed to
|
|
54
|
|
55 for (int i = 0; i < N; i++)
|
|
56 for (int j = 0; j < N; j++)
|
|
57 for (int k = 0; k < N; k++)
|
|
58 c[i][j] = c[i][j] + a[i][k]*b[k][j];
|
|
59
|
|
60 This pass implements loop interchange in the following steps:
|
|
61
|
|
62 1) Find perfect loop nest for each innermost loop and compute data
|
|
63 dependence relations for it. For above example, loop nest is
|
|
64 <loop_j, loop_k, loop_i>.
|
|
65 2) From innermost to outermost loop, this pass tries to interchange
|
|
66 each loop pair. For above case, it firstly tries to interchange
|
|
67 <loop_k, loop_i> and loop nest becomes <loop_j, loop_i, loop_k>.
|
|
68 Then it tries to interchange <loop_j, loop_i> and loop nest becomes
|
|
69 <loop_i, loop_j, loop_k>. The overall effect is to move innermost
|
|
70 loop to the outermost position. For loop pair <loop_i, loop_j>
|
|
71 to be interchanged, we:
|
|
72 3) Check if data dependence relations are valid for loop interchange.
|
|
73 4) Check if both loops can be interchanged in terms of transformation.
|
|
74 5) Check if interchanging the two loops is profitable.
|
|
75 6) Interchange the two loops by mapping induction variables.
|
|
76
|
|
77 This pass also handles reductions in loop nest. So far we only support
|
|
78 simple reduction of inner loop and double reduction of the loop nest. */
|
|
79
|
|
80 /* Maximum number of stmts in each loop that should be interchanged. */
|
|
81 #define MAX_NUM_STMT (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_MAX_NUM_STMTS))
|
|
82 /* Maximum number of data references in loop nest. */
|
|
83 #define MAX_DATAREFS (PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
|
|
84
|
|
85 /* Comparison ratio of access stride between inner/outer loops to be
|
|
86 interchanged. This is the minimum stride ratio for loop interchange
|
|
87 to be profitable. */
|
|
88 #define OUTER_STRIDE_RATIO (PARAM_VALUE (PARAM_LOOP_INTERCHANGE_STRIDE_RATIO))
|
|
89 /* The same as above, but we require higher ratio for interchanging the
|
|
90 innermost two loops. */
|
|
91 #define INNER_STRIDE_RATIO ((OUTER_STRIDE_RATIO) + 1)
|
|
92
|
|
93 /* Comparison ratio of stmt cost between inner/outer loops. Loops won't
|
|
94 be interchanged if outer loop has too many stmts. */
|
|
95 #define STMT_COST_RATIO (3)
|
|
96
|
|
97 /* Vector of strides that DR accesses in each level loop of a loop nest. */
|
|
98 #define DR_ACCESS_STRIDE(dr) ((vec<tree> *) dr->aux)
|
|
99
|
|
100 /* Structure recording loop induction variable. */
|
|
101 typedef struct induction
|
|
102 {
|
|
103 /* IV itself. */
|
|
104 tree var;
|
|
105 /* IV's initializing value, which is the init arg of the IV PHI node. */
|
|
106 tree init_val;
|
|
107 /* IV's initializing expr, which is (the expanded result of) init_val. */
|
|
108 tree init_expr;
|
|
109 /* IV's step. */
|
|
110 tree step;
|
|
111 } *induction_p;
|
|
112
|
|
113 /* Enum type for loop reduction variable. */
|
|
114 enum reduction_type
|
|
115 {
|
|
116 UNKNOWN_RTYPE = 0,
|
|
117 SIMPLE_RTYPE,
|
|
118 DOUBLE_RTYPE
|
|
119 };
|
|
120
|
|
121 /* Structure recording loop reduction variable. */
|
|
122 typedef struct reduction
|
|
123 {
|
|
124 /* Reduction itself. */
|
|
125 tree var;
|
|
126 /* PHI node defining reduction variable. */
|
|
127 gphi *phi;
|
|
128 /* Init and next variables of the reduction. */
|
|
129 tree init;
|
|
130 tree next;
|
|
131 /* Lcssa PHI node if reduction is used outside of its definition loop. */
|
|
132 gphi *lcssa_phi;
|
|
133 /* Stmts defining init and next. */
|
|
134 gimple *producer;
|
|
135 gimple *consumer;
|
|
136 /* If init is loaded from memory, this is the loading memory reference. */
|
|
137 tree init_ref;
|
|
138 /* If reduction is finally stored to memory, this is the stored memory
|
|
139 reference. */
|
|
140 tree fini_ref;
|
|
141 enum reduction_type type;
|
|
142 } *reduction_p;
|
|
143
|
|
144
|
|
145 /* Dump reduction RE. */
|
|
146
|
|
147 static void
|
|
148 dump_reduction (reduction_p re)
|
|
149 {
|
|
150 if (re->type == SIMPLE_RTYPE)
|
|
151 fprintf (dump_file, " Simple reduction: ");
|
|
152 else if (re->type == DOUBLE_RTYPE)
|
|
153 fprintf (dump_file, " Double reduction: ");
|
|
154 else
|
|
155 fprintf (dump_file, " Unknown reduction: ");
|
|
156
|
|
157 print_gimple_stmt (dump_file, re->phi, 0);
|
|
158 }
|
|
159
|
|
160 /* Dump LOOP's induction IV. */
|
|
161 static void
|
|
162 dump_induction (struct loop *loop, induction_p iv)
|
|
163 {
|
|
164 fprintf (dump_file, " Induction: ");
|
|
165 print_generic_expr (dump_file, iv->var, TDF_SLIM);
|
|
166 fprintf (dump_file, " = {");
|
|
167 print_generic_expr (dump_file, iv->init_expr, TDF_SLIM);
|
|
168 fprintf (dump_file, ", ");
|
|
169 print_generic_expr (dump_file, iv->step, TDF_SLIM);
|
|
170 fprintf (dump_file, "}_%d\n", loop->num);
|
|
171 }
|
|
172
|
|
173 /* Loop candidate for interchange. */
|
|
174
|
|
175 struct loop_cand
|
|
176 {
|
|
177 loop_cand (struct loop *, struct loop *);
|
|
178 ~loop_cand ();
|
|
179
|
|
180 reduction_p find_reduction_by_stmt (gimple *);
|
|
181 void classify_simple_reduction (reduction_p);
|
|
182 bool analyze_iloop_reduction_var (tree);
|
|
183 bool analyze_oloop_reduction_var (loop_cand *, tree);
|
|
184 bool analyze_induction_var (tree, tree);
|
|
185 bool analyze_carried_vars (loop_cand *);
|
|
186 bool analyze_lcssa_phis (void);
|
|
187 bool can_interchange_p (loop_cand *);
|
|
188 void undo_simple_reduction (reduction_p, bitmap);
|
|
189
|
|
190 /* The loop itself. */
|
|
191 struct loop *m_loop;
|
|
192 /* The outer loop for interchange. It equals to loop if this loop cand
|
|
193 itself represents the outer loop. */
|
|
194 struct loop *m_outer;
|
|
195 /* Vector of induction variables in loop. */
|
|
196 vec<induction_p> m_inductions;
|
|
197 /* Vector of reduction variables in loop. */
|
|
198 vec<reduction_p> m_reductions;
|
|
199 /* Lcssa PHI nodes of this loop. */
|
|
200 vec<gphi *> m_lcssa_nodes;
|
|
201 /* Single exit edge of this loop. */
|
|
202 edge m_exit;
|
|
203 /* Basic blocks of this loop. */
|
|
204 basic_block *m_bbs;
|
|
205 /* Number of stmts of this loop. Inner loops' stmts are not included. */
|
|
206 int m_num_stmts;
|
|
207 /* Number of constant initialized simple reduction. */
|
|
208 int m_const_init_reduc;
|
|
209 };
|
|
210
|
|
211 /* Constructor. */
|
|
212
|
|
213 loop_cand::loop_cand (struct loop *loop, struct loop *outer)
|
|
214 : m_loop (loop), m_outer (outer), m_exit (single_exit (loop)),
|
|
215 m_bbs (get_loop_body (loop)), m_num_stmts (0), m_const_init_reduc (0)
|
|
216 {
|
|
217 m_inductions.create (3);
|
|
218 m_reductions.create (3);
|
|
219 m_lcssa_nodes.create (3);
|
|
220 }
|
|
221
|
|
222 /* Destructor. */
|
|
223
|
|
224 loop_cand::~loop_cand ()
|
|
225 {
|
|
226 induction_p iv;
|
|
227 for (unsigned i = 0; m_inductions.iterate (i, &iv); ++i)
|
|
228 free (iv);
|
|
229
|
|
230 reduction_p re;
|
|
231 for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
|
|
232 free (re);
|
|
233
|
|
234 m_inductions.release ();
|
|
235 m_reductions.release ();
|
|
236 m_lcssa_nodes.release ();
|
|
237 free (m_bbs);
|
|
238 }
|
|
239
|
|
240 /* Return single use stmt of VAR in LOOP, otherwise return NULL. */
|
|
241
|
|
242 static gimple *
|
|
243 single_use_in_loop (tree var, struct loop *loop)
|
|
244 {
|
|
245 gimple *stmt, *res = NULL;
|
|
246 use_operand_p use_p;
|
|
247 imm_use_iterator iterator;
|
|
248
|
|
249 FOR_EACH_IMM_USE_FAST (use_p, iterator, var)
|
|
250 {
|
|
251 stmt = USE_STMT (use_p);
|
|
252 if (is_gimple_debug (stmt))
|
|
253 continue;
|
|
254
|
|
255 if (!flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
|
|
256 continue;
|
|
257
|
|
258 if (res)
|
|
259 return NULL;
|
|
260
|
|
261 res = stmt;
|
|
262 }
|
|
263 return res;
|
|
264 }
|
|
265
|
|
266 /* Return true if E is unsupported in loop interchange, i.e, E is a complex
|
|
267 edge or part of irreducible loop. */
|
|
268
|
|
269 static inline bool
|
|
270 unsupported_edge (edge e)
|
|
271 {
|
|
272 return (e->flags & (EDGE_COMPLEX | EDGE_IRREDUCIBLE_LOOP));
|
|
273 }
|
|
274
|
|
275 /* Return the reduction if STMT is one of its lcssa PHI, producer or consumer
|
|
276 stmt. */
|
|
277
|
|
278 reduction_p
|
|
279 loop_cand::find_reduction_by_stmt (gimple *stmt)
|
|
280 {
|
|
281 gphi *phi = dyn_cast <gphi *> (stmt);
|
|
282 reduction_p re;
|
|
283
|
|
284 for (unsigned i = 0; m_reductions.iterate (i, &re); ++i)
|
|
285 if ((phi != NULL && phi == re->lcssa_phi)
|
|
286 || (stmt == re->producer || stmt == re->consumer))
|
|
287 return re;
|
|
288
|
|
289 return NULL;
|
|
290 }
|
|
291
|
|
292 /* Return true if current loop_cand be interchanged. ILOOP is not NULL if
|
|
293 current loop_cand is outer loop in loop nest. */
|
|
294
|
|
295 bool
|
|
296 loop_cand::can_interchange_p (loop_cand *iloop)
|
|
297 {
|
|
298 /* For now we only support at most one reduction. */
|
|
299 unsigned allowed_reduction_num = 1;
|
|
300
|
|
301 /* Only support reduction if the loop nest to be interchanged is the
|
|
302 innermostin two loops. */
|
|
303 if ((iloop == NULL && m_loop->inner != NULL)
|
|
304 || (iloop != NULL && iloop->m_loop->inner != NULL))
|
|
305 allowed_reduction_num = 0;
|
|
306
|
|
307 if (m_reductions.length () > allowed_reduction_num
|
|
308 || (m_reductions.length () == 1
|
|
309 && m_reductions[0]->type == UNKNOWN_RTYPE))
|
|
310 return false;
|
|
311
|
|
312 /* Only support lcssa PHI node which is for reduction. */
|
|
313 if (m_lcssa_nodes.length () > allowed_reduction_num)
|
|
314 return false;
|
|
315
|
|
316 /* Check if basic block has any unsupported operation. Note basic blocks
|
|
317 of inner loops are not checked here. */
|
|
318 for (unsigned i = 0; i < m_loop->num_nodes; i++)
|
|
319 {
|
|
320 basic_block bb = m_bbs[i];
|
|
321 gphi_iterator psi;
|
|
322 gimple_stmt_iterator gsi;
|
|
323
|
|
324 /* Skip basic blocks of inner loops. */
|
|
325 if (bb->loop_father != m_loop)
|
|
326 continue;
|
|
327
|
|
328 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
329 {
|
|
330 gimple *stmt = gsi_stmt (gsi);
|
|
331 if (is_gimple_debug (stmt))
|
|
332 continue;
|
|
333
|
|
334 if (gimple_has_side_effects (stmt))
|
|
335 return false;
|
|
336
|
|
337 m_num_stmts++;
|
|
338 if (gcall *call = dyn_cast <gcall *> (stmt))
|
|
339 {
|
|
340 /* In basic block of outer loop, the call should be cheap since
|
|
341 it will be moved to inner loop. */
|
|
342 if (iloop != NULL
|
|
343 && !gimple_inexpensive_call_p (call))
|
|
344 return false;
|
|
345 continue;
|
|
346 }
|
|
347
|
|
348 if (!iloop || !gimple_vuse (stmt))
|
|
349 continue;
|
|
350
|
|
351 /* Support stmt accessing memory in outer loop only if it is for
|
|
352 inner loop's reduction. */
|
|
353 if (iloop->find_reduction_by_stmt (stmt))
|
|
354 continue;
|
|
355
|
|
356 tree lhs;
|
|
357 /* Support loop invariant memory reference if it's only used once by
|
|
358 inner loop. */
|
|
359 /* ??? How's this checking for invariantness? */
|
|
360 if (gimple_assign_single_p (stmt)
|
|
361 && (lhs = gimple_assign_lhs (stmt)) != NULL_TREE
|
|
362 && TREE_CODE (lhs) == SSA_NAME
|
|
363 && single_use_in_loop (lhs, iloop->m_loop))
|
|
364 continue;
|
|
365
|
|
366 return false;
|
|
367 }
|
|
368 /* Check if loop has too many stmts. */
|
|
369 if (m_num_stmts > MAX_NUM_STMT)
|
|
370 return false;
|
|
371
|
|
372 /* Allow PHI nodes in any basic block of inner loop, PHI nodes in outer
|
|
373 loop's header, or PHI nodes in dest bb of inner loop's exit edge. */
|
|
374 if (!iloop || bb == m_loop->header
|
|
375 || bb == iloop->m_exit->dest)
|
|
376 continue;
|
|
377
|
|
378 /* Don't allow any other PHI nodes. */
|
|
379 for (psi = gsi_start_phis (bb); !gsi_end_p (psi); gsi_next (&psi))
|
|
380 if (!virtual_operand_p (PHI_RESULT (psi.phi ())))
|
|
381 return false;
|
|
382 }
|
|
383
|
|
384 return true;
|
|
385 }
|
|
386
|
|
387 /* Programmers and optimizers (like loop store motion) may optimize code:
|
|
388
|
|
389 for (int i = 0; i < N; i++)
|
|
390 for (int j = 0; j < N; j++)
|
|
391 a[i] += b[j][i] * c[j][i];
|
|
392
|
|
393 into reduction:
|
|
394
|
|
395 for (int i = 0; i < N; i++)
|
|
396 {
|
|
397 // producer. Note sum can be intitialized to a constant.
|
|
398 int sum = a[i];
|
|
399 for (int j = 0; j < N; j++)
|
|
400 {
|
|
401 sum += b[j][i] * c[j][i];
|
|
402 }
|
|
403 // consumer.
|
|
404 a[i] = sum;
|
|
405 }
|
|
406
|
|
407 The result code can't be interchanged without undoing the optimization.
|
|
408 This function classifies this kind reduction and records information so
|
|
409 that we can undo the store motion during interchange. */
|
|
410
|
|
411 void
|
|
412 loop_cand::classify_simple_reduction (reduction_p re)
|
|
413 {
|
|
414 gimple *producer, *consumer;
|
|
415
|
|
416 /* Check init variable of reduction and how it is initialized. */
|
|
417 if (TREE_CODE (re->init) == SSA_NAME)
|
|
418 {
|
|
419 producer = SSA_NAME_DEF_STMT (re->init);
|
|
420 re->producer = producer;
|
|
421 basic_block bb = gimple_bb (producer);
|
|
422 if (!bb || bb->loop_father != m_outer)
|
|
423 return;
|
|
424
|
|
425 if (!gimple_assign_load_p (producer))
|
|
426 return;
|
|
427
|
|
428 re->init_ref = gimple_assign_rhs1 (producer);
|
|
429 }
|
|
430 else if (CONSTANT_CLASS_P (re->init))
|
|
431 m_const_init_reduc++;
|
|
432 else
|
|
433 return;
|
|
434
|
|
435 /* Check how reduction variable is used. */
|
|
436 consumer = single_use_in_loop (PHI_RESULT (re->lcssa_phi), m_outer);
|
|
437 if (!consumer
|
|
438 || !gimple_store_p (consumer))
|
|
439 return;
|
|
440
|
|
441 re->fini_ref = gimple_get_lhs (consumer);
|
|
442 re->consumer = consumer;
|
|
443
|
|
444 /* Simple reduction with constant initializer. */
|
|
445 if (!re->init_ref)
|
|
446 {
|
|
447 gcc_assert (CONSTANT_CLASS_P (re->init));
|
|
448 re->init_ref = unshare_expr (re->fini_ref);
|
|
449 }
|
|
450
|
|
451 /* Require memory references in producer and consumer are the same so
|
|
452 that we can undo reduction during interchange. */
|
|
453 if (re->init_ref && !operand_equal_p (re->init_ref, re->fini_ref, 0))
|
|
454 return;
|
|
455
|
|
456 re->type = SIMPLE_RTYPE;
|
|
457 }
|
|
458
|
|
459 /* Analyze reduction variable VAR for inner loop of the loop nest to be
|
|
460 interchanged. Return true if analysis succeeds. */
|
|
461
|
|
462 bool
|
|
463 loop_cand::analyze_iloop_reduction_var (tree var)
|
|
464 {
|
|
465 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
|
|
466 gphi *lcssa_phi = NULL, *use_phi;
|
|
467 tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
|
|
468 tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
|
|
469 reduction_p re;
|
|
470 gimple *stmt, *next_def, *single_use = NULL;
|
|
471 use_operand_p use_p;
|
|
472 imm_use_iterator iterator;
|
|
473
|
|
474 if (TREE_CODE (next) != SSA_NAME)
|
|
475 return false;
|
|
476
|
|
477 next_def = SSA_NAME_DEF_STMT (next);
|
|
478 basic_block bb = gimple_bb (next_def);
|
|
479 if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
|
|
480 return false;
|
|
481
|
|
482 /* In restricted reduction, the var is (and must be) used in defining
|
|
483 the updated var. The process can be depicted as below:
|
|
484
|
|
485 var ;; = PHI<init, next>
|
|
486 |
|
|
487 |
|
|
488 v
|
|
489 +---------------------+
|
|
490 | reduction operators | <-- other operands
|
|
491 +---------------------+
|
|
492 |
|
|
493 |
|
|
494 v
|
|
495 next
|
|
496
|
|
497 In terms loop interchange, we don't change how NEXT is computed based
|
|
498 on VAR and OTHER OPERANDS. In case of double reduction in loop nest
|
|
499 to be interchanged, we don't changed it at all. In the case of simple
|
|
500 reduction in inner loop, we only make change how VAR/NEXT is loaded or
|
|
501 stored. With these conditions, we can relax restrictions on reduction
|
|
502 in a way that reduction operation is seen as black box. In general,
|
|
503 we can ignore reassociation of reduction operator; we can handle fake
|
|
504 reductions in which VAR is not even used to compute NEXT. */
|
|
505 if (! single_imm_use (var, &use_p, &single_use)
|
|
506 || ! flow_bb_inside_loop_p (m_loop, gimple_bb (single_use)))
|
|
507 return false;
|
|
508
|
|
509 /* Check the reduction operation. We require a left-associative operation.
|
|
510 For FP math we also need to be allowed to associate operations. */
|
|
511 if (gassign *ass = dyn_cast <gassign *> (single_use))
|
|
512 {
|
|
513 enum tree_code code = gimple_assign_rhs_code (ass);
|
|
514 if (! (associative_tree_code (code)
|
|
515 || (code == MINUS_EXPR
|
|
516 && use_p->use == gimple_assign_rhs1_ptr (ass)))
|
|
517 || (FLOAT_TYPE_P (TREE_TYPE (var))
|
|
518 && ! flag_associative_math))
|
|
519 return false;
|
|
520 }
|
|
521 else
|
|
522 return false;
|
|
523
|
|
524 /* Handle and verify a series of stmts feeding the reduction op. */
|
|
525 if (single_use != next_def
|
|
526 && !check_reduction_path (dump_user_location_t (), m_loop, phi, next,
|
|
527 gimple_assign_rhs_code (single_use)))
|
|
528 return false;
|
|
529
|
|
530 /* Only support cases in which INIT is used in inner loop. */
|
|
531 if (TREE_CODE (init) == SSA_NAME)
|
|
532 FOR_EACH_IMM_USE_FAST (use_p, iterator, init)
|
|
533 {
|
|
534 stmt = USE_STMT (use_p);
|
|
535 if (is_gimple_debug (stmt))
|
|
536 continue;
|
|
537
|
|
538 if (!flow_bb_inside_loop_p (m_loop, gimple_bb (stmt)))
|
|
539 return false;
|
|
540 }
|
|
541
|
|
542 FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
|
|
543 {
|
|
544 stmt = USE_STMT (use_p);
|
|
545 if (is_gimple_debug (stmt))
|
|
546 continue;
|
|
547
|
|
548 /* Or else it's used in PHI itself. */
|
|
549 use_phi = dyn_cast <gphi *> (stmt);
|
|
550 if (use_phi == phi)
|
|
551 continue;
|
|
552
|
|
553 if (use_phi != NULL
|
|
554 && lcssa_phi == NULL
|
|
555 && gimple_bb (stmt) == m_exit->dest
|
|
556 && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
|
|
557 lcssa_phi = use_phi;
|
|
558 else
|
|
559 return false;
|
|
560 }
|
|
561 if (!lcssa_phi)
|
|
562 return false;
|
|
563
|
|
564 re = XCNEW (struct reduction);
|
|
565 re->var = var;
|
|
566 re->init = init;
|
|
567 re->next = next;
|
|
568 re->phi = phi;
|
|
569 re->lcssa_phi = lcssa_phi;
|
|
570
|
|
571 classify_simple_reduction (re);
|
|
572
|
|
573 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
574 dump_reduction (re);
|
|
575
|
|
576 m_reductions.safe_push (re);
|
|
577 return true;
|
|
578 }
|
|
579
|
|
580 /* Analyze reduction variable VAR for outer loop of the loop nest to be
|
|
581 interchanged. ILOOP is not NULL and points to inner loop. For the
|
|
582 moment, we only support double reduction for outer loop, like:
|
|
583
|
|
584 for (int i = 0; i < n; i++)
|
|
585 {
|
|
586 int sum = 0;
|
|
587
|
|
588 for (int j = 0; j < n; j++) // outer loop
|
|
589 for (int k = 0; k < n; k++) // inner loop
|
|
590 sum += a[i][k]*b[k][j];
|
|
591
|
|
592 s[i] = sum;
|
|
593 }
|
|
594
|
|
595 Note the innermost two loops are the loop nest to be interchanged.
|
|
596 Return true if analysis succeeds. */
|
|
597
|
|
598 bool
|
|
599 loop_cand::analyze_oloop_reduction_var (loop_cand *iloop, tree var)
|
|
600 {
|
|
601 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
|
|
602 gphi *lcssa_phi = NULL, *use_phi;
|
|
603 tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
|
|
604 tree next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (m_loop));
|
|
605 reduction_p re;
|
|
606 gimple *stmt, *next_def;
|
|
607 use_operand_p use_p;
|
|
608 imm_use_iterator iterator;
|
|
609
|
|
610 if (TREE_CODE (next) != SSA_NAME)
|
|
611 return false;
|
|
612
|
|
613 next_def = SSA_NAME_DEF_STMT (next);
|
|
614 basic_block bb = gimple_bb (next_def);
|
|
615 if (!bb || !flow_bb_inside_loop_p (m_loop, bb))
|
|
616 return false;
|
|
617
|
|
618 /* Find inner loop's simple reduction that uses var as initializer. */
|
|
619 reduction_p inner_re = NULL;
|
|
620 for (unsigned i = 0; iloop->m_reductions.iterate (i, &inner_re); ++i)
|
|
621 if (inner_re->init == var || operand_equal_p (inner_re->init, var, 0))
|
|
622 break;
|
|
623
|
|
624 if (inner_re == NULL
|
|
625 || inner_re->type != UNKNOWN_RTYPE
|
|
626 || inner_re->producer != phi)
|
|
627 return false;
|
|
628
|
|
629 /* In case of double reduction, outer loop's reduction should be updated
|
|
630 by inner loop's simple reduction. */
|
|
631 if (next_def != inner_re->lcssa_phi)
|
|
632 return false;
|
|
633
|
|
634 /* Outer loop's reduction should only be used to initialize inner loop's
|
|
635 simple reduction. */
|
|
636 if (! single_imm_use (var, &use_p, &stmt)
|
|
637 || stmt != inner_re->phi)
|
|
638 return false;
|
|
639
|
|
640 /* Check this reduction is correctly used outside of loop via lcssa phi. */
|
|
641 FOR_EACH_IMM_USE_FAST (use_p, iterator, next)
|
|
642 {
|
|
643 stmt = USE_STMT (use_p);
|
|
644 if (is_gimple_debug (stmt))
|
|
645 continue;
|
|
646
|
|
647 /* Or else it's used in PHI itself. */
|
|
648 use_phi = dyn_cast <gphi *> (stmt);
|
|
649 if (use_phi == phi)
|
|
650 continue;
|
|
651
|
|
652 if (lcssa_phi == NULL
|
|
653 && use_phi != NULL
|
|
654 && gimple_bb (stmt) == m_exit->dest
|
|
655 && PHI_ARG_DEF_FROM_EDGE (use_phi, m_exit) == next)
|
|
656 lcssa_phi = use_phi;
|
|
657 else
|
|
658 return false;
|
|
659 }
|
|
660 if (!lcssa_phi)
|
|
661 return false;
|
|
662
|
|
663 re = XCNEW (struct reduction);
|
|
664 re->var = var;
|
|
665 re->init = init;
|
|
666 re->next = next;
|
|
667 re->phi = phi;
|
|
668 re->lcssa_phi = lcssa_phi;
|
|
669 re->type = DOUBLE_RTYPE;
|
|
670 inner_re->type = DOUBLE_RTYPE;
|
|
671
|
|
672 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
673 dump_reduction (re);
|
|
674
|
|
675 m_reductions.safe_push (re);
|
|
676 return true;
|
|
677 }
|
|
678
|
|
679 /* Return true if VAR is induction variable of current loop whose scev is
|
|
680 specified by CHREC. */
|
|
681
|
|
682 bool
|
|
683 loop_cand::analyze_induction_var (tree var, tree chrec)
|
|
684 {
|
|
685 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (var));
|
|
686 tree init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
|
|
687
|
|
688 /* Var is loop invariant, though it's unlikely to happen. */
|
|
689 if (tree_does_not_contain_chrecs (chrec))
|
|
690 {
|
|
691 struct induction *iv = XCNEW (struct induction);
|
|
692 iv->var = var;
|
|
693 iv->init_val = init;
|
|
694 iv->init_expr = chrec;
|
|
695 iv->step = build_int_cst (TREE_TYPE (chrec), 0);
|
|
696 m_inductions.safe_push (iv);
|
|
697 return true;
|
|
698 }
|
|
699
|
|
700 if (TREE_CODE (chrec) != POLYNOMIAL_CHREC
|
|
701 || CHREC_VARIABLE (chrec) != (unsigned) m_loop->num
|
|
702 || tree_contains_chrecs (CHREC_LEFT (chrec), NULL)
|
|
703 || tree_contains_chrecs (CHREC_RIGHT (chrec), NULL))
|
|
704 return false;
|
|
705
|
|
706 struct induction *iv = XCNEW (struct induction);
|
|
707 iv->var = var;
|
|
708 iv->init_val = init;
|
|
709 iv->init_expr = CHREC_LEFT (chrec);
|
|
710 iv->step = CHREC_RIGHT (chrec);
|
|
711
|
|
712 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
713 dump_induction (m_loop, iv);
|
|
714
|
|
715 m_inductions.safe_push (iv);
|
|
716 return true;
|
|
717 }
|
|
718
|
|
719 /* Return true if all loop carried variables defined in loop header can
|
|
720 be successfully analyzed. */
|
|
721
|
|
722 bool
|
|
723 loop_cand::analyze_carried_vars (loop_cand *iloop)
|
|
724 {
|
|
725 edge e = loop_preheader_edge (m_outer);
|
|
726 gphi_iterator gsi;
|
|
727
|
|
728 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
729 fprintf (dump_file, "\nLoop(%d) carried vars:\n", m_loop->num);
|
|
730
|
|
731 for (gsi = gsi_start_phis (m_loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
732 {
|
|
733 gphi *phi = gsi.phi ();
|
|
734
|
|
735 tree var = PHI_RESULT (phi);
|
|
736 if (virtual_operand_p (var))
|
|
737 continue;
|
|
738
|
|
739 tree chrec = analyze_scalar_evolution (m_loop, var);
|
|
740 chrec = instantiate_scev (e, m_loop, chrec);
|
|
741
|
|
742 /* Analyze var as reduction variable. */
|
|
743 if (chrec_contains_undetermined (chrec)
|
|
744 || chrec_contains_symbols_defined_in_loop (chrec, m_outer->num))
|
|
745 {
|
|
746 if (iloop && !analyze_oloop_reduction_var (iloop, var))
|
|
747 return false;
|
|
748 if (!iloop && !analyze_iloop_reduction_var (var))
|
|
749 return false;
|
|
750 }
|
|
751 /* Analyze var as induction variable. */
|
|
752 else if (!analyze_induction_var (var, chrec))
|
|
753 return false;
|
|
754 }
|
|
755
|
|
756 return true;
|
|
757 }
|
|
758
|
|
759 /* Return TRUE if loop closed PHI nodes can be analyzed successfully. */
|
|
760
|
|
761 bool
|
|
762 loop_cand::analyze_lcssa_phis (void)
|
|
763 {
|
|
764 gphi_iterator gsi;
|
|
765 for (gsi = gsi_start_phis (m_exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
766 {
|
|
767 gphi *phi = gsi.phi ();
|
|
768
|
|
769 if (virtual_operand_p (PHI_RESULT (phi)))
|
|
770 continue;
|
|
771
|
|
772 /* TODO: We only support lcssa phi for reduction for now. */
|
|
773 if (!find_reduction_by_stmt (phi))
|
|
774 return false;
|
|
775 }
|
|
776
|
|
777 return true;
|
|
778 }
|
|
779
|
|
780 /* CONSUMER is a stmt in BB storing reduction result into memory object.
|
|
781 When the reduction is intialized from constant value, we need to add
|
|
782 a stmt loading from the memory object to target basic block in inner
|
|
783 loop during undoing the reduction. Problem is that memory reference
|
|
784 may use ssa variables not dominating the target basic block. This
|
|
785 function finds all stmts on which CONSUMER depends in basic block BB,
|
|
786 records and returns them via STMTS. */
|
|
787
|
|
788 static void
|
|
789 find_deps_in_bb_for_stmt (gimple_seq *stmts, basic_block bb, gimple *consumer)
|
|
790 {
|
|
791 auto_vec<gimple *, 4> worklist;
|
|
792 use_operand_p use_p;
|
|
793 ssa_op_iter iter;
|
|
794 gimple *stmt, *def_stmt;
|
|
795 gimple_stmt_iterator gsi;
|
|
796
|
|
797 /* First clear flag for stmts in bb. */
|
|
798 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
799 gimple_set_plf (gsi_stmt (gsi), GF_PLF_1, false);
|
|
800
|
|
801 /* DFS search all depended stmts in bb and mark flag for these stmts. */
|
|
802 worklist.safe_push (consumer);
|
|
803 while (!worklist.is_empty ())
|
|
804 {
|
|
805 stmt = worklist.pop ();
|
|
806 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE)
|
|
807 {
|
|
808 def_stmt = SSA_NAME_DEF_STMT (USE_FROM_PTR (use_p));
|
|
809
|
|
810 if (is_a <gphi *> (def_stmt)
|
|
811 || gimple_bb (def_stmt) != bb
|
|
812 || gimple_plf (def_stmt, GF_PLF_1))
|
|
813 continue;
|
|
814
|
|
815 worklist.safe_push (def_stmt);
|
|
816 }
|
|
817 gimple_set_plf (stmt, GF_PLF_1, true);
|
|
818 }
|
|
819 for (gsi = gsi_start_nondebug_bb (bb);
|
|
820 !gsi_end_p (gsi) && (stmt = gsi_stmt (gsi)) != consumer;)
|
|
821 {
|
|
822 /* Move dep stmts to sequence STMTS. */
|
|
823 if (gimple_plf (stmt, GF_PLF_1))
|
|
824 {
|
|
825 gsi_remove (&gsi, false);
|
|
826 gimple_seq_add_stmt_without_update (stmts, stmt);
|
|
827 }
|
|
828 else
|
|
829 gsi_next_nondebug (&gsi);
|
|
830 }
|
|
831 }
|
|
832
|
|
833 /* User can write, optimizers can generate simple reduction RE for inner
|
|
834 loop. In order to make interchange valid, we have to undo reduction by
|
|
835 moving producer and consumer stmts into the inner loop. For example,
|
|
836 below code:
|
|
837
|
|
838 init = MEM_REF[idx]; //producer
|
|
839 loop:
|
|
840 var = phi<init, next>
|
|
841 next = var op ...
|
|
842 reduc_sum = phi<next>
|
|
843 MEM_REF[idx] = reduc_sum //consumer
|
|
844
|
|
845 is transformed into:
|
|
846
|
|
847 loop:
|
|
848 new_var = MEM_REF[idx]; //producer after moving
|
|
849 next = new_var op ...
|
|
850 MEM_REF[idx] = next; //consumer after moving
|
|
851
|
|
852 Note if the reduction variable is initialized to constant, like:
|
|
853
|
|
854 var = phi<0.0, next>
|
|
855
|
|
856 we compute new_var as below:
|
|
857
|
|
858 loop:
|
|
859 tmp = MEM_REF[idx];
|
|
860 new_var = !first_iteration ? tmp : 0.0;
|
|
861
|
|
862 so that the initial const is used in the first iteration of loop. Also
|
|
863 record ssa variables for dead code elimination in DCE_SEEDS. */
|
|
864
|
|
865 void
|
|
866 loop_cand::undo_simple_reduction (reduction_p re, bitmap dce_seeds)
|
|
867 {
|
|
868 gimple *stmt;
|
|
869 gimple_stmt_iterator from, to = gsi_after_labels (m_loop->header);
|
|
870 gimple_seq stmts = NULL;
|
|
871 tree new_var;
|
|
872
|
|
873 /* Prepare the initialization stmts and insert it to inner loop. */
|
|
874 if (re->producer != NULL)
|
|
875 {
|
|
876 gimple_set_vuse (re->producer, NULL_TREE);
|
|
877 from = gsi_for_stmt (re->producer);
|
|
878 gsi_remove (&from, false);
|
|
879 gimple_seq_add_stmt_without_update (&stmts, re->producer);
|
|
880 new_var = re->init;
|
|
881 }
|
|
882 else
|
|
883 {
|
|
884 /* Find all stmts on which expression "MEM_REF[idx]" depends. */
|
|
885 find_deps_in_bb_for_stmt (&stmts, gimple_bb (re->consumer), re->consumer);
|
|
886 /* Because we generate new stmt loading from the MEM_REF to TMP. */
|
|
887 tree cond, tmp = copy_ssa_name (re->var);
|
|
888 stmt = gimple_build_assign (tmp, re->init_ref);
|
|
889 gimple_seq_add_stmt_without_update (&stmts, stmt);
|
|
890
|
|
891 /* Init new_var to MEM_REF or CONST depending on if it is the first
|
|
892 iteration. */
|
|
893 induction_p iv = m_inductions[0];
|
|
894 cond = fold_build2 (NE_EXPR, boolean_type_node, iv->var, iv->init_val);
|
|
895 new_var = copy_ssa_name (re->var);
|
|
896 stmt = gimple_build_assign (new_var, COND_EXPR, cond, tmp, re->init);
|
|
897 gimple_seq_add_stmt_without_update (&stmts, stmt);
|
|
898 }
|
|
899 gsi_insert_seq_before (&to, stmts, GSI_SAME_STMT);
|
|
900
|
|
901 /* Replace all uses of reduction var with new variable. */
|
|
902 use_operand_p use_p;
|
|
903 imm_use_iterator iterator;
|
|
904 FOR_EACH_IMM_USE_STMT (stmt, iterator, re->var)
|
|
905 {
|
|
906 FOR_EACH_IMM_USE_ON_STMT (use_p, iterator)
|
|
907 SET_USE (use_p, new_var);
|
|
908
|
|
909 update_stmt (stmt);
|
|
910 }
|
|
911
|
|
912 /* Move consumer stmt into inner loop, just after reduction next's def. */
|
|
913 unlink_stmt_vdef (re->consumer);
|
|
914 release_ssa_name (gimple_vdef (re->consumer));
|
|
915 gimple_set_vdef (re->consumer, NULL_TREE);
|
|
916 gimple_set_vuse (re->consumer, NULL_TREE);
|
|
917 gimple_assign_set_rhs1 (re->consumer, re->next);
|
|
918 from = gsi_for_stmt (re->consumer);
|
|
919 to = gsi_for_stmt (SSA_NAME_DEF_STMT (re->next));
|
|
920 gsi_move_after (&from, &to);
|
|
921
|
|
922 /* Mark the reduction variables for DCE. */
|
|
923 bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (re->var));
|
|
924 bitmap_set_bit (dce_seeds, SSA_NAME_VERSION (PHI_RESULT (re->lcssa_phi)));
|
|
925 }
|
|
926
|
|
927 /* Free DATAREFS and its auxiliary memory. */
|
|
928
|
|
929 static void
|
|
930 free_data_refs_with_aux (vec<data_reference_p> datarefs)
|
|
931 {
|
|
932 data_reference_p dr;
|
|
933 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
|
|
934 if (dr->aux != NULL)
|
|
935 {
|
|
936 DR_ACCESS_STRIDE (dr)->release ();
|
|
937 delete (vec<tree> *) dr->aux;
|
|
938 }
|
|
939
|
|
940 free_data_refs (datarefs);
|
|
941 }
|
|
942
|
|
943 /* Class for loop interchange transformation. */
|
|
944
|
|
945 class tree_loop_interchange
|
|
946 {
|
|
947 public:
|
|
948 tree_loop_interchange (vec<struct loop *> loop_nest)
|
|
949 : m_loop_nest (loop_nest), m_niters_iv_var (NULL_TREE),
|
|
950 m_dce_seeds (BITMAP_ALLOC (NULL)) { }
|
|
951 ~tree_loop_interchange () { BITMAP_FREE (m_dce_seeds); }
|
|
952 bool interchange (vec<data_reference_p>, vec<ddr_p>);
|
|
953
|
|
954 private:
|
|
955 void update_data_info (unsigned, unsigned, vec<data_reference_p>, vec<ddr_p>);
|
|
956 bool valid_data_dependences (unsigned, unsigned, vec<ddr_p>);
|
|
957 void interchange_loops (loop_cand &, loop_cand &);
|
|
958 void map_inductions_to_loop (loop_cand &, loop_cand &);
|
|
959 void move_code_to_inner_loop (struct loop *, struct loop *, basic_block *);
|
|
960
|
|
961 /* The whole loop nest in which interchange is ongoing. */
|
|
962 vec<struct loop *> m_loop_nest;
|
|
963 /* We create new IV which is only used in loop's exit condition check.
|
|
964 In case of 3-level loop nest interchange, when we interchange the
|
|
965 innermost two loops, new IV created in the middle level loop does
|
|
966 not need to be preserved in interchanging the outermost two loops
|
|
967 later. We record the IV so that it can be skipped. */
|
|
968 tree m_niters_iv_var;
|
|
969 /* Bitmap of seed variables for dead code elimination after interchange. */
|
|
970 bitmap m_dce_seeds;
|
|
971 };
|
|
972
|
|
973 /* Update data refs' access stride and dependence information after loop
|
|
974 interchange. I_IDX/O_IDX gives indices of interchanged loops in loop
|
|
975 nest. DATAREFS are data references. DDRS are data dependences. */
|
|
976
|
|
977 void
|
|
978 tree_loop_interchange::update_data_info (unsigned i_idx, unsigned o_idx,
|
|
979 vec<data_reference_p> datarefs,
|
|
980 vec<ddr_p> ddrs)
|
|
981 {
|
|
982 struct data_reference *dr;
|
|
983 struct data_dependence_relation *ddr;
|
|
984
|
|
985 /* Update strides of data references. */
|
|
986 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
|
|
987 {
|
|
988 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
|
|
989 gcc_assert (stride->length () > i_idx);
|
|
990 std::swap ((*stride)[i_idx], (*stride)[o_idx]);
|
|
991 }
|
|
992 /* Update data dependences. */
|
|
993 for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
|
|
994 if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
|
|
995 {
|
|
996 for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
|
|
997 {
|
|
998 lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
|
|
999 std::swap (dist_vect[i_idx], dist_vect[o_idx]);
|
|
1000 }
|
|
1001 }
|
|
1002 }
|
|
1003
|
|
1004 /* Check data dependence relations, return TRUE if it's valid to interchange
|
|
1005 two loops specified by I_IDX/O_IDX. Theoretically, interchanging the two
|
|
1006 loops is valid only if dist vector, after interchanging, doesn't have '>'
|
|
1007 as the leftmost non-'=' direction. Practically, this function have been
|
|
1008 conservative here by not checking some valid cases. */
|
|
1009
|
|
1010 bool
|
|
1011 tree_loop_interchange::valid_data_dependences (unsigned i_idx, unsigned o_idx,
|
|
1012 vec<ddr_p> ddrs)
|
|
1013 {
|
|
1014 struct data_dependence_relation *ddr;
|
|
1015
|
|
1016 for (unsigned i = 0; ddrs.iterate (i, &ddr); ++i)
|
|
1017 {
|
|
1018 /* Skip no-dependence case. */
|
|
1019 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
|
|
1020 continue;
|
|
1021
|
|
1022 for (unsigned j = 0; j < DDR_NUM_DIST_VECTS (ddr); ++j)
|
|
1023 {
|
|
1024 lambda_vector dist_vect = DDR_DIST_VECT (ddr, j);
|
|
1025 unsigned level = dependence_level (dist_vect, m_loop_nest.length ());
|
|
1026
|
|
1027 /* If there is no carried dependence. */
|
|
1028 if (level == 0)
|
|
1029 continue;
|
|
1030
|
|
1031 level --;
|
|
1032
|
|
1033 /* If dependence is not carried by any loop in between the two
|
|
1034 loops [oloop, iloop] to interchange. */
|
|
1035 if (level < o_idx || level > i_idx)
|
|
1036 continue;
|
|
1037
|
|
1038 /* Be conservative, skip case if either direction at i_idx/o_idx
|
|
1039 levels is not '=' or '<'. */
|
|
1040 if (dist_vect[i_idx] < 0 || dist_vect[o_idx] < 0)
|
|
1041 return false;
|
|
1042 }
|
|
1043 }
|
|
1044
|
|
1045 return true;
|
|
1046 }
|
|
1047
|
|
1048 /* Interchange two loops specified by ILOOP and OLOOP. */
|
|
1049
|
|
1050 void
|
|
1051 tree_loop_interchange::interchange_loops (loop_cand &iloop, loop_cand &oloop)
|
|
1052 {
|
|
1053 reduction_p re;
|
|
1054 gimple_stmt_iterator gsi;
|
|
1055 tree i_niters, o_niters, var_after;
|
|
1056
|
|
1057 /* Undo inner loop's simple reduction. */
|
|
1058 for (unsigned i = 0; iloop.m_reductions.iterate (i, &re); ++i)
|
|
1059 if (re->type != DOUBLE_RTYPE)
|
|
1060 {
|
|
1061 if (re->producer)
|
|
1062 reset_debug_uses (re->producer);
|
|
1063
|
|
1064 iloop.undo_simple_reduction (re, m_dce_seeds);
|
|
1065 }
|
|
1066
|
|
1067 /* Only need to reset debug uses for double reduction. */
|
|
1068 for (unsigned i = 0; oloop.m_reductions.iterate (i, &re); ++i)
|
|
1069 {
|
|
1070 gcc_assert (re->type == DOUBLE_RTYPE);
|
|
1071 reset_debug_uses (SSA_NAME_DEF_STMT (re->var));
|
|
1072 reset_debug_uses (SSA_NAME_DEF_STMT (re->next));
|
|
1073 }
|
|
1074
|
|
1075 /* Prepare niters for both loops. */
|
|
1076 struct loop *loop_nest = m_loop_nest[0];
|
|
1077 edge instantiate_below = loop_preheader_edge (loop_nest);
|
|
1078 gsi = gsi_last_bb (loop_preheader_edge (loop_nest)->src);
|
|
1079 i_niters = number_of_latch_executions (iloop.m_loop);
|
|
1080 i_niters = analyze_scalar_evolution (loop_outer (iloop.m_loop), i_niters);
|
|
1081 i_niters = instantiate_scev (instantiate_below, loop_outer (iloop.m_loop),
|
|
1082 i_niters);
|
|
1083 i_niters = force_gimple_operand_gsi (&gsi, unshare_expr (i_niters), true,
|
|
1084 NULL_TREE, false, GSI_CONTINUE_LINKING);
|
|
1085 o_niters = number_of_latch_executions (oloop.m_loop);
|
|
1086 if (oloop.m_loop != loop_nest)
|
|
1087 {
|
|
1088 o_niters = analyze_scalar_evolution (loop_outer (oloop.m_loop), o_niters);
|
|
1089 o_niters = instantiate_scev (instantiate_below, loop_outer (oloop.m_loop),
|
|
1090 o_niters);
|
|
1091 }
|
|
1092 o_niters = force_gimple_operand_gsi (&gsi, unshare_expr (o_niters), true,
|
|
1093 NULL_TREE, false, GSI_CONTINUE_LINKING);
|
|
1094
|
|
1095 /* Move src's code to tgt loop. This is necessary when src is the outer
|
|
1096 loop and tgt is the inner loop. */
|
|
1097 move_code_to_inner_loop (oloop.m_loop, iloop.m_loop, oloop.m_bbs);
|
|
1098
|
|
1099 /* Map outer loop's IV to inner loop, and vice versa. */
|
|
1100 map_inductions_to_loop (oloop, iloop);
|
|
1101 map_inductions_to_loop (iloop, oloop);
|
|
1102
|
|
1103 /* Create canonical IV for both loops. Note canonical IV for outer/inner
|
|
1104 loop is actually from inner/outer loop. Also we record the new IV
|
|
1105 created for the outer loop so that it can be skipped in later loop
|
|
1106 interchange. */
|
|
1107 create_canonical_iv (oloop.m_loop, oloop.m_exit,
|
|
1108 i_niters, &m_niters_iv_var, &var_after);
|
|
1109 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
|
|
1110 create_canonical_iv (iloop.m_loop, iloop.m_exit,
|
|
1111 o_niters, NULL, &var_after);
|
|
1112 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
|
|
1113
|
|
1114 /* Scrap niters estimation of interchanged loops. */
|
|
1115 iloop.m_loop->any_upper_bound = false;
|
|
1116 iloop.m_loop->any_likely_upper_bound = false;
|
|
1117 free_numbers_of_iterations_estimates (iloop.m_loop);
|
|
1118 oloop.m_loop->any_upper_bound = false;
|
|
1119 oloop.m_loop->any_likely_upper_bound = false;
|
|
1120 free_numbers_of_iterations_estimates (oloop.m_loop);
|
|
1121
|
|
1122 /* Clear all cached scev information. This is expensive but shouldn't be
|
|
1123 a problem given we interchange in very limited times. */
|
|
1124 scev_reset_htab ();
|
|
1125
|
|
1126 /* ??? The association between the loop data structure and the
|
|
1127 CFG changed, so what was loop N at the source level is now
|
|
1128 loop M. We should think of retaining the association or breaking
|
|
1129 it fully by creating a new loop instead of re-using the "wrong" one. */
|
|
1130 }
|
|
1131
|
|
1132 /* Map induction variables of SRC loop to TGT loop. The function firstly
|
|
1133 creates the same IV of SRC loop in TGT loop, then deletes the original
|
|
1134 IV and re-initialize it using the newly created IV. For example, loop
|
|
1135 nest:
|
|
1136
|
|
1137 for (i = 0; i < N; i++)
|
|
1138 for (j = 0; j < M; j++)
|
|
1139 {
|
|
1140 //use of i;
|
|
1141 //use of j;
|
|
1142 }
|
|
1143
|
|
1144 will be transformed into:
|
|
1145
|
|
1146 for (jj = 0; jj < M; jj++)
|
|
1147 for (ii = 0; ii < N; ii++)
|
|
1148 {
|
|
1149 //use of ii;
|
|
1150 //use of jj;
|
|
1151 }
|
|
1152
|
|
1153 after loop interchange. */
|
|
1154
|
|
1155 void
|
|
1156 tree_loop_interchange::map_inductions_to_loop (loop_cand &src, loop_cand &tgt)
|
|
1157 {
|
|
1158 induction_p iv;
|
|
1159 edge e = tgt.m_exit;
|
|
1160 gimple_stmt_iterator incr_pos = gsi_last_bb (e->src), gsi;
|
|
1161
|
|
1162 /* Map source loop's IV to target loop. */
|
|
1163 for (unsigned i = 0; src.m_inductions.iterate (i, &iv); ++i)
|
|
1164 {
|
|
1165 gimple *use_stmt, *stmt = SSA_NAME_DEF_STMT (iv->var);
|
|
1166 gcc_assert (is_a <gphi *> (stmt));
|
|
1167
|
|
1168 use_operand_p use_p;
|
|
1169 /* Only map original IV to target loop. */
|
|
1170 if (m_niters_iv_var != iv->var)
|
|
1171 {
|
|
1172 /* Map the IV by creating the same one in target loop. */
|
|
1173 tree var_before, var_after;
|
|
1174 tree base = unshare_expr (iv->init_expr);
|
|
1175 tree step = unshare_expr (iv->step);
|
|
1176 create_iv (base, step, SSA_NAME_VAR (iv->var),
|
|
1177 tgt.m_loop, &incr_pos, false, &var_before, &var_after);
|
|
1178 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_before));
|
|
1179 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (var_after));
|
|
1180
|
|
1181 /* Replace uses of the original IV var with newly created IV var. */
|
|
1182 imm_use_iterator imm_iter;
|
|
1183 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, iv->var)
|
|
1184 {
|
|
1185 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
|
|
1186 SET_USE (use_p, var_before);
|
|
1187
|
|
1188 update_stmt (use_stmt);
|
|
1189 }
|
|
1190 }
|
|
1191
|
|
1192 /* Mark all uses for DCE. */
|
|
1193 ssa_op_iter op_iter;
|
|
1194 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, op_iter, SSA_OP_USE)
|
|
1195 {
|
|
1196 tree use = USE_FROM_PTR (use_p);
|
|
1197 if (TREE_CODE (use) == SSA_NAME
|
|
1198 && ! SSA_NAME_IS_DEFAULT_DEF (use))
|
|
1199 bitmap_set_bit (m_dce_seeds, SSA_NAME_VERSION (use));
|
|
1200 }
|
|
1201
|
|
1202 /* Delete definition of the original IV in the source loop. */
|
|
1203 gsi = gsi_for_stmt (stmt);
|
|
1204 remove_phi_node (&gsi, true);
|
|
1205 }
|
|
1206 }
|
|
1207
|
|
1208 /* Move stmts of outer loop to inner loop. */
|
|
1209
|
|
1210 void
|
|
1211 tree_loop_interchange::move_code_to_inner_loop (struct loop *outer,
|
|
1212 struct loop *inner,
|
|
1213 basic_block *outer_bbs)
|
|
1214 {
|
|
1215 basic_block oloop_exit_bb = single_exit (outer)->src;
|
|
1216 gimple_stmt_iterator gsi, to;
|
|
1217
|
|
1218 for (unsigned i = 0; i < outer->num_nodes; i++)
|
|
1219 {
|
|
1220 basic_block bb = outer_bbs[i];
|
|
1221
|
|
1222 /* Skip basic blocks of inner loop. */
|
|
1223 if (flow_bb_inside_loop_p (inner, bb))
|
|
1224 continue;
|
|
1225
|
|
1226 /* Move code from header/latch to header/latch. */
|
|
1227 if (bb == outer->header)
|
|
1228 to = gsi_after_labels (inner->header);
|
|
1229 else if (bb == outer->latch)
|
|
1230 to = gsi_after_labels (inner->latch);
|
|
1231 else
|
|
1232 /* Otherwise, simply move to exit->src. */
|
|
1233 to = gsi_last_bb (single_exit (inner)->src);
|
|
1234
|
|
1235 for (gsi = gsi_after_labels (bb); !gsi_end_p (gsi);)
|
|
1236 {
|
|
1237 gimple *stmt = gsi_stmt (gsi);
|
|
1238
|
|
1239 if (oloop_exit_bb == bb
|
|
1240 && stmt == gsi_stmt (gsi_last_bb (oloop_exit_bb)))
|
|
1241 {
|
|
1242 gsi_next (&gsi);
|
|
1243 continue;
|
|
1244 }
|
|
1245
|
|
1246 if (gimple_vuse (stmt))
|
|
1247 gimple_set_vuse (stmt, NULL_TREE);
|
|
1248 if (gimple_vdef (stmt))
|
|
1249 {
|
|
1250 unlink_stmt_vdef (stmt);
|
|
1251 release_ssa_name (gimple_vdef (stmt));
|
|
1252 gimple_set_vdef (stmt, NULL_TREE);
|
|
1253 }
|
|
1254
|
|
1255 reset_debug_uses (stmt);
|
|
1256 gsi_move_before (&gsi, &to);
|
|
1257 }
|
|
1258 }
|
|
1259 }
|
|
1260
|
|
1261 /* Given data reference DR in LOOP_NEST, the function computes DR's access
|
|
1262 stride at each level of loop from innermost LOOP to outer. On success,
|
|
1263 it saves access stride at each level loop in a vector which is pointed
|
|
1264 by DR->aux. For example:
|
|
1265
|
|
1266 int arr[100][100][100];
|
|
1267 for (i = 0; i < 100; i++) ;(DR->aux)strides[0] = 40000
|
|
1268 for (j = 100; j > 0; j--) ;(DR->aux)strides[1] = 400
|
|
1269 for (k = 0; k < 100; k++) ;(DR->aux)strides[2] = 4
|
|
1270 arr[i][j - 1][k] = 0; */
|
|
1271
|
|
1272 static void
|
|
1273 compute_access_stride (struct loop *loop_nest, struct loop *loop,
|
|
1274 data_reference_p dr)
|
|
1275 {
|
|
1276 vec<tree> *strides = new vec<tree> ();
|
|
1277 basic_block bb = gimple_bb (DR_STMT (dr));
|
|
1278
|
|
1279 while (!flow_bb_inside_loop_p (loop, bb))
|
|
1280 {
|
|
1281 strides->safe_push (build_int_cst (sizetype, 0));
|
|
1282 loop = loop_outer (loop);
|
|
1283 }
|
|
1284 gcc_assert (loop == bb->loop_father);
|
|
1285
|
|
1286 tree ref = DR_REF (dr);
|
|
1287 if (TREE_CODE (ref) == COMPONENT_REF
|
|
1288 && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
|
|
1289 {
|
|
1290 /* We can't take address of bitfields. If the bitfield is at constant
|
|
1291 offset from the start of the struct, just use address of the
|
|
1292 struct, for analysis of the strides that shouldn't matter. */
|
|
1293 if (!TREE_OPERAND (ref, 2)
|
|
1294 || TREE_CODE (TREE_OPERAND (ref, 2)) == INTEGER_CST)
|
|
1295 ref = TREE_OPERAND (ref, 0);
|
|
1296 /* Otherwise, if we have a bit field representative, use that. */
|
|
1297 else if (DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1))
|
|
1298 != NULL_TREE)
|
|
1299 {
|
|
1300 tree repr = DECL_BIT_FIELD_REPRESENTATIVE (TREE_OPERAND (ref, 1));
|
|
1301 ref = build3 (COMPONENT_REF, TREE_TYPE (repr), TREE_OPERAND (ref, 0),
|
|
1302 repr, TREE_OPERAND (ref, 2));
|
|
1303 }
|
|
1304 /* Otherwise punt. */
|
|
1305 else
|
|
1306 {
|
|
1307 dr->aux = strides;
|
|
1308 return;
|
|
1309 }
|
|
1310 }
|
|
1311 tree scev_base = build_fold_addr_expr (ref);
|
|
1312 tree scev = analyze_scalar_evolution (loop, scev_base);
|
|
1313 scev = instantiate_scev (loop_preheader_edge (loop_nest), loop, scev);
|
|
1314 if (! chrec_contains_undetermined (scev))
|
|
1315 {
|
|
1316 tree sl = scev;
|
|
1317 struct loop *expected = loop;
|
|
1318 while (TREE_CODE (sl) == POLYNOMIAL_CHREC)
|
|
1319 {
|
|
1320 struct loop *sl_loop = get_chrec_loop (sl);
|
|
1321 while (sl_loop != expected)
|
|
1322 {
|
|
1323 strides->safe_push (size_int (0));
|
|
1324 expected = loop_outer (expected);
|
|
1325 }
|
|
1326 strides->safe_push (CHREC_RIGHT (sl));
|
|
1327 sl = CHREC_LEFT (sl);
|
|
1328 expected = loop_outer (expected);
|
|
1329 }
|
|
1330 if (! tree_contains_chrecs (sl, NULL))
|
|
1331 while (expected != loop_outer (loop_nest))
|
|
1332 {
|
|
1333 strides->safe_push (size_int (0));
|
|
1334 expected = loop_outer (expected);
|
|
1335 }
|
|
1336 }
|
|
1337
|
|
1338 dr->aux = strides;
|
|
1339 }
|
|
1340
|
|
1341 /* Given loop nest LOOP_NEST with innermost LOOP, the function computes
|
|
1342 access strides with respect to each level loop for all data refs in
|
|
1343 DATAREFS from inner loop to outer loop. On success, it returns the
|
|
1344 outermost loop that access strides can be computed successfully for
|
|
1345 all data references. If access strides cannot be computed at least
|
|
1346 for two levels of loop for any data reference, it returns NULL. */
|
|
1347
|
|
1348 static struct loop *
|
|
1349 compute_access_strides (struct loop *loop_nest, struct loop *loop,
|
|
1350 vec<data_reference_p> datarefs)
|
|
1351 {
|
|
1352 unsigned i, j, num_loops = (unsigned) -1;
|
|
1353 data_reference_p dr;
|
|
1354 vec<tree> *stride;
|
|
1355
|
|
1356 for (i = 0; datarefs.iterate (i, &dr); ++i)
|
|
1357 {
|
|
1358 compute_access_stride (loop_nest, loop, dr);
|
|
1359 stride = DR_ACCESS_STRIDE (dr);
|
|
1360 if (stride->length () < num_loops)
|
|
1361 {
|
|
1362 num_loops = stride->length ();
|
|
1363 if (num_loops < 2)
|
|
1364 return NULL;
|
|
1365 }
|
|
1366 }
|
|
1367
|
|
1368 for (i = 0; datarefs.iterate (i, &dr); ++i)
|
|
1369 {
|
|
1370 stride = DR_ACCESS_STRIDE (dr);
|
|
1371 if (stride->length () > num_loops)
|
|
1372 stride->truncate (num_loops);
|
|
1373
|
|
1374 for (j = 0; j < (num_loops >> 1); ++j)
|
|
1375 std::swap ((*stride)[j], (*stride)[num_loops - j - 1]);
|
|
1376 }
|
|
1377
|
|
1378 loop = superloop_at_depth (loop, loop_depth (loop) + 1 - num_loops);
|
|
1379 gcc_assert (loop_nest == loop || flow_loop_nested_p (loop_nest, loop));
|
|
1380 return loop;
|
|
1381 }
|
|
1382
|
|
1383 /* Prune access strides for data references in DATAREFS by removing strides
|
|
1384 of loops that isn't in current LOOP_NEST. */
|
|
1385
|
|
1386 static void
|
|
1387 prune_access_strides_not_in_loop (struct loop *loop_nest,
|
|
1388 struct loop *innermost,
|
|
1389 vec<data_reference_p> datarefs)
|
|
1390 {
|
|
1391 data_reference_p dr;
|
|
1392 unsigned num_loops = loop_depth (innermost) - loop_depth (loop_nest) + 1;
|
|
1393 gcc_assert (num_loops > 1);
|
|
1394
|
|
1395 /* Block remove strides of loops that is not in current loop nest. */
|
|
1396 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
|
|
1397 {
|
|
1398 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
|
|
1399 if (stride->length () > num_loops)
|
|
1400 stride->block_remove (0, stride->length () - num_loops);
|
|
1401 }
|
|
1402 }
|
|
1403
|
|
1404 /* Dump access strides for all DATAREFS. */
|
|
1405
|
|
1406 static void
|
|
1407 dump_access_strides (vec<data_reference_p> datarefs)
|
|
1408 {
|
|
1409 data_reference_p dr;
|
|
1410 fprintf (dump_file, "Access Strides for DRs:\n");
|
|
1411 for (unsigned i = 0; datarefs.iterate (i, &dr); ++i)
|
|
1412 {
|
|
1413 fprintf (dump_file, " ");
|
|
1414 print_generic_expr (dump_file, DR_REF (dr), TDF_SLIM);
|
|
1415 fprintf (dump_file, ":\t\t<");
|
|
1416
|
|
1417 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
|
|
1418 unsigned num_loops = stride->length ();
|
|
1419 for (unsigned j = 0; j < num_loops; ++j)
|
|
1420 {
|
|
1421 print_generic_expr (dump_file, (*stride)[j], TDF_SLIM);
|
|
1422 fprintf (dump_file, "%s", (j < num_loops - 1) ? ",\t" : ">\n");
|
|
1423 }
|
|
1424 }
|
|
1425 }
|
|
1426
|
|
1427 /* Return true if it's profitable to interchange two loops whose index
|
|
1428 in whole loop nest vector are I_IDX/O_IDX respectively. The function
|
|
1429 computes and compares three types information from all DATAREFS:
|
|
1430 1) Access stride for loop I_IDX and O_IDX.
|
|
1431 2) Number of invariant memory references with respect to I_IDX before
|
|
1432 and after loop interchange.
|
|
1433 3) Flags indicating if all memory references access sequential memory
|
|
1434 in ILOOP, before and after loop interchange.
|
|
1435 If INNMOST_LOOP_P is true, the two loops for interchanging are the two
|
|
1436 innermost loops in loop nest. This function also dumps information if
|
|
1437 DUMP_INFO_P is true. */
|
|
1438
|
|
1439 static bool
|
|
1440 should_interchange_loops (unsigned i_idx, unsigned o_idx,
|
|
1441 vec<data_reference_p> datarefs,
|
|
1442 unsigned i_stmt_cost, unsigned o_stmt_cost,
|
|
1443 bool innermost_loops_p, bool dump_info_p = true)
|
|
1444 {
|
|
1445 unsigned HOST_WIDE_INT ratio;
|
|
1446 unsigned i, j, num_old_inv_drs = 0, num_new_inv_drs = 0;
|
|
1447 struct data_reference *dr;
|
|
1448 bool all_seq_dr_before_p = true, all_seq_dr_after_p = true;
|
|
1449 widest_int iloop_strides = 0, oloop_strides = 0;
|
|
1450 unsigned num_unresolved_drs = 0;
|
|
1451 unsigned num_resolved_ok_drs = 0;
|
|
1452 unsigned num_resolved_not_ok_drs = 0;
|
|
1453
|
|
1454 if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
|
|
1455 fprintf (dump_file, "\nData ref strides:\n\tmem_ref:\t\tiloop\toloop\n");
|
|
1456
|
|
1457 for (i = 0; datarefs.iterate (i, &dr); ++i)
|
|
1458 {
|
|
1459 vec<tree> *stride = DR_ACCESS_STRIDE (dr);
|
|
1460 tree iloop_stride = (*stride)[i_idx], oloop_stride = (*stride)[o_idx];
|
|
1461
|
|
1462 bool subloop_stride_p = false;
|
|
1463 /* Data ref can't be invariant or sequential access at current loop if
|
|
1464 its address changes with respect to any subloops. */
|
|
1465 for (j = i_idx + 1; j < stride->length (); ++j)
|
|
1466 if (!integer_zerop ((*stride)[j]))
|
|
1467 {
|
|
1468 subloop_stride_p = true;
|
|
1469 break;
|
|
1470 }
|
|
1471
|
|
1472 if (integer_zerop (iloop_stride))
|
|
1473 {
|
|
1474 if (!subloop_stride_p)
|
|
1475 num_old_inv_drs++;
|
|
1476 }
|
|
1477 if (integer_zerop (oloop_stride))
|
|
1478 {
|
|
1479 if (!subloop_stride_p)
|
|
1480 num_new_inv_drs++;
|
|
1481 }
|
|
1482
|
|
1483 if (TREE_CODE (iloop_stride) == INTEGER_CST
|
|
1484 && TREE_CODE (oloop_stride) == INTEGER_CST)
|
|
1485 {
|
|
1486 iloop_strides = wi::add (iloop_strides, wi::to_widest (iloop_stride));
|
|
1487 oloop_strides = wi::add (oloop_strides, wi::to_widest (oloop_stride));
|
|
1488 }
|
|
1489 else if (multiple_of_p (TREE_TYPE (iloop_stride),
|
|
1490 iloop_stride, oloop_stride))
|
|
1491 num_resolved_ok_drs++;
|
|
1492 else if (multiple_of_p (TREE_TYPE (iloop_stride),
|
|
1493 oloop_stride, iloop_stride))
|
|
1494 num_resolved_not_ok_drs++;
|
|
1495 else
|
|
1496 num_unresolved_drs++;
|
|
1497
|
|
1498 /* Data ref can't be sequential access if its address changes in sub
|
|
1499 loop. */
|
|
1500 if (subloop_stride_p)
|
|
1501 {
|
|
1502 all_seq_dr_before_p = false;
|
|
1503 all_seq_dr_after_p = false;
|
|
1504 continue;
|
|
1505 }
|
|
1506 /* Track if all data references are sequential accesses before/after loop
|
|
1507 interchange. Note invariant is considered sequential here. */
|
|
1508 tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr)));
|
|
1509 if (all_seq_dr_before_p
|
|
1510 && ! (integer_zerop (iloop_stride)
|
|
1511 || operand_equal_p (access_size, iloop_stride, 0)))
|
|
1512 all_seq_dr_before_p = false;
|
|
1513 if (all_seq_dr_after_p
|
|
1514 && ! (integer_zerop (oloop_stride)
|
|
1515 || operand_equal_p (access_size, oloop_stride, 0)))
|
|
1516 all_seq_dr_after_p = false;
|
|
1517 }
|
|
1518
|
|
1519 if (dump_info_p && dump_file && (dump_flags & TDF_DETAILS))
|
|
1520 {
|
|
1521 fprintf (dump_file, "\toverall:\t\t");
|
|
1522 print_decu (iloop_strides, dump_file);
|
|
1523 fprintf (dump_file, "\t");
|
|
1524 print_decu (oloop_strides, dump_file);
|
|
1525 fprintf (dump_file, "\n");
|
|
1526
|
|
1527 fprintf (dump_file, "Invariant data ref: before(%d), after(%d)\n",
|
|
1528 num_old_inv_drs, num_new_inv_drs);
|
|
1529 fprintf (dump_file, "All consecutive stride: before(%s), after(%s)\n",
|
|
1530 all_seq_dr_before_p ? "true" : "false",
|
|
1531 all_seq_dr_after_p ? "true" : "false");
|
|
1532 fprintf (dump_file, "OK to interchage with variable strides: %d\n",
|
|
1533 num_resolved_ok_drs);
|
|
1534 fprintf (dump_file, "Not OK to interchage with variable strides: %d\n",
|
|
1535 num_resolved_not_ok_drs);
|
|
1536 fprintf (dump_file, "Variable strides we cannot decide: %d\n",
|
|
1537 num_unresolved_drs);
|
|
1538 fprintf (dump_file, "Stmt cost of inner loop: %d\n", i_stmt_cost);
|
|
1539 fprintf (dump_file, "Stmt cost of outer loop: %d\n", o_stmt_cost);
|
|
1540 }
|
|
1541
|
|
1542 if (num_unresolved_drs != 0 || num_resolved_not_ok_drs != 0)
|
|
1543 return false;
|
|
1544
|
|
1545 /* Stmts of outer loop will be moved to inner loop. If there are two many
|
|
1546 such stmts, it could make inner loop costly. Here we compare stmt cost
|
|
1547 between outer and inner loops. */
|
|
1548 if (i_stmt_cost && o_stmt_cost
|
|
1549 && num_old_inv_drs + o_stmt_cost > num_new_inv_drs
|
|
1550 && o_stmt_cost * STMT_COST_RATIO > i_stmt_cost)
|
|
1551 return false;
|
|
1552
|
|
1553 /* We use different stride comparison ratio for interchanging innermost
|
|
1554 two loops or not. The idea is to be conservative in interchange for
|
|
1555 the innermost loops. */
|
|
1556 ratio = innermost_loops_p ? INNER_STRIDE_RATIO : OUTER_STRIDE_RATIO;
|
|
1557 /* Do interchange if it gives better data locality behavior. */
|
|
1558 if (wi::gtu_p (iloop_strides, wi::mul (oloop_strides, ratio)))
|
|
1559 return true;
|
|
1560 if (wi::gtu_p (iloop_strides, oloop_strides))
|
|
1561 {
|
|
1562 /* Or it creates more invariant memory references. */
|
|
1563 if ((!all_seq_dr_before_p || all_seq_dr_after_p)
|
|
1564 && num_new_inv_drs > num_old_inv_drs)
|
|
1565 return true;
|
|
1566 /* Or it makes all memory references sequential. */
|
|
1567 if (num_new_inv_drs >= num_old_inv_drs
|
|
1568 && !all_seq_dr_before_p && all_seq_dr_after_p)
|
|
1569 return true;
|
|
1570 }
|
|
1571
|
|
1572 return false;
|
|
1573 }
|
|
1574
|
|
1575 /* Try to interchange inner loop of a loop nest to outer level. */
|
|
1576
|
|
1577 bool
|
|
1578 tree_loop_interchange::interchange (vec<data_reference_p> datarefs,
|
|
1579 vec<ddr_p> ddrs)
|
|
1580 {
|
|
1581 dump_user_location_t loc = find_loop_location (m_loop_nest[0]);
|
|
1582 bool changed_p = false;
|
|
1583 /* In each iteration we try to interchange I-th loop with (I+1)-th loop.
|
|
1584 The overall effect is to push inner loop to outermost level in whole
|
|
1585 loop nest. */
|
|
1586 for (unsigned i = m_loop_nest.length (); i > 1; --i)
|
|
1587 {
|
|
1588 unsigned i_idx = i - 1, o_idx = i - 2;
|
|
1589
|
|
1590 /* Check validity for loop interchange. */
|
|
1591 if (!valid_data_dependences (i_idx, o_idx, ddrs))
|
|
1592 break;
|
|
1593
|
|
1594 loop_cand iloop (m_loop_nest[i_idx], m_loop_nest[o_idx]);
|
|
1595 loop_cand oloop (m_loop_nest[o_idx], m_loop_nest[o_idx]);
|
|
1596
|
|
1597 /* Check if we can do transformation for loop interchange. */
|
|
1598 if (!iloop.analyze_carried_vars (NULL)
|
|
1599 || !iloop.analyze_lcssa_phis ()
|
|
1600 || !oloop.analyze_carried_vars (&iloop)
|
|
1601 || !oloop.analyze_lcssa_phis ()
|
|
1602 || !iloop.can_interchange_p (NULL)
|
|
1603 || !oloop.can_interchange_p (&iloop))
|
|
1604 break;
|
|
1605
|
|
1606 /* Outer loop's stmts will be moved to inner loop during interchange.
|
|
1607 If there are many of them, it may make inner loop very costly. We
|
|
1608 need to check number of outer loop's stmts in profit cost model of
|
|
1609 interchange. */
|
|
1610 int stmt_cost = oloop.m_num_stmts;
|
|
1611 /* Count out the exit checking stmt of outer loop. */
|
|
1612 stmt_cost --;
|
|
1613 /* Count out IV's increasing stmt, IVOPTs takes care if it. */
|
|
1614 stmt_cost -= oloop.m_inductions.length ();
|
|
1615 /* Count in the additional load and cond_expr stmts caused by inner
|
|
1616 loop's constant initialized reduction. */
|
|
1617 stmt_cost += iloop.m_const_init_reduc * 2;
|
|
1618 if (stmt_cost < 0)
|
|
1619 stmt_cost = 0;
|
|
1620
|
|
1621 /* Check profitability for loop interchange. */
|
|
1622 if (should_interchange_loops (i_idx, o_idx, datarefs,
|
|
1623 (unsigned) iloop.m_num_stmts,
|
|
1624 (unsigned) stmt_cost,
|
|
1625 iloop.m_loop->inner == NULL))
|
|
1626 {
|
|
1627 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
1628 fprintf (dump_file,
|
|
1629 "Loop_pair<outer:%d, inner:%d> is interchanged\n\n",
|
|
1630 oloop.m_loop->num, iloop.m_loop->num);
|
|
1631
|
|
1632 changed_p = true;
|
|
1633 interchange_loops (iloop, oloop);
|
|
1634 /* No need to update if there is no further loop interchange. */
|
|
1635 if (o_idx > 0)
|
|
1636 update_data_info (i_idx, o_idx, datarefs, ddrs);
|
|
1637 }
|
|
1638 else
|
|
1639 {
|
|
1640 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
1641 fprintf (dump_file,
|
|
1642 "Loop_pair<outer:%d, inner:%d> is not interchanged\n\n",
|
|
1643 oloop.m_loop->num, iloop.m_loop->num);
|
|
1644 }
|
|
1645 }
|
|
1646 simple_dce_from_worklist (m_dce_seeds);
|
|
1647
|
|
1648 if (changed_p)
|
|
1649 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loc,
|
|
1650 "loops interchanged in loop nest\n");
|
|
1651
|
|
1652 return changed_p;
|
|
1653 }
|
|
1654
|
|
1655
|
|
1656 /* Loop interchange pass. */
|
|
1657
|
|
1658 namespace {
|
|
1659
|
|
1660 const pass_data pass_data_linterchange =
|
|
1661 {
|
|
1662 GIMPLE_PASS, /* type */
|
|
1663 "linterchange", /* name */
|
|
1664 OPTGROUP_LOOP, /* optinfo_flags */
|
|
1665 TV_LINTERCHANGE, /* tv_id */
|
|
1666 PROP_cfg, /* properties_required */
|
|
1667 0, /* properties_provided */
|
|
1668 0, /* properties_destroyed */
|
|
1669 0, /* todo_flags_start */
|
|
1670 0, /* todo_flags_finish */
|
|
1671 };
|
|
1672
|
|
1673 class pass_linterchange : public gimple_opt_pass
|
|
1674 {
|
|
1675 public:
|
|
1676 pass_linterchange (gcc::context *ctxt)
|
|
1677 : gimple_opt_pass (pass_data_linterchange, ctxt)
|
|
1678 {}
|
|
1679
|
|
1680 /* opt_pass methods: */
|
|
1681 opt_pass * clone () { return new pass_linterchange (m_ctxt); }
|
|
1682 virtual bool gate (function *) { return flag_loop_interchange; }
|
|
1683 virtual unsigned int execute (function *);
|
|
1684
|
|
1685 }; // class pass_linterchange
|
|
1686
|
|
1687
|
|
1688 /* Return true if LOOP has proper form for interchange. We check three
|
|
1689 conditions in the function:
|
|
1690 1) In general, a loop can be interchanged only if it doesn't have
|
|
1691 basic blocks other than header, exit and latch besides possible
|
|
1692 inner loop nest. This basically restricts loop interchange to
|
|
1693 below form loop nests:
|
|
1694
|
|
1695 header<---+
|
|
1696 | |
|
|
1697 v |
|
|
1698 INNER_LOOP |
|
|
1699 | |
|
|
1700 v |
|
|
1701 exit--->latch
|
|
1702
|
|
1703 2) Data reference in basic block that executes in different times
|
|
1704 than loop head/exit is not allowed.
|
|
1705 3) Record the innermost outer loop that doesn't form rectangle loop
|
|
1706 nest with LOOP. */
|
|
1707
|
|
1708 static bool
|
|
1709 proper_loop_form_for_interchange (struct loop *loop, struct loop **min_outer)
|
|
1710 {
|
|
1711 edge e0, e1, exit;
|
|
1712
|
|
1713 /* Don't interchange if loop has unsupported information for the moment. */
|
|
1714 if (loop->safelen > 0
|
|
1715 || loop->constraints != 0
|
|
1716 || loop->can_be_parallel
|
|
1717 || loop->dont_vectorize
|
|
1718 || loop->force_vectorize
|
|
1719 || loop->in_oacc_kernels_region
|
|
1720 || loop->orig_loop_num != 0
|
|
1721 || loop->simduid != NULL_TREE)
|
|
1722 return false;
|
|
1723
|
|
1724 /* Don't interchange if outer loop has basic block other than header, exit
|
|
1725 and latch. */
|
|
1726 if (loop->inner != NULL
|
|
1727 && loop->num_nodes != loop->inner->num_nodes + 3)
|
|
1728 return false;
|
|
1729
|
|
1730 if ((exit = single_dom_exit (loop)) == NULL)
|
|
1731 return false;
|
|
1732
|
|
1733 /* Check control flow on loop header/exit blocks. */
|
|
1734 if (loop->header == exit->src
|
|
1735 && (EDGE_COUNT (loop->header->preds) != 2
|
|
1736 || EDGE_COUNT (loop->header->succs) != 2))
|
|
1737 return false;
|
|
1738 else if (loop->header != exit->src
|
|
1739 && (EDGE_COUNT (loop->header->preds) != 2
|
|
1740 || !single_succ_p (loop->header)
|
|
1741 || unsupported_edge (single_succ_edge (loop->header))
|
|
1742 || EDGE_COUNT (exit->src->succs) != 2
|
|
1743 || !single_pred_p (exit->src)
|
|
1744 || unsupported_edge (single_pred_edge (exit->src))))
|
|
1745 return false;
|
|
1746
|
|
1747 e0 = EDGE_PRED (loop->header, 0);
|
|
1748 e1 = EDGE_PRED (loop->header, 1);
|
|
1749 if (unsupported_edge (e0) || unsupported_edge (e1)
|
|
1750 || (e0->src != loop->latch && e1->src != loop->latch)
|
|
1751 || (e0->src->loop_father == loop && e1->src->loop_father == loop))
|
|
1752 return false;
|
|
1753
|
|
1754 e0 = EDGE_SUCC (exit->src, 0);
|
|
1755 e1 = EDGE_SUCC (exit->src, 1);
|
|
1756 if (unsupported_edge (e0) || unsupported_edge (e1)
|
|
1757 || (e0->dest != loop->latch && e1->dest != loop->latch)
|
|
1758 || (e0->dest->loop_father == loop && e1->dest->loop_father == loop))
|
|
1759 return false;
|
|
1760
|
|
1761 /* Don't interchange if any reference is in basic block that doesn't
|
|
1762 dominate exit block. */
|
|
1763 basic_block *bbs = get_loop_body (loop);
|
|
1764 for (unsigned i = 0; i < loop->num_nodes; i++)
|
|
1765 {
|
|
1766 basic_block bb = bbs[i];
|
|
1767
|
|
1768 if (bb->loop_father != loop
|
|
1769 || bb == loop->header || bb == exit->src
|
|
1770 || dominated_by_p (CDI_DOMINATORS, exit->src, bb))
|
|
1771 continue;
|
|
1772
|
|
1773 for (gimple_stmt_iterator gsi = gsi_start_nondebug_bb (bb);
|
|
1774 !gsi_end_p (gsi); gsi_next_nondebug (&gsi))
|
|
1775 if (gimple_vuse (gsi_stmt (gsi)))
|
|
1776 {
|
|
1777 free (bbs);
|
|
1778 return false;
|
|
1779 }
|
|
1780 }
|
|
1781 free (bbs);
|
|
1782
|
|
1783 tree niters = number_of_latch_executions (loop);
|
|
1784 niters = analyze_scalar_evolution (loop_outer (loop), niters);
|
|
1785 if (!niters || chrec_contains_undetermined (niters))
|
|
1786 return false;
|
|
1787
|
|
1788 /* Record the innermost outer loop that doesn't form rectangle loop nest. */
|
|
1789 for (loop_p loop2 = loop_outer (loop);
|
|
1790 loop2 && flow_loop_nested_p (*min_outer, loop2);
|
|
1791 loop2 = loop_outer (loop2))
|
|
1792 {
|
|
1793 niters = instantiate_scev (loop_preheader_edge (loop2),
|
|
1794 loop_outer (loop), niters);
|
|
1795 if (!evolution_function_is_invariant_p (niters, loop2->num))
|
|
1796 {
|
|
1797 *min_outer = loop2;
|
|
1798 break;
|
|
1799 }
|
|
1800 }
|
|
1801 return true;
|
|
1802 }
|
|
1803
|
|
1804 /* Return true if any two adjacent loops in loop nest [INNERMOST, LOOP_NEST]
|
|
1805 should be interchanged by looking into all DATAREFS. */
|
|
1806
|
|
1807 static bool
|
|
1808 should_interchange_loop_nest (struct loop *loop_nest, struct loop *innermost,
|
|
1809 vec<data_reference_p> datarefs)
|
|
1810 {
|
|
1811 unsigned idx = loop_depth (innermost) - loop_depth (loop_nest);
|
|
1812 gcc_assert (idx > 0);
|
|
1813
|
|
1814 /* Check if any two adjacent loops should be interchanged. */
|
|
1815 for (struct loop *loop = innermost;
|
|
1816 loop != loop_nest; loop = loop_outer (loop), idx--)
|
|
1817 if (should_interchange_loops (idx, idx - 1, datarefs, 0, 0,
|
|
1818 loop == innermost, false))
|
|
1819 return true;
|
|
1820
|
|
1821 return false;
|
|
1822 }
|
|
1823
|
|
1824 /* Given loop nest LOOP_NEST and data references DATAREFS, compute data
|
|
1825 dependences for loop interchange and store it in DDRS. Note we compute
|
|
1826 dependences directly rather than call generic interface so that we can
|
|
1827 return on unknown dependence instantly. */
|
|
1828
|
|
1829 static bool
|
|
1830 tree_loop_interchange_compute_ddrs (vec<loop_p> loop_nest,
|
|
1831 vec<data_reference_p> datarefs,
|
|
1832 vec<ddr_p> *ddrs)
|
|
1833 {
|
|
1834 struct data_reference *a, *b;
|
|
1835 struct loop *innermost = loop_nest.last ();
|
|
1836
|
|
1837 for (unsigned i = 0; datarefs.iterate (i, &a); ++i)
|
|
1838 {
|
|
1839 bool a_outer_p = gimple_bb (DR_STMT (a))->loop_father != innermost;
|
|
1840 for (unsigned j = i + 1; datarefs.iterate (j, &b); ++j)
|
|
1841 if (DR_IS_WRITE (a) || DR_IS_WRITE (b))
|
|
1842 {
|
|
1843 bool b_outer_p = gimple_bb (DR_STMT (b))->loop_father != innermost;
|
|
1844 /* Don't support multiple write references in outer loop. */
|
|
1845 if (a_outer_p && b_outer_p && DR_IS_WRITE (a) && DR_IS_WRITE (b))
|
|
1846 return false;
|
|
1847
|
|
1848 ddr_p ddr = initialize_data_dependence_relation (a, b, loop_nest);
|
|
1849 ddrs->safe_push (ddr);
|
|
1850 compute_affine_dependence (ddr, loop_nest[0]);
|
|
1851
|
|
1852 /* Give up if ddr is unknown dependence or classic direct vector
|
|
1853 is not available. */
|
|
1854 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
|
|
1855 || (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
|
|
1856 && DDR_NUM_DIR_VECTS (ddr) == 0))
|
|
1857 return false;
|
|
1858
|
|
1859 /* If either data references is in outer loop of nest, we require
|
|
1860 no dependence here because the data reference need to be moved
|
|
1861 into inner loop during interchange. */
|
|
1862 if (a_outer_p && b_outer_p
|
|
1863 && operand_equal_p (DR_REF (a), DR_REF (b), 0))
|
|
1864 continue;
|
|
1865 if (DDR_ARE_DEPENDENT (ddr) != chrec_known
|
|
1866 && (a_outer_p || b_outer_p))
|
|
1867 return false;
|
|
1868 }
|
|
1869 }
|
|
1870
|
|
1871 return true;
|
|
1872 }
|
|
1873
|
|
1874 /* Prune DATAREFS by removing any data reference not inside of LOOP. */
|
|
1875
|
|
1876 static inline void
|
|
1877 prune_datarefs_not_in_loop (struct loop *loop, vec<data_reference_p> datarefs)
|
|
1878 {
|
|
1879 unsigned i, j;
|
|
1880 struct data_reference *dr;
|
|
1881
|
|
1882 for (i = 0, j = 0; datarefs.iterate (i, &dr); ++i)
|
|
1883 {
|
|
1884 if (flow_bb_inside_loop_p (loop, gimple_bb (DR_STMT (dr))))
|
|
1885 datarefs[j++] = dr;
|
|
1886 else
|
|
1887 {
|
|
1888 if (dr->aux)
|
|
1889 {
|
|
1890 DR_ACCESS_STRIDE (dr)->release ();
|
|
1891 delete (vec<tree> *) dr->aux;
|
|
1892 }
|
|
1893 free_data_ref (dr);
|
|
1894 }
|
|
1895 }
|
|
1896 datarefs.truncate (j);
|
|
1897 }
|
|
1898
|
|
1899 /* Find and store data references in DATAREFS for LOOP nest. If there's
|
|
1900 difficult data reference in a basic block, we shrink the loop nest to
|
|
1901 inner loop of that basic block's father loop. On success, return the
|
|
1902 outer loop of the result loop nest. */
|
|
1903
|
|
1904 static struct loop *
|
|
1905 prepare_data_references (struct loop *loop, vec<data_reference_p> *datarefs)
|
|
1906 {
|
|
1907 struct loop *loop_nest = loop;
|
|
1908 vec<data_reference_p> *bb_refs;
|
|
1909 basic_block bb, *bbs = get_loop_body_in_dom_order (loop);
|
|
1910
|
|
1911 for (unsigned i = 0; i < loop->num_nodes; i++)
|
|
1912 bbs[i]->aux = NULL;
|
|
1913
|
|
1914 /* Find data references for all basic blocks. Shrink loop nest on difficult
|
|
1915 data reference. */
|
|
1916 for (unsigned i = 0; loop_nest && i < loop->num_nodes; ++i)
|
|
1917 {
|
|
1918 bb = bbs[i];
|
|
1919 if (!flow_bb_inside_loop_p (loop_nest, bb))
|
|
1920 continue;
|
|
1921
|
|
1922 bb_refs = new vec<data_reference_p> ();
|
|
1923 if (find_data_references_in_bb (loop, bb, bb_refs) == chrec_dont_know)
|
|
1924 {
|
|
1925 loop_nest = bb->loop_father->inner;
|
|
1926 if (loop_nest && !loop_nest->inner)
|
|
1927 loop_nest = NULL;
|
|
1928
|
|
1929 free_data_refs (*bb_refs);
|
|
1930 delete bb_refs;
|
|
1931 }
|
|
1932 else if (bb_refs->is_empty ())
|
|
1933 delete bb_refs;
|
|
1934 else
|
|
1935 bb->aux = bb_refs;
|
|
1936 }
|
|
1937
|
|
1938 /* Collect all data references in loop nest. */
|
|
1939 for (unsigned i = 0; i < loop->num_nodes; i++)
|
|
1940 {
|
|
1941 bb = bbs[i];
|
|
1942 if (!bb->aux)
|
|
1943 continue;
|
|
1944
|
|
1945 bb_refs = (vec<data_reference_p> *) bb->aux;
|
|
1946 if (loop_nest && flow_bb_inside_loop_p (loop_nest, bb))
|
|
1947 datarefs->safe_splice (*bb_refs);
|
|
1948 else
|
|
1949 free_data_refs (*bb_refs);
|
|
1950
|
|
1951 delete bb_refs;
|
|
1952 bb->aux = NULL;
|
|
1953 }
|
|
1954 free (bbs);
|
|
1955
|
|
1956 return loop_nest;
|
|
1957 }
|
|
1958
|
|
1959 /* Given innermost LOOP, return true if perfect loop nest can be found and
|
|
1960 data dependences can be computed. If succeed, record the perfect loop
|
|
1961 nest in LOOP_NEST; record all data references in DATAREFS and record all
|
|
1962 data dependence relations in DDRS.
|
|
1963
|
|
1964 We do support a restricted form of imperfect loop nest, i.e, loop nest
|
|
1965 with load/store in outer loop initializing/finalizing simple reduction
|
|
1966 of the innermost loop. For such outer loop reference, we require that
|
|
1967 it has no dependence with others sinve it will be moved to inner loop
|
|
1968 in interchange. */
|
|
1969
|
|
1970 static bool
|
|
1971 prepare_perfect_loop_nest (struct loop *loop, vec<loop_p> *loop_nest,
|
|
1972 vec<data_reference_p> *datarefs, vec<ddr_p> *ddrs)
|
|
1973 {
|
|
1974 struct loop *start_loop = NULL, *innermost = loop;
|
|
1975 struct loop *outermost = loops_for_fn (cfun)->tree_root;
|
|
1976
|
|
1977 /* Find loop nest from the innermost loop. The outermost is the innermost
|
|
1978 outer*/
|
|
1979 while (loop->num != 0 && loop->inner == start_loop
|
|
1980 && flow_loop_nested_p (outermost, loop))
|
|
1981 {
|
|
1982 if (!proper_loop_form_for_interchange (loop, &outermost))
|
|
1983 break;
|
|
1984
|
|
1985 start_loop = loop;
|
|
1986 /* If this loop has sibling loop, the father loop won't be in perfect
|
|
1987 loop nest. */
|
|
1988 if (loop->next != NULL)
|
|
1989 break;
|
|
1990
|
|
1991 loop = loop_outer (loop);
|
|
1992 }
|
|
1993 if (!start_loop || !start_loop->inner)
|
|
1994 return false;
|
|
1995
|
|
1996 /* Prepare the data reference vector for the loop nest, pruning outer
|
|
1997 loops we cannot handle. */
|
|
1998 start_loop = prepare_data_references (start_loop, datarefs);
|
|
1999 if (!start_loop
|
|
2000 /* Check if there is no data reference. */
|
|
2001 || datarefs->is_empty ()
|
|
2002 /* Check if there are too many of data references. */
|
|
2003 || (int) datarefs->length () > MAX_DATAREFS)
|
|
2004 return false;
|
|
2005
|
|
2006 /* Compute access strides for all data references, pruning outer
|
|
2007 loops we cannot analyze refs in. */
|
|
2008 start_loop = compute_access_strides (start_loop, innermost, *datarefs);
|
|
2009 if (!start_loop)
|
|
2010 return false;
|
|
2011
|
|
2012 /* Check if any interchange is profitable in the loop nest. */
|
|
2013 if (!should_interchange_loop_nest (start_loop, innermost, *datarefs))
|
|
2014 return false;
|
|
2015
|
|
2016 /* Check if data dependences can be computed for loop nest starting from
|
|
2017 start_loop. */
|
|
2018 loop = start_loop;
|
|
2019 do {
|
|
2020 loop_nest->truncate (0);
|
|
2021
|
|
2022 if (loop != start_loop)
|
|
2023 prune_datarefs_not_in_loop (start_loop, *datarefs);
|
|
2024
|
|
2025 if (find_loop_nest (start_loop, loop_nest)
|
|
2026 && tree_loop_interchange_compute_ddrs (*loop_nest, *datarefs, ddrs))
|
|
2027 {
|
|
2028 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
2029 fprintf (dump_file,
|
|
2030 "\nConsider loop interchange for loop_nest<%d - %d>\n",
|
|
2031 start_loop->num, innermost->num);
|
|
2032
|
|
2033 if (loop != start_loop)
|
|
2034 prune_access_strides_not_in_loop (start_loop, innermost, *datarefs);
|
|
2035
|
|
2036 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
2037 dump_access_strides (*datarefs);
|
|
2038
|
|
2039 return true;
|
|
2040 }
|
|
2041
|
|
2042 free_dependence_relations (*ddrs);
|
|
2043 *ddrs = vNULL;
|
|
2044 /* Try to compute data dependences with the outermost loop stripped. */
|
|
2045 loop = start_loop;
|
|
2046 start_loop = start_loop->inner;
|
|
2047 } while (start_loop && start_loop->inner);
|
|
2048
|
|
2049 return false;
|
|
2050 }
|
|
2051
|
|
2052 /* Main entry for loop interchange pass. */
|
|
2053
|
|
2054 unsigned int
|
|
2055 pass_linterchange::execute (function *fun)
|
|
2056 {
|
|
2057 if (number_of_loops (fun) <= 2)
|
|
2058 return 0;
|
|
2059
|
|
2060 bool changed_p = false;
|
|
2061 struct loop *loop;
|
|
2062 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
|
|
2063 {
|
|
2064 vec<loop_p> loop_nest = vNULL;
|
|
2065 vec<data_reference_p> datarefs = vNULL;
|
|
2066 vec<ddr_p> ddrs = vNULL;
|
|
2067 if (prepare_perfect_loop_nest (loop, &loop_nest, &datarefs, &ddrs))
|
|
2068 {
|
|
2069 tree_loop_interchange loop_interchange (loop_nest);
|
|
2070 changed_p |= loop_interchange.interchange (datarefs, ddrs);
|
|
2071 }
|
|
2072 free_dependence_relations (ddrs);
|
|
2073 free_data_refs_with_aux (datarefs);
|
|
2074 loop_nest.release ();
|
|
2075 }
|
|
2076
|
|
2077 return changed_p ? (TODO_update_ssa_only_virtuals) : 0;
|
|
2078 }
|
|
2079
|
|
2080 } // anon namespace
|
|
2081
|
|
2082 gimple_opt_pass *
|
|
2083 make_pass_linterchange (gcc::context *ctxt)
|
|
2084 {
|
|
2085 return new pass_linterchange (ctxt);
|
|
2086 }
|