Mercurial > hg > CbC > CbC_gcc
annotate gcc/lambda-code.c @ 60:bd49c42ec43e
remove unnecessary files
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 15 Feb 2010 17:39:45 +0900 |
parents | 77e2b8dfacca |
children | b7f97abdc517 |
rev | line source |
---|---|
0 | 1 /* Loop transformation code generation |
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009 | |
3 Free Software Foundation, Inc. | |
4 Contributed by Daniel Berlin <dberlin@dberlin.org> | |
5 | |
6 This file is part of GCC. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
7 |
0 | 8 GCC is free software; you can redistribute it and/or modify it under |
9 the terms of the GNU General Public License as published by the Free | |
10 Software Foundation; either version 3, or (at your option) any later | |
11 version. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
12 |
0 | 13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
17 |
0 | 18 You should have received a copy of the GNU General Public License |
19 along with GCC; see the file COPYING3. If not see | |
20 <http://www.gnu.org/licenses/>. */ | |
21 | |
22 #include "config.h" | |
23 #include "system.h" | |
24 #include "coretypes.h" | |
25 #include "tm.h" | |
26 #include "ggc.h" | |
27 #include "tree.h" | |
28 #include "target.h" | |
29 #include "rtl.h" | |
30 #include "basic-block.h" | |
31 #include "diagnostic.h" | |
32 #include "obstack.h" | |
33 #include "tree-flow.h" | |
34 #include "tree-dump.h" | |
35 #include "timevar.h" | |
36 #include "cfgloop.h" | |
37 #include "expr.h" | |
38 #include "optabs.h" | |
39 #include "tree-chrec.h" | |
40 #include "tree-data-ref.h" | |
41 #include "tree-pass.h" | |
42 #include "tree-scalar-evolution.h" | |
43 #include "vec.h" | |
44 #include "lambda.h" | |
45 #include "vecprim.h" | |
46 #include "pointer-set.h" | |
47 | |
48 /* This loop nest code generation is based on non-singular matrix | |
49 math. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
50 |
0 | 51 A little terminology and a general sketch of the algorithm. See "A singular |
52 loop transformation framework based on non-singular matrices" by Wei Li and | |
53 Keshav Pingali for formal proofs that the various statements below are | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
54 correct. |
0 | 55 |
56 A loop iteration space represents the points traversed by the loop. A point in the | |
57 iteration space can be represented by a vector of size <loop depth>. You can | |
58 therefore represent the iteration space as an integral combinations of a set | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
59 of basis vectors. |
0 | 60 |
61 A loop iteration space is dense if every integer point between the loop | |
62 bounds is a point in the iteration space. Every loop with a step of 1 | |
63 therefore has a dense iteration space. | |
64 | |
65 for i = 1 to 3, step 1 is a dense iteration space. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
66 |
0 | 67 A loop iteration space is sparse if it is not dense. That is, the iteration |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
68 space skips integer points that are within the loop bounds. |
0 | 69 |
70 for i = 1 to 3, step 2 is a sparse iteration space, because the integer point | |
71 2 is skipped. | |
72 | |
73 Dense source spaces are easy to transform, because they don't skip any | |
74 points to begin with. Thus we can compute the exact bounds of the target | |
75 space using min/max and floor/ceil. | |
76 | |
77 For a dense source space, we take the transformation matrix, decompose it | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
78 into a lower triangular part (H) and a unimodular part (U). |
0 | 79 We then compute the auxiliary space from the unimodular part (source loop |
80 nest . U = auxiliary space) , which has two important properties: | |
81 1. It traverses the iterations in the same lexicographic order as the source | |
82 space. | |
83 2. It is a dense space when the source is a dense space (even if the target | |
84 space is going to be sparse). | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
85 |
0 | 86 Given the auxiliary space, we use the lower triangular part to compute the |
87 bounds in the target space by simple matrix multiplication. | |
88 The gaps in the target space (IE the new loop step sizes) will be the | |
89 diagonals of the H matrix. | |
90 | |
91 Sparse source spaces require another step, because you can't directly compute | |
92 the exact bounds of the auxiliary and target space from the sparse space. | |
93 Rather than try to come up with a separate algorithm to handle sparse source | |
94 spaces directly, we just find a legal transformation matrix that gives you | |
95 the sparse source space, from a dense space, and then transform the dense | |
96 space. | |
97 | |
98 For a regular sparse space, you can represent the source space as an integer | |
99 lattice, and the base space of that lattice will always be dense. Thus, we | |
100 effectively use the lattice to figure out the transformation from the lattice | |
101 base space, to the sparse iteration space (IE what transform was applied to | |
102 the dense space to make it sparse). We then compose this transform with the | |
103 transformation matrix specified by the user (since our matrix transformations | |
104 are closed under composition, this is okay). We can then use the base space | |
105 (which is dense) plus the composed transformation matrix, to compute the rest | |
106 of the transform using the dense space algorithm above. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
107 |
0 | 108 In other words, our sparse source space (B) is decomposed into a dense base |
109 space (A), and a matrix (L) that transforms A into B, such that A.L = B. | |
110 We then compute the composition of L and the user transformation matrix (T), | |
111 so that T is now a transform from A to the result, instead of from B to the | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
112 result. |
0 | 113 IE A.(LT) = result instead of B.T = result |
114 Since A is now a dense source space, we can use the dense source space | |
115 algorithm above to compute the result of applying transform (LT) to A. | |
116 | |
117 Fourier-Motzkin elimination is used to compute the bounds of the base space | |
118 of the lattice. */ | |
119 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
120 static bool perfect_nestify (struct loop *, VEC(tree,heap) *, |
0 | 121 VEC(tree,heap) *, VEC(int,heap) *, |
122 VEC(tree,heap) *); | |
123 /* Lattice stuff that is internal to the code generation algorithm. */ | |
124 | |
125 typedef struct lambda_lattice_s | |
126 { | |
127 /* Lattice base matrix. */ | |
128 lambda_matrix base; | |
129 /* Lattice dimension. */ | |
130 int dimension; | |
131 /* Origin vector for the coefficients. */ | |
132 lambda_vector origin; | |
133 /* Origin matrix for the invariants. */ | |
134 lambda_matrix origin_invariants; | |
135 /* Number of invariants. */ | |
136 int invariants; | |
137 } *lambda_lattice; | |
138 | |
139 #define LATTICE_BASE(T) ((T)->base) | |
140 #define LATTICE_DIMENSION(T) ((T)->dimension) | |
141 #define LATTICE_ORIGIN(T) ((T)->origin) | |
142 #define LATTICE_ORIGIN_INVARIANTS(T) ((T)->origin_invariants) | |
143 #define LATTICE_INVARIANTS(T) ((T)->invariants) | |
144 | |
145 static bool lle_equal (lambda_linear_expression, lambda_linear_expression, | |
146 int, int); | |
147 static lambda_lattice lambda_lattice_new (int, int, struct obstack *); | |
148 static lambda_lattice lambda_lattice_compute_base (lambda_loopnest, | |
149 struct obstack *); | |
150 | |
151 static bool can_convert_to_perfect_nest (struct loop *); | |
152 | |
153 /* Create a new lambda body vector. */ | |
154 | |
155 lambda_body_vector | |
156 lambda_body_vector_new (int size, struct obstack * lambda_obstack) | |
157 { | |
158 lambda_body_vector ret; | |
159 | |
160 ret = (lambda_body_vector)obstack_alloc (lambda_obstack, sizeof (*ret)); | |
161 LBV_COEFFICIENTS (ret) = lambda_vector_new (size); | |
162 LBV_SIZE (ret) = size; | |
163 LBV_DENOMINATOR (ret) = 1; | |
164 return ret; | |
165 } | |
166 | |
167 /* Compute the new coefficients for the vector based on the | |
168 *inverse* of the transformation matrix. */ | |
169 | |
170 lambda_body_vector | |
171 lambda_body_vector_compute_new (lambda_trans_matrix transform, | |
172 lambda_body_vector vect, | |
173 struct obstack * lambda_obstack) | |
174 { | |
175 lambda_body_vector temp; | |
176 int depth; | |
177 | |
178 /* Make sure the matrix is square. */ | |
179 gcc_assert (LTM_ROWSIZE (transform) == LTM_COLSIZE (transform)); | |
180 | |
181 depth = LTM_ROWSIZE (transform); | |
182 | |
183 temp = lambda_body_vector_new (depth, lambda_obstack); | |
184 LBV_DENOMINATOR (temp) = | |
185 LBV_DENOMINATOR (vect) * LTM_DENOMINATOR (transform); | |
186 lambda_vector_matrix_mult (LBV_COEFFICIENTS (vect), depth, | |
187 LTM_MATRIX (transform), depth, | |
188 LBV_COEFFICIENTS (temp)); | |
189 LBV_SIZE (temp) = LBV_SIZE (vect); | |
190 return temp; | |
191 } | |
192 | |
193 /* Print out a lambda body vector. */ | |
194 | |
195 void | |
196 print_lambda_body_vector (FILE * outfile, lambda_body_vector body) | |
197 { | |
198 print_lambda_vector (outfile, LBV_COEFFICIENTS (body), LBV_SIZE (body)); | |
199 } | |
200 | |
201 /* Return TRUE if two linear expressions are equal. */ | |
202 | |
203 static bool | |
204 lle_equal (lambda_linear_expression lle1, lambda_linear_expression lle2, | |
205 int depth, int invariants) | |
206 { | |
207 int i; | |
208 | |
209 if (lle1 == NULL || lle2 == NULL) | |
210 return false; | |
211 if (LLE_CONSTANT (lle1) != LLE_CONSTANT (lle2)) | |
212 return false; | |
213 if (LLE_DENOMINATOR (lle1) != LLE_DENOMINATOR (lle2)) | |
214 return false; | |
215 for (i = 0; i < depth; i++) | |
216 if (LLE_COEFFICIENTS (lle1)[i] != LLE_COEFFICIENTS (lle2)[i]) | |
217 return false; | |
218 for (i = 0; i < invariants; i++) | |
219 if (LLE_INVARIANT_COEFFICIENTS (lle1)[i] != | |
220 LLE_INVARIANT_COEFFICIENTS (lle2)[i]) | |
221 return false; | |
222 return true; | |
223 } | |
224 | |
225 /* Create a new linear expression with dimension DIM, and total number | |
226 of invariants INVARIANTS. */ | |
227 | |
228 lambda_linear_expression | |
229 lambda_linear_expression_new (int dim, int invariants, | |
230 struct obstack * lambda_obstack) | |
231 { | |
232 lambda_linear_expression ret; | |
233 | |
234 ret = (lambda_linear_expression)obstack_alloc (lambda_obstack, | |
235 sizeof (*ret)); | |
236 LLE_COEFFICIENTS (ret) = lambda_vector_new (dim); | |
237 LLE_CONSTANT (ret) = 0; | |
238 LLE_INVARIANT_COEFFICIENTS (ret) = lambda_vector_new (invariants); | |
239 LLE_DENOMINATOR (ret) = 1; | |
240 LLE_NEXT (ret) = NULL; | |
241 | |
242 return ret; | |
243 } | |
244 | |
245 /* Print out a linear expression EXPR, with SIZE coefficients, to OUTFILE. | |
246 The starting letter used for variable names is START. */ | |
247 | |
248 static void | |
249 print_linear_expression (FILE * outfile, lambda_vector expr, int size, | |
250 char start) | |
251 { | |
252 int i; | |
253 bool first = true; | |
254 for (i = 0; i < size; i++) | |
255 { | |
256 if (expr[i] != 0) | |
257 { | |
258 if (first) | |
259 { | |
260 if (expr[i] < 0) | |
261 fprintf (outfile, "-"); | |
262 first = false; | |
263 } | |
264 else if (expr[i] > 0) | |
265 fprintf (outfile, " + "); | |
266 else | |
267 fprintf (outfile, " - "); | |
268 if (abs (expr[i]) == 1) | |
269 fprintf (outfile, "%c", start + i); | |
270 else | |
271 fprintf (outfile, "%d%c", abs (expr[i]), start + i); | |
272 } | |
273 } | |
274 } | |
275 | |
276 /* Print out a lambda linear expression structure, EXPR, to OUTFILE. The | |
277 depth/number of coefficients is given by DEPTH, the number of invariants is | |
278 given by INVARIANTS, and the character to start variable names with is given | |
279 by START. */ | |
280 | |
281 void | |
282 print_lambda_linear_expression (FILE * outfile, | |
283 lambda_linear_expression expr, | |
284 int depth, int invariants, char start) | |
285 { | |
286 fprintf (outfile, "\tLinear expression: "); | |
287 print_linear_expression (outfile, LLE_COEFFICIENTS (expr), depth, start); | |
288 fprintf (outfile, " constant: %d ", LLE_CONSTANT (expr)); | |
289 fprintf (outfile, " invariants: "); | |
290 print_linear_expression (outfile, LLE_INVARIANT_COEFFICIENTS (expr), | |
291 invariants, 'A'); | |
292 fprintf (outfile, " denominator: %d\n", LLE_DENOMINATOR (expr)); | |
293 } | |
294 | |
295 /* Print a lambda loop structure LOOP to OUTFILE. The depth/number of | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
296 coefficients is given by DEPTH, the number of invariants is |
0 | 297 given by INVARIANTS, and the character to start variable names with is given |
298 by START. */ | |
299 | |
300 void | |
301 print_lambda_loop (FILE * outfile, lambda_loop loop, int depth, | |
302 int invariants, char start) | |
303 { | |
304 int step; | |
305 lambda_linear_expression expr; | |
306 | |
307 gcc_assert (loop); | |
308 | |
309 expr = LL_LINEAR_OFFSET (loop); | |
310 step = LL_STEP (loop); | |
311 fprintf (outfile, " step size = %d \n", step); | |
312 | |
313 if (expr) | |
314 { | |
315 fprintf (outfile, " linear offset: \n"); | |
316 print_lambda_linear_expression (outfile, expr, depth, invariants, | |
317 start); | |
318 } | |
319 | |
320 fprintf (outfile, " lower bound: \n"); | |
321 for (expr = LL_LOWER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr)) | |
322 print_lambda_linear_expression (outfile, expr, depth, invariants, start); | |
323 fprintf (outfile, " upper bound: \n"); | |
324 for (expr = LL_UPPER_BOUND (loop); expr != NULL; expr = LLE_NEXT (expr)) | |
325 print_lambda_linear_expression (outfile, expr, depth, invariants, start); | |
326 } | |
327 | |
328 /* Create a new loop nest structure with DEPTH loops, and INVARIANTS as the | |
329 number of invariants. */ | |
330 | |
331 lambda_loopnest | |
332 lambda_loopnest_new (int depth, int invariants, | |
333 struct obstack * lambda_obstack) | |
334 { | |
335 lambda_loopnest ret; | |
336 ret = (lambda_loopnest)obstack_alloc (lambda_obstack, sizeof (*ret)); | |
337 | |
338 LN_LOOPS (ret) = (lambda_loop *) | |
339 obstack_alloc (lambda_obstack, depth * sizeof(LN_LOOPS(ret))); | |
340 LN_DEPTH (ret) = depth; | |
341 LN_INVARIANTS (ret) = invariants; | |
342 | |
343 return ret; | |
344 } | |
345 | |
346 /* Print a lambda loopnest structure, NEST, to OUTFILE. The starting | |
347 character to use for loop names is given by START. */ | |
348 | |
349 void | |
350 print_lambda_loopnest (FILE * outfile, lambda_loopnest nest, char start) | |
351 { | |
352 int i; | |
353 for (i = 0; i < LN_DEPTH (nest); i++) | |
354 { | |
355 fprintf (outfile, "Loop %c\n", start + i); | |
356 print_lambda_loop (outfile, LN_LOOPS (nest)[i], LN_DEPTH (nest), | |
357 LN_INVARIANTS (nest), 'i'); | |
358 fprintf (outfile, "\n"); | |
359 } | |
360 } | |
361 | |
362 /* Allocate a new lattice structure of DEPTH x DEPTH, with INVARIANTS number | |
363 of invariants. */ | |
364 | |
365 static lambda_lattice | |
366 lambda_lattice_new (int depth, int invariants, struct obstack * lambda_obstack) | |
367 { | |
368 lambda_lattice ret | |
369 = (lambda_lattice)obstack_alloc (lambda_obstack, sizeof (*ret)); | |
370 LATTICE_BASE (ret) = lambda_matrix_new (depth, depth); | |
371 LATTICE_ORIGIN (ret) = lambda_vector_new (depth); | |
372 LATTICE_ORIGIN_INVARIANTS (ret) = lambda_matrix_new (depth, invariants); | |
373 LATTICE_DIMENSION (ret) = depth; | |
374 LATTICE_INVARIANTS (ret) = invariants; | |
375 return ret; | |
376 } | |
377 | |
378 /* Compute the lattice base for NEST. The lattice base is essentially a | |
379 non-singular transform from a dense base space to a sparse iteration space. | |
380 We use it so that we don't have to specially handle the case of a sparse | |
381 iteration space in other parts of the algorithm. As a result, this routine | |
382 only does something interesting (IE produce a matrix that isn't the | |
383 identity matrix) if NEST is a sparse space. */ | |
384 | |
385 static lambda_lattice | |
386 lambda_lattice_compute_base (lambda_loopnest nest, | |
387 struct obstack * lambda_obstack) | |
388 { | |
389 lambda_lattice ret; | |
390 int depth, invariants; | |
391 lambda_matrix base; | |
392 | |
393 int i, j, step; | |
394 lambda_loop loop; | |
395 lambda_linear_expression expression; | |
396 | |
397 depth = LN_DEPTH (nest); | |
398 invariants = LN_INVARIANTS (nest); | |
399 | |
400 ret = lambda_lattice_new (depth, invariants, lambda_obstack); | |
401 base = LATTICE_BASE (ret); | |
402 for (i = 0; i < depth; i++) | |
403 { | |
404 loop = LN_LOOPS (nest)[i]; | |
405 gcc_assert (loop); | |
406 step = LL_STEP (loop); | |
407 /* If we have a step of 1, then the base is one, and the | |
408 origin and invariant coefficients are 0. */ | |
409 if (step == 1) | |
410 { | |
411 for (j = 0; j < depth; j++) | |
412 base[i][j] = 0; | |
413 base[i][i] = 1; | |
414 LATTICE_ORIGIN (ret)[i] = 0; | |
415 for (j = 0; j < invariants; j++) | |
416 LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = 0; | |
417 } | |
418 else | |
419 { | |
420 /* Otherwise, we need the lower bound expression (which must | |
421 be an affine function) to determine the base. */ | |
422 expression = LL_LOWER_BOUND (loop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
423 gcc_assert (expression && !LLE_NEXT (expression) |
0 | 424 && LLE_DENOMINATOR (expression) == 1); |
425 | |
426 /* The lower triangular portion of the base is going to be the | |
427 coefficient times the step */ | |
428 for (j = 0; j < i; j++) | |
429 base[i][j] = LLE_COEFFICIENTS (expression)[j] | |
430 * LL_STEP (LN_LOOPS (nest)[j]); | |
431 base[i][i] = step; | |
432 for (j = i + 1; j < depth; j++) | |
433 base[i][j] = 0; | |
434 | |
435 /* Origin for this loop is the constant of the lower bound | |
436 expression. */ | |
437 LATTICE_ORIGIN (ret)[i] = LLE_CONSTANT (expression); | |
438 | |
439 /* Coefficient for the invariants are equal to the invariant | |
440 coefficients in the expression. */ | |
441 for (j = 0; j < invariants; j++) | |
442 LATTICE_ORIGIN_INVARIANTS (ret)[i][j] = | |
443 LLE_INVARIANT_COEFFICIENTS (expression)[j]; | |
444 } | |
445 } | |
446 return ret; | |
447 } | |
448 | |
449 /* Compute the least common multiple of two numbers A and B . */ | |
450 | |
451 int | |
452 least_common_multiple (int a, int b) | |
453 { | |
454 return (abs (a) * abs (b) / gcd (a, b)); | |
455 } | |
456 | |
457 /* Perform Fourier-Motzkin elimination to calculate the bounds of the | |
458 auxiliary nest. | |
459 Fourier-Motzkin is a way of reducing systems of linear inequalities so that | |
460 it is easy to calculate the answer and bounds. | |
461 A sketch of how it works: | |
462 Given a system of linear inequalities, ai * xj >= bk, you can always | |
463 rewrite the constraints so they are all of the form | |
464 a <= x, or x <= b, or x >= constant for some x in x1 ... xj (and some b | |
465 in b1 ... bk, and some a in a1...ai) | |
466 You can then eliminate this x from the non-constant inequalities by | |
467 rewriting these as a <= b, x >= constant, and delete the x variable. | |
468 You can then repeat this for any remaining x variables, and then we have | |
469 an easy to use variable <= constant (or no variables at all) form that we | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
470 can construct our bounds from. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
471 |
0 | 472 In our case, each time we eliminate, we construct part of the bound from |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
473 the ith variable, then delete the ith variable. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
474 |
0 | 475 Remember the constant are in our vector a, our coefficient matrix is A, |
476 and our invariant coefficient matrix is B. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
477 |
0 | 478 SIZE is the size of the matrices being passed. |
479 DEPTH is the loop nest depth. | |
480 INVARIANTS is the number of loop invariants. | |
481 A, B, and a are the coefficient matrix, invariant coefficient, and a | |
482 vector of constants, respectively. */ | |
483 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
484 static lambda_loopnest |
0 | 485 compute_nest_using_fourier_motzkin (int size, |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
486 int depth, |
0 | 487 int invariants, |
488 lambda_matrix A, | |
489 lambda_matrix B, | |
490 lambda_vector a, | |
491 struct obstack * lambda_obstack) | |
492 { | |
493 | |
494 int multiple, f1, f2; | |
495 int i, j, k; | |
496 lambda_linear_expression expression; | |
497 lambda_loop loop; | |
498 lambda_loopnest auxillary_nest; | |
499 lambda_matrix swapmatrix, A1, B1; | |
500 lambda_vector swapvector, a1; | |
501 int newsize; | |
502 | |
503 A1 = lambda_matrix_new (128, depth); | |
504 B1 = lambda_matrix_new (128, invariants); | |
505 a1 = lambda_vector_new (128); | |
506 | |
507 auxillary_nest = lambda_loopnest_new (depth, invariants, lambda_obstack); | |
508 | |
509 for (i = depth - 1; i >= 0; i--) | |
510 { | |
511 loop = lambda_loop_new (); | |
512 LN_LOOPS (auxillary_nest)[i] = loop; | |
513 LL_STEP (loop) = 1; | |
514 | |
515 for (j = 0; j < size; j++) | |
516 { | |
517 if (A[j][i] < 0) | |
518 { | |
519 /* Any linear expression in the matrix with a coefficient less | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
520 than 0 becomes part of the new lower bound. */ |
0 | 521 expression = lambda_linear_expression_new (depth, invariants, |
522 lambda_obstack); | |
523 | |
524 for (k = 0; k < i; k++) | |
525 LLE_COEFFICIENTS (expression)[k] = A[j][k]; | |
526 | |
527 for (k = 0; k < invariants; k++) | |
528 LLE_INVARIANT_COEFFICIENTS (expression)[k] = -1 * B[j][k]; | |
529 | |
530 LLE_DENOMINATOR (expression) = -1 * A[j][i]; | |
531 LLE_CONSTANT (expression) = -1 * a[j]; | |
532 | |
533 /* Ignore if identical to the existing lower bound. */ | |
534 if (!lle_equal (LL_LOWER_BOUND (loop), | |
535 expression, depth, invariants)) | |
536 { | |
537 LLE_NEXT (expression) = LL_LOWER_BOUND (loop); | |
538 LL_LOWER_BOUND (loop) = expression; | |
539 } | |
540 | |
541 } | |
542 else if (A[j][i] > 0) | |
543 { | |
544 /* Any linear expression with a coefficient greater than 0 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
545 becomes part of the new upper bound. */ |
0 | 546 expression = lambda_linear_expression_new (depth, invariants, |
547 lambda_obstack); | |
548 for (k = 0; k < i; k++) | |
549 LLE_COEFFICIENTS (expression)[k] = -1 * A[j][k]; | |
550 | |
551 for (k = 0; k < invariants; k++) | |
552 LLE_INVARIANT_COEFFICIENTS (expression)[k] = B[j][k]; | |
553 | |
554 LLE_DENOMINATOR (expression) = A[j][i]; | |
555 LLE_CONSTANT (expression) = a[j]; | |
556 | |
557 /* Ignore if identical to the existing upper bound. */ | |
558 if (!lle_equal (LL_UPPER_BOUND (loop), | |
559 expression, depth, invariants)) | |
560 { | |
561 LLE_NEXT (expression) = LL_UPPER_BOUND (loop); | |
562 LL_UPPER_BOUND (loop) = expression; | |
563 } | |
564 | |
565 } | |
566 } | |
567 | |
568 /* This portion creates a new system of linear inequalities by deleting | |
569 the i'th variable, reducing the system by one variable. */ | |
570 newsize = 0; | |
571 for (j = 0; j < size; j++) | |
572 { | |
573 /* If the coefficient for the i'th variable is 0, then we can just | |
574 eliminate the variable straightaway. Otherwise, we have to | |
575 multiply through by the coefficients we are eliminating. */ | |
576 if (A[j][i] == 0) | |
577 { | |
578 lambda_vector_copy (A[j], A1[newsize], depth); | |
579 lambda_vector_copy (B[j], B1[newsize], invariants); | |
580 a1[newsize] = a[j]; | |
581 newsize++; | |
582 } | |
583 else if (A[j][i] > 0) | |
584 { | |
585 for (k = 0; k < size; k++) | |
586 { | |
587 if (A[k][i] < 0) | |
588 { | |
589 multiple = least_common_multiple (A[j][i], A[k][i]); | |
590 f1 = multiple / A[j][i]; | |
591 f2 = -1 * multiple / A[k][i]; | |
592 | |
593 lambda_vector_add_mc (A[j], f1, A[k], f2, | |
594 A1[newsize], depth); | |
595 lambda_vector_add_mc (B[j], f1, B[k], f2, | |
596 B1[newsize], invariants); | |
597 a1[newsize] = f1 * a[j] + f2 * a[k]; | |
598 newsize++; | |
599 } | |
600 } | |
601 } | |
602 } | |
603 | |
604 swapmatrix = A; | |
605 A = A1; | |
606 A1 = swapmatrix; | |
607 | |
608 swapmatrix = B; | |
609 B = B1; | |
610 B1 = swapmatrix; | |
611 | |
612 swapvector = a; | |
613 a = a1; | |
614 a1 = swapvector; | |
615 | |
616 size = newsize; | |
617 } | |
618 | |
619 return auxillary_nest; | |
620 } | |
621 | |
622 /* Compute the loop bounds for the auxiliary space NEST. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
623 Input system used is Ax <= b. TRANS is the unimodular transformation. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
624 Given the original nest, this function will |
0 | 625 1. Convert the nest into matrix form, which consists of a matrix for the |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
626 coefficients, a matrix for the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
627 invariant coefficients, and a vector for the constants. |
0 | 628 2. Use the matrix form to calculate the lattice base for the nest (which is |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
629 a dense space) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
630 3. Compose the dense space transform with the user specified transform, to |
0 | 631 get a transform we can easily calculate transformed bounds for. |
632 4. Multiply the composed transformation matrix times the matrix form of the | |
633 loop. | |
634 5. Transform the newly created matrix (from step 4) back into a loop nest | |
635 using Fourier-Motzkin elimination to figure out the bounds. */ | |
636 | |
637 static lambda_loopnest | |
638 lambda_compute_auxillary_space (lambda_loopnest nest, | |
639 lambda_trans_matrix trans, | |
640 struct obstack * lambda_obstack) | |
641 { | |
642 lambda_matrix A, B, A1, B1; | |
643 lambda_vector a, a1; | |
644 lambda_matrix invertedtrans; | |
645 int depth, invariants, size; | |
646 int i, j; | |
647 lambda_loop loop; | |
648 lambda_linear_expression expression; | |
649 lambda_lattice lattice; | |
650 | |
651 depth = LN_DEPTH (nest); | |
652 invariants = LN_INVARIANTS (nest); | |
653 | |
654 /* Unfortunately, we can't know the number of constraints we'll have | |
655 ahead of time, but this should be enough even in ridiculous loop nest | |
656 cases. We must not go over this limit. */ | |
657 A = lambda_matrix_new (128, depth); | |
658 B = lambda_matrix_new (128, invariants); | |
659 a = lambda_vector_new (128); | |
660 | |
661 A1 = lambda_matrix_new (128, depth); | |
662 B1 = lambda_matrix_new (128, invariants); | |
663 a1 = lambda_vector_new (128); | |
664 | |
665 /* Store the bounds in the equation matrix A, constant vector a, and | |
666 invariant matrix B, so that we have Ax <= a + B. | |
667 This requires a little equation rearranging so that everything is on the | |
668 correct side of the inequality. */ | |
669 size = 0; | |
670 for (i = 0; i < depth; i++) | |
671 { | |
672 loop = LN_LOOPS (nest)[i]; | |
673 | |
674 /* First we do the lower bound. */ | |
675 if (LL_STEP (loop) > 0) | |
676 expression = LL_LOWER_BOUND (loop); | |
677 else | |
678 expression = LL_UPPER_BOUND (loop); | |
679 | |
680 for (; expression != NULL; expression = LLE_NEXT (expression)) | |
681 { | |
682 /* Fill in the coefficient. */ | |
683 for (j = 0; j < i; j++) | |
684 A[size][j] = LLE_COEFFICIENTS (expression)[j]; | |
685 | |
686 /* And the invariant coefficient. */ | |
687 for (j = 0; j < invariants; j++) | |
688 B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j]; | |
689 | |
690 /* And the constant. */ | |
691 a[size] = LLE_CONSTANT (expression); | |
692 | |
693 /* Convert (2x+3y+2+b)/4 <= z to 2x+3y-4z <= -2-b. IE put all | |
694 constants and single variables on */ | |
695 A[size][i] = -1 * LLE_DENOMINATOR (expression); | |
696 a[size] *= -1; | |
697 for (j = 0; j < invariants; j++) | |
698 B[size][j] *= -1; | |
699 | |
700 size++; | |
701 /* Need to increase matrix sizes above. */ | |
702 gcc_assert (size <= 127); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
703 |
0 | 704 } |
705 | |
706 /* Then do the exact same thing for the upper bounds. */ | |
707 if (LL_STEP (loop) > 0) | |
708 expression = LL_UPPER_BOUND (loop); | |
709 else | |
710 expression = LL_LOWER_BOUND (loop); | |
711 | |
712 for (; expression != NULL; expression = LLE_NEXT (expression)) | |
713 { | |
714 /* Fill in the coefficient. */ | |
715 for (j = 0; j < i; j++) | |
716 A[size][j] = LLE_COEFFICIENTS (expression)[j]; | |
717 | |
718 /* And the invariant coefficient. */ | |
719 for (j = 0; j < invariants; j++) | |
720 B[size][j] = LLE_INVARIANT_COEFFICIENTS (expression)[j]; | |
721 | |
722 /* And the constant. */ | |
723 a[size] = LLE_CONSTANT (expression); | |
724 | |
725 /* Convert z <= (2x+3y+2+b)/4 to -2x-3y+4z <= 2+b. */ | |
726 for (j = 0; j < i; j++) | |
727 A[size][j] *= -1; | |
728 A[size][i] = LLE_DENOMINATOR (expression); | |
729 size++; | |
730 /* Need to increase matrix sizes above. */ | |
731 gcc_assert (size <= 127); | |
732 | |
733 } | |
734 } | |
735 | |
736 /* Compute the lattice base x = base * y + origin, where y is the | |
737 base space. */ | |
738 lattice = lambda_lattice_compute_base (nest, lambda_obstack); | |
739 | |
740 /* Ax <= a + B then becomes ALy <= a+B - A*origin. L is the lattice base */ | |
741 | |
742 /* A1 = A * L */ | |
743 lambda_matrix_mult (A, LATTICE_BASE (lattice), A1, size, depth, depth); | |
744 | |
745 /* a1 = a - A * origin constant. */ | |
746 lambda_matrix_vector_mult (A, size, depth, LATTICE_ORIGIN (lattice), a1); | |
747 lambda_vector_add_mc (a, 1, a1, -1, a1, size); | |
748 | |
749 /* B1 = B - A * origin invariant. */ | |
750 lambda_matrix_mult (A, LATTICE_ORIGIN_INVARIANTS (lattice), B1, size, depth, | |
751 invariants); | |
752 lambda_matrix_add_mc (B, 1, B1, -1, B1, size, invariants); | |
753 | |
754 /* Now compute the auxiliary space bounds by first inverting U, multiplying | |
755 it by A1, then performing Fourier-Motzkin. */ | |
756 | |
757 invertedtrans = lambda_matrix_new (depth, depth); | |
758 | |
759 /* Compute the inverse of U. */ | |
760 lambda_matrix_inverse (LTM_MATRIX (trans), | |
761 invertedtrans, depth); | |
762 | |
763 /* A = A1 inv(U). */ | |
764 lambda_matrix_mult (A1, invertedtrans, A, size, depth, depth); | |
765 | |
766 return compute_nest_using_fourier_motzkin (size, depth, invariants, | |
767 A, B1, a1, lambda_obstack); | |
768 } | |
769 | |
770 /* Compute the loop bounds for the target space, using the bounds of | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
771 the auxiliary nest AUXILLARY_NEST, and the triangular matrix H. |
0 | 772 The target space loop bounds are computed by multiplying the triangular |
773 matrix H by the auxiliary nest, to get the new loop bounds. The sign of | |
774 the loop steps (positive or negative) is then used to swap the bounds if | |
775 the loop counts downwards. | |
776 Return the target loopnest. */ | |
777 | |
778 static lambda_loopnest | |
779 lambda_compute_target_space (lambda_loopnest auxillary_nest, | |
780 lambda_trans_matrix H, lambda_vector stepsigns, | |
781 struct obstack * lambda_obstack) | |
782 { | |
783 lambda_matrix inverse, H1; | |
784 int determinant, i, j; | |
785 int gcd1, gcd2; | |
786 int factor; | |
787 | |
788 lambda_loopnest target_nest; | |
789 int depth, invariants; | |
790 lambda_matrix target; | |
791 | |
792 lambda_loop auxillary_loop, target_loop; | |
793 lambda_linear_expression expression, auxillary_expr, target_expr, tmp_expr; | |
794 | |
795 depth = LN_DEPTH (auxillary_nest); | |
796 invariants = LN_INVARIANTS (auxillary_nest); | |
797 | |
798 inverse = lambda_matrix_new (depth, depth); | |
799 determinant = lambda_matrix_inverse (LTM_MATRIX (H), inverse, depth); | |
800 | |
801 /* H1 is H excluding its diagonal. */ | |
802 H1 = lambda_matrix_new (depth, depth); | |
803 lambda_matrix_copy (LTM_MATRIX (H), H1, depth, depth); | |
804 | |
805 for (i = 0; i < depth; i++) | |
806 H1[i][i] = 0; | |
807 | |
808 /* Computes the linear offsets of the loop bounds. */ | |
809 target = lambda_matrix_new (depth, depth); | |
810 lambda_matrix_mult (H1, inverse, target, depth, depth, depth); | |
811 | |
812 target_nest = lambda_loopnest_new (depth, invariants, lambda_obstack); | |
813 | |
814 for (i = 0; i < depth; i++) | |
815 { | |
816 | |
817 /* Get a new loop structure. */ | |
818 target_loop = lambda_loop_new (); | |
819 LN_LOOPS (target_nest)[i] = target_loop; | |
820 | |
821 /* Computes the gcd of the coefficients of the linear part. */ | |
822 gcd1 = lambda_vector_gcd (target[i], i); | |
823 | |
824 /* Include the denominator in the GCD. */ | |
825 gcd1 = gcd (gcd1, determinant); | |
826 | |
827 /* Now divide through by the gcd. */ | |
828 for (j = 0; j < i; j++) | |
829 target[i][j] = target[i][j] / gcd1; | |
830 | |
831 expression = lambda_linear_expression_new (depth, invariants, | |
832 lambda_obstack); | |
833 lambda_vector_copy (target[i], LLE_COEFFICIENTS (expression), depth); | |
834 LLE_DENOMINATOR (expression) = determinant / gcd1; | |
835 LLE_CONSTANT (expression) = 0; | |
836 lambda_vector_clear (LLE_INVARIANT_COEFFICIENTS (expression), | |
837 invariants); | |
838 LL_LINEAR_OFFSET (target_loop) = expression; | |
839 } | |
840 | |
841 /* For each loop, compute the new bounds from H. */ | |
842 for (i = 0; i < depth; i++) | |
843 { | |
844 auxillary_loop = LN_LOOPS (auxillary_nest)[i]; | |
845 target_loop = LN_LOOPS (target_nest)[i]; | |
846 LL_STEP (target_loop) = LTM_MATRIX (H)[i][i]; | |
847 factor = LTM_MATRIX (H)[i][i]; | |
848 | |
849 /* First we do the lower bound. */ | |
850 auxillary_expr = LL_LOWER_BOUND (auxillary_loop); | |
851 | |
852 for (; auxillary_expr != NULL; | |
853 auxillary_expr = LLE_NEXT (auxillary_expr)) | |
854 { | |
855 target_expr = lambda_linear_expression_new (depth, invariants, | |
856 lambda_obstack); | |
857 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr), | |
858 depth, inverse, depth, | |
859 LLE_COEFFICIENTS (target_expr)); | |
860 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr), | |
861 LLE_COEFFICIENTS (target_expr), depth, | |
862 factor); | |
863 | |
864 LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor; | |
865 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr), | |
866 LLE_INVARIANT_COEFFICIENTS (target_expr), | |
867 invariants); | |
868 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr), | |
869 LLE_INVARIANT_COEFFICIENTS (target_expr), | |
870 invariants, factor); | |
871 LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr); | |
872 | |
873 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth)) | |
874 { | |
875 LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr) | |
876 * determinant; | |
877 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS | |
878 (target_expr), | |
879 LLE_INVARIANT_COEFFICIENTS | |
880 (target_expr), invariants, | |
881 determinant); | |
882 LLE_DENOMINATOR (target_expr) = | |
883 LLE_DENOMINATOR (target_expr) * determinant; | |
884 } | |
885 /* Find the gcd and divide by it here, rather than doing it | |
886 at the tree level. */ | |
887 gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth); | |
888 gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr), | |
889 invariants); | |
890 gcd1 = gcd (gcd1, gcd2); | |
891 gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr)); | |
892 gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr)); | |
893 for (j = 0; j < depth; j++) | |
894 LLE_COEFFICIENTS (target_expr)[j] /= gcd1; | |
895 for (j = 0; j < invariants; j++) | |
896 LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1; | |
897 LLE_CONSTANT (target_expr) /= gcd1; | |
898 LLE_DENOMINATOR (target_expr) /= gcd1; | |
899 /* Ignore if identical to existing bound. */ | |
900 if (!lle_equal (LL_LOWER_BOUND (target_loop), target_expr, depth, | |
901 invariants)) | |
902 { | |
903 LLE_NEXT (target_expr) = LL_LOWER_BOUND (target_loop); | |
904 LL_LOWER_BOUND (target_loop) = target_expr; | |
905 } | |
906 } | |
907 /* Now do the upper bound. */ | |
908 auxillary_expr = LL_UPPER_BOUND (auxillary_loop); | |
909 | |
910 for (; auxillary_expr != NULL; | |
911 auxillary_expr = LLE_NEXT (auxillary_expr)) | |
912 { | |
913 target_expr = lambda_linear_expression_new (depth, invariants, | |
914 lambda_obstack); | |
915 lambda_vector_matrix_mult (LLE_COEFFICIENTS (auxillary_expr), | |
916 depth, inverse, depth, | |
917 LLE_COEFFICIENTS (target_expr)); | |
918 lambda_vector_mult_const (LLE_COEFFICIENTS (target_expr), | |
919 LLE_COEFFICIENTS (target_expr), depth, | |
920 factor); | |
921 LLE_CONSTANT (target_expr) = LLE_CONSTANT (auxillary_expr) * factor; | |
922 lambda_vector_copy (LLE_INVARIANT_COEFFICIENTS (auxillary_expr), | |
923 LLE_INVARIANT_COEFFICIENTS (target_expr), | |
924 invariants); | |
925 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS (target_expr), | |
926 LLE_INVARIANT_COEFFICIENTS (target_expr), | |
927 invariants, factor); | |
928 LLE_DENOMINATOR (target_expr) = LLE_DENOMINATOR (auxillary_expr); | |
929 | |
930 if (!lambda_vector_zerop (LLE_COEFFICIENTS (target_expr), depth)) | |
931 { | |
932 LLE_CONSTANT (target_expr) = LLE_CONSTANT (target_expr) | |
933 * determinant; | |
934 lambda_vector_mult_const (LLE_INVARIANT_COEFFICIENTS | |
935 (target_expr), | |
936 LLE_INVARIANT_COEFFICIENTS | |
937 (target_expr), invariants, | |
938 determinant); | |
939 LLE_DENOMINATOR (target_expr) = | |
940 LLE_DENOMINATOR (target_expr) * determinant; | |
941 } | |
942 /* Find the gcd and divide by it here, instead of at the | |
943 tree level. */ | |
944 gcd1 = lambda_vector_gcd (LLE_COEFFICIENTS (target_expr), depth); | |
945 gcd2 = lambda_vector_gcd (LLE_INVARIANT_COEFFICIENTS (target_expr), | |
946 invariants); | |
947 gcd1 = gcd (gcd1, gcd2); | |
948 gcd1 = gcd (gcd1, LLE_CONSTANT (target_expr)); | |
949 gcd1 = gcd (gcd1, LLE_DENOMINATOR (target_expr)); | |
950 for (j = 0; j < depth; j++) | |
951 LLE_COEFFICIENTS (target_expr)[j] /= gcd1; | |
952 for (j = 0; j < invariants; j++) | |
953 LLE_INVARIANT_COEFFICIENTS (target_expr)[j] /= gcd1; | |
954 LLE_CONSTANT (target_expr) /= gcd1; | |
955 LLE_DENOMINATOR (target_expr) /= gcd1; | |
956 /* Ignore if equal to existing bound. */ | |
957 if (!lle_equal (LL_UPPER_BOUND (target_loop), target_expr, depth, | |
958 invariants)) | |
959 { | |
960 LLE_NEXT (target_expr) = LL_UPPER_BOUND (target_loop); | |
961 LL_UPPER_BOUND (target_loop) = target_expr; | |
962 } | |
963 } | |
964 } | |
965 for (i = 0; i < depth; i++) | |
966 { | |
967 target_loop = LN_LOOPS (target_nest)[i]; | |
968 /* If necessary, exchange the upper and lower bounds and negate | |
969 the step size. */ | |
970 if (stepsigns[i] < 0) | |
971 { | |
972 LL_STEP (target_loop) *= -1; | |
973 tmp_expr = LL_LOWER_BOUND (target_loop); | |
974 LL_LOWER_BOUND (target_loop) = LL_UPPER_BOUND (target_loop); | |
975 LL_UPPER_BOUND (target_loop) = tmp_expr; | |
976 } | |
977 } | |
978 return target_nest; | |
979 } | |
980 | |
981 /* Compute the step signs of TRANS, using TRANS and stepsigns. Return the new | |
982 result. */ | |
983 | |
984 static lambda_vector | |
985 lambda_compute_step_signs (lambda_trans_matrix trans, lambda_vector stepsigns) | |
986 { | |
987 lambda_matrix matrix, H; | |
988 int size; | |
989 lambda_vector newsteps; | |
990 int i, j, factor, minimum_column; | |
991 int temp; | |
992 | |
993 matrix = LTM_MATRIX (trans); | |
994 size = LTM_ROWSIZE (trans); | |
995 H = lambda_matrix_new (size, size); | |
996 | |
997 newsteps = lambda_vector_new (size); | |
998 lambda_vector_copy (stepsigns, newsteps, size); | |
999 | |
1000 lambda_matrix_copy (matrix, H, size, size); | |
1001 | |
1002 for (j = 0; j < size; j++) | |
1003 { | |
1004 lambda_vector row; | |
1005 row = H[j]; | |
1006 for (i = j; i < size; i++) | |
1007 if (row[i] < 0) | |
1008 lambda_matrix_col_negate (H, size, i); | |
1009 while (lambda_vector_first_nz (row, size, j + 1) < size) | |
1010 { | |
1011 minimum_column = lambda_vector_min_nz (row, size, j); | |
1012 lambda_matrix_col_exchange (H, size, j, minimum_column); | |
1013 | |
1014 temp = newsteps[j]; | |
1015 newsteps[j] = newsteps[minimum_column]; | |
1016 newsteps[minimum_column] = temp; | |
1017 | |
1018 for (i = j + 1; i < size; i++) | |
1019 { | |
1020 factor = row[i] / row[j]; | |
1021 lambda_matrix_col_add (H, size, j, i, -1 * factor); | |
1022 } | |
1023 } | |
1024 } | |
1025 return newsteps; | |
1026 } | |
1027 | |
1028 /* Transform NEST according to TRANS, and return the new loopnest. | |
1029 This involves | |
1030 1. Computing a lattice base for the transformation | |
1031 2. Composing the dense base with the specified transformation (TRANS) | |
1032 3. Decomposing the combined transformation into a lower triangular portion, | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1033 and a unimodular portion. |
0 | 1034 4. Computing the auxiliary nest using the unimodular portion. |
1035 5. Computing the target nest using the auxiliary nest and the lower | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1036 triangular portion. */ |
0 | 1037 |
1038 lambda_loopnest | |
1039 lambda_loopnest_transform (lambda_loopnest nest, lambda_trans_matrix trans, | |
1040 struct obstack * lambda_obstack) | |
1041 { | |
1042 lambda_loopnest auxillary_nest, target_nest; | |
1043 | |
1044 int depth, invariants; | |
1045 int i, j; | |
1046 lambda_lattice lattice; | |
1047 lambda_trans_matrix trans1, H, U; | |
1048 lambda_loop loop; | |
1049 lambda_linear_expression expression; | |
1050 lambda_vector origin; | |
1051 lambda_matrix origin_invariants; | |
1052 lambda_vector stepsigns; | |
1053 int f; | |
1054 | |
1055 depth = LN_DEPTH (nest); | |
1056 invariants = LN_INVARIANTS (nest); | |
1057 | |
1058 /* Keep track of the signs of the loop steps. */ | |
1059 stepsigns = lambda_vector_new (depth); | |
1060 for (i = 0; i < depth; i++) | |
1061 { | |
1062 if (LL_STEP (LN_LOOPS (nest)[i]) > 0) | |
1063 stepsigns[i] = 1; | |
1064 else | |
1065 stepsigns[i] = -1; | |
1066 } | |
1067 | |
1068 /* Compute the lattice base. */ | |
1069 lattice = lambda_lattice_compute_base (nest, lambda_obstack); | |
1070 trans1 = lambda_trans_matrix_new (depth, depth); | |
1071 | |
1072 /* Multiply the transformation matrix by the lattice base. */ | |
1073 | |
1074 lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_BASE (lattice), | |
1075 LTM_MATRIX (trans1), depth, depth, depth); | |
1076 | |
1077 /* Compute the Hermite normal form for the new transformation matrix. */ | |
1078 H = lambda_trans_matrix_new (depth, depth); | |
1079 U = lambda_trans_matrix_new (depth, depth); | |
1080 lambda_matrix_hermite (LTM_MATRIX (trans1), depth, LTM_MATRIX (H), | |
1081 LTM_MATRIX (U)); | |
1082 | |
1083 /* Compute the auxiliary loop nest's space from the unimodular | |
1084 portion. */ | |
1085 auxillary_nest = lambda_compute_auxillary_space (nest, U, lambda_obstack); | |
1086 | |
1087 /* Compute the loop step signs from the old step signs and the | |
1088 transformation matrix. */ | |
1089 stepsigns = lambda_compute_step_signs (trans1, stepsigns); | |
1090 | |
1091 /* Compute the target loop nest space from the auxiliary nest and | |
1092 the lower triangular matrix H. */ | |
1093 target_nest = lambda_compute_target_space (auxillary_nest, H, stepsigns, | |
1094 lambda_obstack); | |
1095 origin = lambda_vector_new (depth); | |
1096 origin_invariants = lambda_matrix_new (depth, invariants); | |
1097 lambda_matrix_vector_mult (LTM_MATRIX (trans), depth, depth, | |
1098 LATTICE_ORIGIN (lattice), origin); | |
1099 lambda_matrix_mult (LTM_MATRIX (trans), LATTICE_ORIGIN_INVARIANTS (lattice), | |
1100 origin_invariants, depth, depth, invariants); | |
1101 | |
1102 for (i = 0; i < depth; i++) | |
1103 { | |
1104 loop = LN_LOOPS (target_nest)[i]; | |
1105 expression = LL_LINEAR_OFFSET (loop); | |
1106 if (lambda_vector_zerop (LLE_COEFFICIENTS (expression), depth)) | |
1107 f = 1; | |
1108 else | |
1109 f = LLE_DENOMINATOR (expression); | |
1110 | |
1111 LLE_CONSTANT (expression) += f * origin[i]; | |
1112 | |
1113 for (j = 0; j < invariants; j++) | |
1114 LLE_INVARIANT_COEFFICIENTS (expression)[j] += | |
1115 f * origin_invariants[i][j]; | |
1116 } | |
1117 | |
1118 return target_nest; | |
1119 | |
1120 } | |
1121 | |
1122 /* Convert a gcc tree expression EXPR to a lambda linear expression, and | |
1123 return the new expression. DEPTH is the depth of the loopnest. | |
1124 OUTERINDUCTIONVARS is an array of the induction variables for outer loops | |
1125 in this nest. INVARIANTS is the array of invariants for the loop. EXTRA | |
1126 is the amount we have to add/subtract from the expression because of the | |
1127 type of comparison it is used in. */ | |
1128 | |
1129 static lambda_linear_expression | |
1130 gcc_tree_to_linear_expression (int depth, tree expr, | |
1131 VEC(tree,heap) *outerinductionvars, | |
1132 VEC(tree,heap) *invariants, int extra, | |
1133 struct obstack * lambda_obstack) | |
1134 { | |
1135 lambda_linear_expression lle = NULL; | |
1136 switch (TREE_CODE (expr)) | |
1137 { | |
1138 case INTEGER_CST: | |
1139 { | |
1140 lle = lambda_linear_expression_new (depth, 2 * depth, lambda_obstack); | |
1141 LLE_CONSTANT (lle) = TREE_INT_CST_LOW (expr); | |
1142 if (extra != 0) | |
1143 LLE_CONSTANT (lle) += extra; | |
1144 | |
1145 LLE_DENOMINATOR (lle) = 1; | |
1146 } | |
1147 break; | |
1148 case SSA_NAME: | |
1149 { | |
1150 tree iv, invar; | |
1151 size_t i; | |
1152 for (i = 0; VEC_iterate (tree, outerinductionvars, i, iv); i++) | |
1153 if (iv != NULL) | |
1154 { | |
1155 if (SSA_NAME_VAR (iv) == SSA_NAME_VAR (expr)) | |
1156 { | |
1157 lle = lambda_linear_expression_new (depth, 2 * depth, | |
1158 lambda_obstack); | |
1159 LLE_COEFFICIENTS (lle)[i] = 1; | |
1160 if (extra != 0) | |
1161 LLE_CONSTANT (lle) = extra; | |
1162 | |
1163 LLE_DENOMINATOR (lle) = 1; | |
1164 } | |
1165 } | |
1166 for (i = 0; VEC_iterate (tree, invariants, i, invar); i++) | |
1167 if (invar != NULL) | |
1168 { | |
1169 if (SSA_NAME_VAR (invar) == SSA_NAME_VAR (expr)) | |
1170 { | |
1171 lle = lambda_linear_expression_new (depth, 2 * depth, | |
1172 lambda_obstack); | |
1173 LLE_INVARIANT_COEFFICIENTS (lle)[i] = 1; | |
1174 if (extra != 0) | |
1175 LLE_CONSTANT (lle) = extra; | |
1176 LLE_DENOMINATOR (lle) = 1; | |
1177 } | |
1178 } | |
1179 } | |
1180 break; | |
1181 default: | |
1182 return NULL; | |
1183 } | |
1184 | |
1185 return lle; | |
1186 } | |
1187 | |
1188 /* Return the depth of the loopnest NEST */ | |
1189 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1190 static int |
0 | 1191 depth_of_nest (struct loop *nest) |
1192 { | |
1193 size_t depth = 0; | |
1194 while (nest) | |
1195 { | |
1196 depth++; | |
1197 nest = nest->inner; | |
1198 } | |
1199 return depth; | |
1200 } | |
1201 | |
1202 | |
1203 /* Return true if OP is invariant in LOOP and all outer loops. */ | |
1204 | |
1205 static bool | |
1206 invariant_in_loop_and_outer_loops (struct loop *loop, tree op) | |
1207 { | |
1208 if (is_gimple_min_invariant (op)) | |
1209 return true; | |
1210 if (loop_depth (loop) == 0) | |
1211 return true; | |
1212 if (!expr_invariant_in_loop_p (loop, op)) | |
1213 return false; | |
1214 if (!invariant_in_loop_and_outer_loops (loop_outer (loop), op)) | |
1215 return false; | |
1216 return true; | |
1217 } | |
1218 | |
1219 /* Generate a lambda loop from a gcc loop LOOP. Return the new lambda loop, | |
1220 or NULL if it could not be converted. | |
1221 DEPTH is the depth of the loop. | |
1222 INVARIANTS is a pointer to the array of loop invariants. | |
1223 The induction variable for this loop should be stored in the parameter | |
1224 OURINDUCTIONVAR. | |
1225 OUTERINDUCTIONVARS is an array of induction variables for outer loops. */ | |
1226 | |
1227 static lambda_loop | |
1228 gcc_loop_to_lambda_loop (struct loop *loop, int depth, | |
1229 VEC(tree,heap) ** invariants, | |
1230 tree * ourinductionvar, | |
1231 VEC(tree,heap) * outerinductionvars, | |
1232 VEC(tree,heap) ** lboundvars, | |
1233 VEC(tree,heap) ** uboundvars, | |
1234 VEC(int,heap) ** steps, | |
1235 struct obstack * lambda_obstack) | |
1236 { | |
1237 gimple phi; | |
1238 gimple exit_cond; | |
1239 tree access_fn, inductionvar; | |
1240 tree step; | |
1241 lambda_loop lloop = NULL; | |
1242 lambda_linear_expression lbound, ubound; | |
1243 tree test_lhs, test_rhs; | |
1244 int stepint; | |
1245 int extra = 0; | |
1246 tree lboundvar, uboundvar, uboundresult; | |
1247 | |
1248 /* Find out induction var and exit condition. */ | |
1249 inductionvar = find_induction_var_from_exit_cond (loop); | |
1250 exit_cond = get_loop_exit_condition (loop); | |
1251 | |
1252 if (inductionvar == NULL || exit_cond == NULL) | |
1253 { | |
1254 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1255 fprintf (dump_file, | |
1256 "Unable to convert loop: Cannot determine exit condition or induction variable for loop.\n"); | |
1257 return NULL; | |
1258 } | |
1259 | |
1260 if (SSA_NAME_DEF_STMT (inductionvar) == NULL) | |
1261 { | |
1262 | |
1263 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1264 fprintf (dump_file, | |
1265 "Unable to convert loop: Cannot find PHI node for induction variable\n"); | |
1266 | |
1267 return NULL; | |
1268 } | |
1269 | |
1270 phi = SSA_NAME_DEF_STMT (inductionvar); | |
1271 if (gimple_code (phi) != GIMPLE_PHI) | |
1272 { | |
1273 tree op = SINGLE_SSA_TREE_OPERAND (phi, SSA_OP_USE); | |
1274 if (!op) | |
1275 { | |
1276 | |
1277 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1278 fprintf (dump_file, | |
1279 "Unable to convert loop: Cannot find PHI node for induction variable\n"); | |
1280 | |
1281 return NULL; | |
1282 } | |
1283 | |
1284 phi = SSA_NAME_DEF_STMT (op); | |
1285 if (gimple_code (phi) != GIMPLE_PHI) | |
1286 { | |
1287 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1288 fprintf (dump_file, | |
1289 "Unable to convert loop: Cannot find PHI node for induction variable\n"); | |
1290 return NULL; | |
1291 } | |
1292 } | |
1293 | |
1294 /* The induction variable name/version we want to put in the array is the | |
1295 result of the induction variable phi node. */ | |
1296 *ourinductionvar = PHI_RESULT (phi); | |
1297 access_fn = instantiate_parameters | |
1298 (loop, analyze_scalar_evolution (loop, PHI_RESULT (phi))); | |
1299 if (access_fn == chrec_dont_know) | |
1300 { | |
1301 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1302 fprintf (dump_file, | |
1303 "Unable to convert loop: Access function for induction variable phi is unknown\n"); | |
1304 | |
1305 return NULL; | |
1306 } | |
1307 | |
1308 step = evolution_part_in_loop_num (access_fn, loop->num); | |
1309 if (!step || step == chrec_dont_know) | |
1310 { | |
1311 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1312 fprintf (dump_file, | |
1313 "Unable to convert loop: Cannot determine step of loop.\n"); | |
1314 | |
1315 return NULL; | |
1316 } | |
1317 if (TREE_CODE (step) != INTEGER_CST) | |
1318 { | |
1319 | |
1320 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1321 fprintf (dump_file, | |
1322 "Unable to convert loop: Step of loop is not integer.\n"); | |
1323 return NULL; | |
1324 } | |
1325 | |
1326 stepint = TREE_INT_CST_LOW (step); | |
1327 | |
1328 /* Only want phis for induction vars, which will have two | |
1329 arguments. */ | |
1330 if (gimple_phi_num_args (phi) != 2) | |
1331 { | |
1332 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1333 fprintf (dump_file, | |
1334 "Unable to convert loop: PHI node for induction variable has >2 arguments\n"); | |
1335 return NULL; | |
1336 } | |
1337 | |
1338 /* Another induction variable check. One argument's source should be | |
1339 in the loop, one outside the loop. */ | |
1340 if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 0)->src) | |
1341 && flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 1)->src)) | |
1342 { | |
1343 | |
1344 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1345 fprintf (dump_file, | |
1346 "Unable to convert loop: PHI edges both inside loop, or both outside loop.\n"); | |
1347 | |
1348 return NULL; | |
1349 } | |
1350 | |
1351 if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, 0)->src)) | |
1352 { | |
1353 lboundvar = PHI_ARG_DEF (phi, 1); | |
1354 lbound = gcc_tree_to_linear_expression (depth, lboundvar, | |
1355 outerinductionvars, *invariants, | |
1356 0, lambda_obstack); | |
1357 } | |
1358 else | |
1359 { | |
1360 lboundvar = PHI_ARG_DEF (phi, 0); | |
1361 lbound = gcc_tree_to_linear_expression (depth, lboundvar, | |
1362 outerinductionvars, *invariants, | |
1363 0, lambda_obstack); | |
1364 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1365 |
0 | 1366 if (!lbound) |
1367 { | |
1368 | |
1369 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1370 fprintf (dump_file, | |
1371 "Unable to convert loop: Cannot convert lower bound to linear expression\n"); | |
1372 | |
1373 return NULL; | |
1374 } | |
1375 /* One part of the test may be a loop invariant tree. */ | |
1376 VEC_reserve (tree, heap, *invariants, 1); | |
1377 test_lhs = gimple_cond_lhs (exit_cond); | |
1378 test_rhs = gimple_cond_rhs (exit_cond); | |
1379 | |
1380 if (TREE_CODE (test_rhs) == SSA_NAME | |
1381 && invariant_in_loop_and_outer_loops (loop, test_rhs)) | |
1382 VEC_quick_push (tree, *invariants, test_rhs); | |
1383 else if (TREE_CODE (test_lhs) == SSA_NAME | |
1384 && invariant_in_loop_and_outer_loops (loop, test_lhs)) | |
1385 VEC_quick_push (tree, *invariants, test_lhs); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1386 |
0 | 1387 /* The non-induction variable part of the test is the upper bound variable. |
1388 */ | |
1389 if (test_lhs == inductionvar) | |
1390 uboundvar = test_rhs; | |
1391 else | |
1392 uboundvar = test_lhs; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1393 |
0 | 1394 /* We only size the vectors assuming we have, at max, 2 times as many |
1395 invariants as we do loops (one for each bound). | |
1396 This is just an arbitrary number, but it has to be matched against the | |
1397 code below. */ | |
1398 gcc_assert (VEC_length (tree, *invariants) <= (unsigned int) (2 * depth)); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1399 |
0 | 1400 |
1401 /* We might have some leftover. */ | |
1402 if (gimple_cond_code (exit_cond) == LT_EXPR) | |
1403 extra = -1 * stepint; | |
1404 else if (gimple_cond_code (exit_cond) == NE_EXPR) | |
1405 extra = -1 * stepint; | |
1406 else if (gimple_cond_code (exit_cond) == GT_EXPR) | |
1407 extra = -1 * stepint; | |
1408 else if (gimple_cond_code (exit_cond) == EQ_EXPR) | |
1409 extra = 1 * stepint; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1410 |
0 | 1411 ubound = gcc_tree_to_linear_expression (depth, uboundvar, |
1412 outerinductionvars, | |
1413 *invariants, extra, lambda_obstack); | |
1414 uboundresult = build2 (PLUS_EXPR, TREE_TYPE (uboundvar), uboundvar, | |
1415 build_int_cst (TREE_TYPE (uboundvar), extra)); | |
1416 VEC_safe_push (tree, heap, *uboundvars, uboundresult); | |
1417 VEC_safe_push (tree, heap, *lboundvars, lboundvar); | |
1418 VEC_safe_push (int, heap, *steps, stepint); | |
1419 if (!ubound) | |
1420 { | |
1421 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1422 fprintf (dump_file, | |
1423 "Unable to convert loop: Cannot convert upper bound to linear expression\n"); | |
1424 return NULL; | |
1425 } | |
1426 | |
1427 lloop = lambda_loop_new (); | |
1428 LL_STEP (lloop) = stepint; | |
1429 LL_LOWER_BOUND (lloop) = lbound; | |
1430 LL_UPPER_BOUND (lloop) = ubound; | |
1431 return lloop; | |
1432 } | |
1433 | |
1434 /* Given a LOOP, find the induction variable it is testing against in the exit | |
1435 condition. Return the induction variable if found, NULL otherwise. */ | |
1436 | |
1437 tree | |
1438 find_induction_var_from_exit_cond (struct loop *loop) | |
1439 { | |
1440 gimple expr = get_loop_exit_condition (loop); | |
1441 tree ivarop; | |
1442 tree test_lhs, test_rhs; | |
1443 if (expr == NULL) | |
1444 return NULL_TREE; | |
1445 if (gimple_code (expr) != GIMPLE_COND) | |
1446 return NULL_TREE; | |
1447 test_lhs = gimple_cond_lhs (expr); | |
1448 test_rhs = gimple_cond_rhs (expr); | |
1449 | |
1450 /* Find the side that is invariant in this loop. The ivar must be the other | |
1451 side. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1452 |
0 | 1453 if (expr_invariant_in_loop_p (loop, test_lhs)) |
1454 ivarop = test_rhs; | |
1455 else if (expr_invariant_in_loop_p (loop, test_rhs)) | |
1456 ivarop = test_lhs; | |
1457 else | |
1458 return NULL_TREE; | |
1459 | |
1460 if (TREE_CODE (ivarop) != SSA_NAME) | |
1461 return NULL_TREE; | |
1462 return ivarop; | |
1463 } | |
1464 | |
1465 DEF_VEC_P(lambda_loop); | |
1466 DEF_VEC_ALLOC_P(lambda_loop,heap); | |
1467 | |
1468 /* Generate a lambda loopnest from a gcc loopnest LOOP_NEST. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1469 Return the new loop nest. |
0 | 1470 INDUCTIONVARS is a pointer to an array of induction variables for the |
1471 loopnest that will be filled in during this process. | |
1472 INVARIANTS is a pointer to an array of invariants that will be filled in | |
1473 during this process. */ | |
1474 | |
1475 lambda_loopnest | |
1476 gcc_loopnest_to_lambda_loopnest (struct loop *loop_nest, | |
1477 VEC(tree,heap) **inductionvars, | |
1478 VEC(tree,heap) **invariants, | |
1479 struct obstack * lambda_obstack) | |
1480 { | |
1481 lambda_loopnest ret = NULL; | |
1482 struct loop *temp = loop_nest; | |
1483 int depth = depth_of_nest (loop_nest); | |
1484 size_t i; | |
1485 VEC(lambda_loop,heap) *loops = NULL; | |
1486 VEC(tree,heap) *uboundvars = NULL; | |
1487 VEC(tree,heap) *lboundvars = NULL; | |
1488 VEC(int,heap) *steps = NULL; | |
1489 lambda_loop newloop; | |
1490 tree inductionvar = NULL; | |
1491 bool perfect_nest = perfect_nest_p (loop_nest); | |
1492 | |
1493 if (!perfect_nest && !can_convert_to_perfect_nest (loop_nest)) | |
1494 goto fail; | |
1495 | |
1496 while (temp) | |
1497 { | |
1498 newloop = gcc_loop_to_lambda_loop (temp, depth, invariants, | |
1499 &inductionvar, *inductionvars, | |
1500 &lboundvars, &uboundvars, | |
1501 &steps, lambda_obstack); | |
1502 if (!newloop) | |
1503 goto fail; | |
1504 | |
1505 VEC_safe_push (tree, heap, *inductionvars, inductionvar); | |
1506 VEC_safe_push (lambda_loop, heap, loops, newloop); | |
1507 temp = temp->inner; | |
1508 } | |
1509 | |
1510 if (!perfect_nest) | |
1511 { | |
1512 if (!perfect_nestify (loop_nest, lboundvars, uboundvars, steps, | |
1513 *inductionvars)) | |
1514 { | |
1515 if (dump_file) | |
1516 fprintf (dump_file, | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1517 "Not a perfect loop nest and couldn't convert to one.\n"); |
0 | 1518 goto fail; |
1519 } | |
1520 else if (dump_file) | |
1521 fprintf (dump_file, | |
1522 "Successfully converted loop nest to perfect loop nest.\n"); | |
1523 } | |
1524 | |
1525 ret = lambda_loopnest_new (depth, 2 * depth, lambda_obstack); | |
1526 | |
1527 for (i = 0; VEC_iterate (lambda_loop, loops, i, newloop); i++) | |
1528 LN_LOOPS (ret)[i] = newloop; | |
1529 | |
1530 fail: | |
1531 VEC_free (lambda_loop, heap, loops); | |
1532 VEC_free (tree, heap, uboundvars); | |
1533 VEC_free (tree, heap, lboundvars); | |
1534 VEC_free (int, heap, steps); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1535 |
0 | 1536 return ret; |
1537 } | |
1538 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1539 /* Convert a lambda body vector LBV to a gcc tree, and return the new tree. |
0 | 1540 STMTS_TO_INSERT is a pointer to a tree where the statements we need to be |
1541 inserted for us are stored. INDUCTION_VARS is the array of induction | |
1542 variables for the loop this LBV is from. TYPE is the tree type to use for | |
1543 the variables and trees involved. */ | |
1544 | |
1545 static tree | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1546 lbv_to_gcc_expression (lambda_body_vector lbv, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1547 tree type, VEC(tree,heap) *induction_vars, |
0 | 1548 gimple_seq *stmts_to_insert) |
1549 { | |
1550 int k; | |
1551 tree resvar; | |
1552 tree expr = build_linear_expr (type, LBV_COEFFICIENTS (lbv), induction_vars); | |
1553 | |
1554 k = LBV_DENOMINATOR (lbv); | |
1555 gcc_assert (k != 0); | |
1556 if (k != 1) | |
1557 expr = fold_build2 (CEIL_DIV_EXPR, type, expr, build_int_cst (type, k)); | |
1558 | |
1559 resvar = create_tmp_var (type, "lbvtmp"); | |
1560 add_referenced_var (resvar); | |
1561 return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar); | |
1562 } | |
1563 | |
1564 /* Convert a linear expression from coefficient and constant form to a | |
1565 gcc tree. | |
1566 Return the tree that represents the final value of the expression. | |
1567 LLE is the linear expression to convert. | |
1568 OFFSET is the linear offset to apply to the expression. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1569 TYPE is the tree type to use for the variables and math. |
0 | 1570 INDUCTION_VARS is a vector of induction variables for the loops. |
1571 INVARIANTS is a vector of the loop nest invariants. | |
1572 WRAP specifies what tree code to wrap the results in, if there is more than | |
1573 one (it is either MAX_EXPR, or MIN_EXPR). | |
1574 STMTS_TO_INSERT Is a pointer to the statement list we fill in with | |
1575 statements that need to be inserted for the linear expression. */ | |
1576 | |
1577 static tree | |
1578 lle_to_gcc_expression (lambda_linear_expression lle, | |
1579 lambda_linear_expression offset, | |
1580 tree type, | |
1581 VEC(tree,heap) *induction_vars, | |
1582 VEC(tree,heap) *invariants, | |
1583 enum tree_code wrap, gimple_seq *stmts_to_insert) | |
1584 { | |
1585 int k; | |
1586 tree resvar; | |
1587 tree expr = NULL_TREE; | |
1588 VEC(tree,heap) *results = NULL; | |
1589 | |
1590 gcc_assert (wrap == MAX_EXPR || wrap == MIN_EXPR); | |
1591 | |
1592 /* Build up the linear expressions. */ | |
1593 for (; lle != NULL; lle = LLE_NEXT (lle)) | |
1594 { | |
1595 expr = build_linear_expr (type, LLE_COEFFICIENTS (lle), induction_vars); | |
1596 expr = fold_build2 (PLUS_EXPR, type, expr, | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1597 build_linear_expr (type, |
0 | 1598 LLE_INVARIANT_COEFFICIENTS (lle), |
1599 invariants)); | |
1600 | |
1601 k = LLE_CONSTANT (lle); | |
1602 if (k) | |
1603 expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k)); | |
1604 | |
1605 k = LLE_CONSTANT (offset); | |
1606 if (k) | |
1607 expr = fold_build2 (PLUS_EXPR, type, expr, build_int_cst (type, k)); | |
1608 | |
1609 k = LLE_DENOMINATOR (lle); | |
1610 if (k != 1) | |
1611 expr = fold_build2 (wrap == MAX_EXPR ? CEIL_DIV_EXPR : FLOOR_DIV_EXPR, | |
1612 type, expr, build_int_cst (type, k)); | |
1613 | |
1614 expr = fold (expr); | |
1615 VEC_safe_push (tree, heap, results, expr); | |
1616 } | |
1617 | |
1618 gcc_assert (expr); | |
1619 | |
1620 /* We may need to wrap the results in a MAX_EXPR or MIN_EXPR. */ | |
1621 if (VEC_length (tree, results) > 1) | |
1622 { | |
1623 size_t i; | |
1624 tree op; | |
1625 | |
1626 expr = VEC_index (tree, results, 0); | |
1627 for (i = 1; VEC_iterate (tree, results, i, op); i++) | |
1628 expr = fold_build2 (wrap, type, expr, op); | |
1629 } | |
1630 | |
1631 VEC_free (tree, heap, results); | |
1632 | |
1633 resvar = create_tmp_var (type, "lletmp"); | |
1634 add_referenced_var (resvar); | |
1635 return force_gimple_operand (fold (expr), stmts_to_insert, true, resvar); | |
1636 } | |
1637 | |
1638 /* Remove the induction variable defined at IV_STMT. */ | |
1639 | |
1640 void | |
1641 remove_iv (gimple iv_stmt) | |
1642 { | |
1643 gimple_stmt_iterator si = gsi_for_stmt (iv_stmt); | |
1644 | |
1645 if (gimple_code (iv_stmt) == GIMPLE_PHI) | |
1646 { | |
1647 unsigned i; | |
1648 | |
1649 for (i = 0; i < gimple_phi_num_args (iv_stmt); i++) | |
1650 { | |
1651 gimple stmt; | |
1652 imm_use_iterator imm_iter; | |
1653 tree arg = gimple_phi_arg_def (iv_stmt, i); | |
1654 bool used = false; | |
1655 | |
1656 if (TREE_CODE (arg) != SSA_NAME) | |
1657 continue; | |
1658 | |
1659 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, arg) | |
1660 if (stmt != iv_stmt) | |
1661 used = true; | |
1662 | |
1663 if (!used) | |
1664 remove_iv (SSA_NAME_DEF_STMT (arg)); | |
1665 } | |
1666 | |
1667 remove_phi_node (&si, true); | |
1668 } | |
1669 else | |
1670 { | |
1671 gsi_remove (&si, true); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1672 release_defs (iv_stmt); |
0 | 1673 } |
1674 } | |
1675 | |
1676 /* Transform a lambda loopnest NEW_LOOPNEST, which had TRANSFORM applied to | |
1677 it, back into gcc code. This changes the | |
1678 loops, their induction variables, and their bodies, so that they | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1679 match the transformed loopnest. |
0 | 1680 OLD_LOOPNEST is the loopnest before we've replaced it with the new |
1681 loopnest. | |
1682 OLD_IVS is a vector of induction variables from the old loopnest. | |
1683 INVARIANTS is a vector of loop invariants from the old loopnest. | |
1684 NEW_LOOPNEST is the new lambda loopnest to replace OLD_LOOPNEST with. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1685 TRANSFORM is the matrix transform that was applied to OLD_LOOPNEST to get |
0 | 1686 NEW_LOOPNEST. */ |
1687 | |
1688 void | |
1689 lambda_loopnest_to_gcc_loopnest (struct loop *old_loopnest, | |
1690 VEC(tree,heap) *old_ivs, | |
1691 VEC(tree,heap) *invariants, | |
1692 VEC(gimple,heap) **remove_ivs, | |
1693 lambda_loopnest new_loopnest, | |
1694 lambda_trans_matrix transform, | |
1695 struct obstack * lambda_obstack) | |
1696 { | |
1697 struct loop *temp; | |
1698 size_t i = 0; | |
1699 unsigned j; | |
1700 size_t depth = 0; | |
1701 VEC(tree,heap) *new_ivs = NULL; | |
1702 tree oldiv; | |
1703 gimple_stmt_iterator bsi; | |
1704 | |
1705 transform = lambda_trans_matrix_inverse (transform); | |
1706 | |
1707 if (dump_file) | |
1708 { | |
1709 fprintf (dump_file, "Inverse of transformation matrix:\n"); | |
1710 print_lambda_trans_matrix (dump_file, transform); | |
1711 } | |
1712 depth = depth_of_nest (old_loopnest); | |
1713 temp = old_loopnest; | |
1714 | |
1715 while (temp) | |
1716 { | |
1717 lambda_loop newloop; | |
1718 basic_block bb; | |
1719 edge exit; | |
1720 tree ivvar, ivvarinced; | |
1721 gimple exitcond; | |
1722 gimple_seq stmts; | |
1723 enum tree_code testtype; | |
1724 tree newupperbound, newlowerbound; | |
1725 lambda_linear_expression offset; | |
1726 tree type; | |
1727 bool insert_after; | |
1728 gimple inc_stmt; | |
1729 | |
1730 oldiv = VEC_index (tree, old_ivs, i); | |
1731 type = TREE_TYPE (oldiv); | |
1732 | |
1733 /* First, build the new induction variable temporary */ | |
1734 | |
1735 ivvar = create_tmp_var (type, "lnivtmp"); | |
1736 add_referenced_var (ivvar); | |
1737 | |
1738 VEC_safe_push (tree, heap, new_ivs, ivvar); | |
1739 | |
1740 newloop = LN_LOOPS (new_loopnest)[i]; | |
1741 | |
1742 /* Linear offset is a bit tricky to handle. Punt on the unhandled | |
1743 cases for now. */ | |
1744 offset = LL_LINEAR_OFFSET (newloop); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1745 |
0 | 1746 gcc_assert (LLE_DENOMINATOR (offset) == 1 && |
1747 lambda_vector_zerop (LLE_COEFFICIENTS (offset), depth)); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1748 |
0 | 1749 /* Now build the new lower bounds, and insert the statements |
1750 necessary to generate it on the loop preheader. */ | |
1751 stmts = NULL; | |
1752 newlowerbound = lle_to_gcc_expression (LL_LOWER_BOUND (newloop), | |
1753 LL_LINEAR_OFFSET (newloop), | |
1754 type, | |
1755 new_ivs, | |
1756 invariants, MAX_EXPR, &stmts); | |
1757 | |
1758 if (stmts) | |
1759 { | |
1760 gsi_insert_seq_on_edge (loop_preheader_edge (temp), stmts); | |
1761 gsi_commit_edge_inserts (); | |
1762 } | |
1763 /* Build the new upper bound and insert its statements in the | |
1764 basic block of the exit condition */ | |
1765 stmts = NULL; | |
1766 newupperbound = lle_to_gcc_expression (LL_UPPER_BOUND (newloop), | |
1767 LL_LINEAR_OFFSET (newloop), | |
1768 type, | |
1769 new_ivs, | |
1770 invariants, MIN_EXPR, &stmts); | |
1771 exit = single_exit (temp); | |
1772 exitcond = get_loop_exit_condition (temp); | |
1773 bb = gimple_bb (exitcond); | |
1774 bsi = gsi_after_labels (bb); | |
1775 if (stmts) | |
1776 gsi_insert_seq_before (&bsi, stmts, GSI_NEW_STMT); | |
1777 | |
1778 /* Create the new iv. */ | |
1779 | |
1780 standard_iv_increment_position (temp, &bsi, &insert_after); | |
1781 create_iv (newlowerbound, | |
1782 build_int_cst (type, LL_STEP (newloop)), | |
1783 ivvar, temp, &bsi, insert_after, &ivvar, | |
1784 NULL); | |
1785 | |
1786 /* Unfortunately, the incremented ivvar that create_iv inserted may not | |
1787 dominate the block containing the exit condition. | |
1788 So we simply create our own incremented iv to use in the new exit | |
1789 test, and let redundancy elimination sort it out. */ | |
1790 inc_stmt = gimple_build_assign_with_ops (PLUS_EXPR, SSA_NAME_VAR (ivvar), | |
1791 ivvar, | |
1792 build_int_cst (type, LL_STEP (newloop))); | |
1793 | |
1794 ivvarinced = make_ssa_name (SSA_NAME_VAR (ivvar), inc_stmt); | |
1795 gimple_assign_set_lhs (inc_stmt, ivvarinced); | |
1796 bsi = gsi_for_stmt (exitcond); | |
1797 gsi_insert_before (&bsi, inc_stmt, GSI_SAME_STMT); | |
1798 | |
1799 /* Replace the exit condition with the new upper bound | |
1800 comparison. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1801 |
0 | 1802 testtype = LL_STEP (newloop) >= 0 ? LE_EXPR : GE_EXPR; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1803 |
0 | 1804 /* We want to build a conditional where true means exit the loop, and |
1805 false means continue the loop. | |
1806 So swap the testtype if this isn't the way things are.*/ | |
1807 | |
1808 if (exit->flags & EDGE_FALSE_VALUE) | |
1809 testtype = swap_tree_comparison (testtype); | |
1810 | |
1811 gimple_cond_set_condition (exitcond, testtype, newupperbound, ivvarinced); | |
1812 update_stmt (exitcond); | |
1813 VEC_replace (tree, new_ivs, i, ivvar); | |
1814 | |
1815 i++; | |
1816 temp = temp->inner; | |
1817 } | |
1818 | |
1819 /* Rewrite uses of the old ivs so that they are now specified in terms of | |
1820 the new ivs. */ | |
1821 | |
1822 for (i = 0; VEC_iterate (tree, old_ivs, i, oldiv); i++) | |
1823 { | |
1824 imm_use_iterator imm_iter; | |
1825 use_operand_p use_p; | |
1826 tree oldiv_def; | |
1827 gimple oldiv_stmt = SSA_NAME_DEF_STMT (oldiv); | |
1828 gimple stmt; | |
1829 | |
1830 if (gimple_code (oldiv_stmt) == GIMPLE_PHI) | |
1831 oldiv_def = PHI_RESULT (oldiv_stmt); | |
1832 else | |
1833 oldiv_def = SINGLE_SSA_TREE_OPERAND (oldiv_stmt, SSA_OP_DEF); | |
1834 gcc_assert (oldiv_def != NULL_TREE); | |
1835 | |
1836 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, oldiv_def) | |
1837 { | |
1838 tree newiv; | |
1839 gimple_seq stmts; | |
1840 lambda_body_vector lbv, newlbv; | |
1841 | |
1842 /* Compute the new expression for the induction | |
1843 variable. */ | |
1844 depth = VEC_length (tree, new_ivs); | |
1845 lbv = lambda_body_vector_new (depth, lambda_obstack); | |
1846 LBV_COEFFICIENTS (lbv)[i] = 1; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1847 |
0 | 1848 newlbv = lambda_body_vector_compute_new (transform, lbv, |
1849 lambda_obstack); | |
1850 | |
1851 stmts = NULL; | |
1852 newiv = lbv_to_gcc_expression (newlbv, TREE_TYPE (oldiv), | |
1853 new_ivs, &stmts); | |
1854 | |
1855 if (stmts && gimple_code (stmt) != GIMPLE_PHI) | |
1856 { | |
1857 bsi = gsi_for_stmt (stmt); | |
1858 gsi_insert_seq_before (&bsi, stmts, GSI_SAME_STMT); | |
1859 } | |
1860 | |
1861 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter) | |
1862 propagate_value (use_p, newiv); | |
1863 | |
1864 if (stmts && gimple_code (stmt) == GIMPLE_PHI) | |
1865 for (j = 0; j < gimple_phi_num_args (stmt); j++) | |
1866 if (gimple_phi_arg_def (stmt, j) == newiv) | |
1867 gsi_insert_seq_on_edge (gimple_phi_arg_edge (stmt, j), stmts); | |
1868 | |
1869 update_stmt (stmt); | |
1870 } | |
1871 | |
1872 /* Remove the now unused induction variable. */ | |
1873 VEC_safe_push (gimple, heap, *remove_ivs, oldiv_stmt); | |
1874 } | |
1875 VEC_free (tree, heap, new_ivs); | |
1876 } | |
1877 | |
1878 /* Return TRUE if this is not interesting statement from the perspective of | |
1879 determining if we have a perfect loop nest. */ | |
1880 | |
1881 static bool | |
1882 not_interesting_stmt (gimple stmt) | |
1883 { | |
1884 /* Note that COND_EXPR's aren't interesting because if they were exiting the | |
1885 loop, we would have already failed the number of exits tests. */ | |
1886 if (gimple_code (stmt) == GIMPLE_LABEL | |
1887 || gimple_code (stmt) == GIMPLE_GOTO | |
1888 || gimple_code (stmt) == GIMPLE_COND) | |
1889 return true; | |
1890 return false; | |
1891 } | |
1892 | |
1893 /* Return TRUE if PHI uses DEF for it's in-the-loop edge for LOOP. */ | |
1894 | |
1895 static bool | |
1896 phi_loop_edge_uses_def (struct loop *loop, gimple phi, tree def) | |
1897 { | |
1898 unsigned i; | |
1899 for (i = 0; i < gimple_phi_num_args (phi); i++) | |
1900 if (flow_bb_inside_loop_p (loop, gimple_phi_arg_edge (phi, i)->src)) | |
1901 if (PHI_ARG_DEF (phi, i) == def) | |
1902 return true; | |
1903 return false; | |
1904 } | |
1905 | |
1906 /* Return TRUE if STMT is a use of PHI_RESULT. */ | |
1907 | |
1908 static bool | |
1909 stmt_uses_phi_result (gimple stmt, tree phi_result) | |
1910 { | |
1911 tree use = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_USE); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1912 |
0 | 1913 /* This is conservatively true, because we only want SIMPLE bumpers |
1914 of the form x +- constant for our pass. */ | |
1915 return (use == phi_result); | |
1916 } | |
1917 | |
1918 /* STMT is a bumper stmt for LOOP if the version it defines is used in the | |
1919 in-loop-edge in a phi node, and the operand it uses is the result of that | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1920 phi node. |
0 | 1921 I.E. i_29 = i_3 + 1 |
1922 i_3 = PHI (0, i_29); */ | |
1923 | |
1924 static bool | |
1925 stmt_is_bumper_for_loop (struct loop *loop, gimple stmt) | |
1926 { | |
1927 gimple use; | |
1928 tree def; | |
1929 imm_use_iterator iter; | |
1930 use_operand_p use_p; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1931 |
0 | 1932 def = SINGLE_SSA_TREE_OPERAND (stmt, SSA_OP_DEF); |
1933 if (!def) | |
1934 return false; | |
1935 | |
1936 FOR_EACH_IMM_USE_FAST (use_p, iter, def) | |
1937 { | |
1938 use = USE_STMT (use_p); | |
1939 if (gimple_code (use) == GIMPLE_PHI) | |
1940 { | |
1941 if (phi_loop_edge_uses_def (loop, use, def)) | |
1942 if (stmt_uses_phi_result (stmt, PHI_RESULT (use))) | |
1943 return true; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1944 } |
0 | 1945 } |
1946 return false; | |
1947 } | |
1948 | |
1949 | |
1950 /* Return true if LOOP is a perfect loop nest. | |
1951 Perfect loop nests are those loop nests where all code occurs in the | |
1952 innermost loop body. | |
1953 If S is a program statement, then | |
1954 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1955 i.e. |
0 | 1956 DO I = 1, 20 |
1957 S1 | |
1958 DO J = 1, 20 | |
1959 ... | |
1960 END DO | |
1961 END DO | |
1962 is not a perfect loop nest because of S1. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1963 |
0 | 1964 DO I = 1, 20 |
1965 DO J = 1, 20 | |
1966 S1 | |
1967 ... | |
1968 END DO | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1969 END DO |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1970 is a perfect loop nest. |
0 | 1971 |
1972 Since we don't have high level loops anymore, we basically have to walk our | |
1973 statements and ignore those that are there because the loop needs them (IE | |
1974 the induction variable increment, and jump back to the top of the loop). */ | |
1975 | |
1976 bool | |
1977 perfect_nest_p (struct loop *loop) | |
1978 { | |
1979 basic_block *bbs; | |
1980 size_t i; | |
1981 gimple exit_cond; | |
1982 | |
1983 /* Loops at depth 0 are perfect nests. */ | |
1984 if (!loop->inner) | |
1985 return true; | |
1986 | |
1987 bbs = get_loop_body (loop); | |
1988 exit_cond = get_loop_exit_condition (loop); | |
1989 | |
1990 for (i = 0; i < loop->num_nodes; i++) | |
1991 { | |
1992 if (bbs[i]->loop_father == loop) | |
1993 { | |
1994 gimple_stmt_iterator bsi; | |
1995 | |
1996 for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi); gsi_next (&bsi)) | |
1997 { | |
1998 gimple stmt = gsi_stmt (bsi); | |
1999 | |
2000 if (gimple_code (stmt) == GIMPLE_COND | |
2001 && exit_cond != stmt) | |
2002 goto non_perfectly_nested; | |
2003 | |
2004 if (stmt == exit_cond | |
2005 || not_interesting_stmt (stmt) | |
2006 || stmt_is_bumper_for_loop (loop, stmt)) | |
2007 continue; | |
2008 | |
2009 non_perfectly_nested: | |
2010 free (bbs); | |
2011 return false; | |
2012 } | |
2013 } | |
2014 } | |
2015 | |
2016 free (bbs); | |
2017 | |
2018 return perfect_nest_p (loop->inner); | |
2019 } | |
2020 | |
2021 /* Replace the USES of X in STMT, or uses with the same step as X with Y. | |
2022 YINIT is the initial value of Y, REPLACEMENTS is a hash table to | |
2023 avoid creating duplicate temporaries and FIRSTBSI is statement | |
2024 iterator where new temporaries should be inserted at the beginning | |
2025 of body basic block. */ | |
2026 | |
2027 static void | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2028 replace_uses_equiv_to_x_with_y (struct loop *loop, gimple stmt, tree x, |
0 | 2029 int xstep, tree y, tree yinit, |
2030 htab_t replacements, | |
2031 gimple_stmt_iterator *firstbsi) | |
2032 { | |
2033 ssa_op_iter iter; | |
2034 use_operand_p use_p; | |
2035 | |
2036 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_USE) | |
2037 { | |
2038 tree use = USE_FROM_PTR (use_p); | |
2039 tree step = NULL_TREE; | |
2040 tree scev, init, val, var; | |
2041 gimple setstmt; | |
2042 struct tree_map *h, in; | |
2043 void **loc; | |
2044 | |
2045 /* Replace uses of X with Y right away. */ | |
2046 if (use == x) | |
2047 { | |
2048 SET_USE (use_p, y); | |
2049 continue; | |
2050 } | |
2051 | |
2052 scev = instantiate_parameters (loop, | |
2053 analyze_scalar_evolution (loop, use)); | |
2054 | |
2055 if (scev == NULL || scev == chrec_dont_know) | |
2056 continue; | |
2057 | |
2058 step = evolution_part_in_loop_num (scev, loop->num); | |
2059 if (step == NULL | |
2060 || step == chrec_dont_know | |
2061 || TREE_CODE (step) != INTEGER_CST | |
2062 || int_cst_value (step) != xstep) | |
2063 continue; | |
2064 | |
2065 /* Use REPLACEMENTS hash table to cache already created | |
2066 temporaries. */ | |
2067 in.hash = htab_hash_pointer (use); | |
2068 in.base.from = use; | |
2069 h = (struct tree_map *) htab_find_with_hash (replacements, &in, in.hash); | |
2070 if (h != NULL) | |
2071 { | |
2072 SET_USE (use_p, h->to); | |
2073 continue; | |
2074 } | |
2075 | |
2076 /* USE which has the same step as X should be replaced | |
2077 with a temporary set to Y + YINIT - INIT. */ | |
2078 init = initial_condition_in_loop_num (scev, loop->num); | |
2079 gcc_assert (init != NULL && init != chrec_dont_know); | |
2080 if (TREE_TYPE (use) == TREE_TYPE (y)) | |
2081 { | |
2082 val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), init, yinit); | |
2083 val = fold_build2 (PLUS_EXPR, TREE_TYPE (y), y, val); | |
2084 if (val == y) | |
2085 { | |
2086 /* If X has the same type as USE, the same step | |
2087 and same initial value, it can be replaced by Y. */ | |
2088 SET_USE (use_p, y); | |
2089 continue; | |
2090 } | |
2091 } | |
2092 else | |
2093 { | |
2094 val = fold_build2 (MINUS_EXPR, TREE_TYPE (y), y, yinit); | |
2095 val = fold_convert (TREE_TYPE (use), val); | |
2096 val = fold_build2 (PLUS_EXPR, TREE_TYPE (use), val, init); | |
2097 } | |
2098 | |
2099 /* Create a temporary variable and insert it at the beginning | |
2100 of the loop body basic block, right after the PHI node | |
2101 which sets Y. */ | |
2102 var = create_tmp_var (TREE_TYPE (use), "perfecttmp"); | |
2103 add_referenced_var (var); | |
2104 val = force_gimple_operand_gsi (firstbsi, val, false, NULL, | |
2105 true, GSI_SAME_STMT); | |
2106 setstmt = gimple_build_assign (var, val); | |
2107 var = make_ssa_name (var, setstmt); | |
2108 gimple_assign_set_lhs (setstmt, var); | |
2109 gsi_insert_before (firstbsi, setstmt, GSI_SAME_STMT); | |
2110 update_stmt (setstmt); | |
2111 SET_USE (use_p, var); | |
2112 h = GGC_NEW (struct tree_map); | |
2113 h->hash = in.hash; | |
2114 h->base.from = use; | |
2115 h->to = var; | |
2116 loc = htab_find_slot_with_hash (replacements, h, in.hash, INSERT); | |
2117 gcc_assert ((*(struct tree_map **)loc) == NULL); | |
2118 *(struct tree_map **) loc = h; | |
2119 } | |
2120 } | |
2121 | |
2122 /* Return true if STMT is an exit PHI for LOOP */ | |
2123 | |
2124 static bool | |
2125 exit_phi_for_loop_p (struct loop *loop, gimple stmt) | |
2126 { | |
2127 if (gimple_code (stmt) != GIMPLE_PHI | |
2128 || gimple_phi_num_args (stmt) != 1 | |
2129 || gimple_bb (stmt) != single_exit (loop)->dest) | |
2130 return false; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2131 |
0 | 2132 return true; |
2133 } | |
2134 | |
2135 /* Return true if STMT can be put back into the loop INNER, by | |
2136 copying it to the beginning of that loop and changing the uses. */ | |
2137 | |
2138 static bool | |
2139 can_put_in_inner_loop (struct loop *inner, gimple stmt) | |
2140 { | |
2141 imm_use_iterator imm_iter; | |
2142 use_operand_p use_p; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2143 |
0 | 2144 gcc_assert (is_gimple_assign (stmt)); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2145 if (gimple_vuse (stmt) |
0 | 2146 || !stmt_invariant_in_loop_p (inner, stmt)) |
2147 return false; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2148 |
0 | 2149 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_assign_lhs (stmt)) |
2150 { | |
2151 if (!exit_phi_for_loop_p (inner, USE_STMT (use_p))) | |
2152 { | |
2153 basic_block immbb = gimple_bb (USE_STMT (use_p)); | |
2154 | |
2155 if (!flow_bb_inside_loop_p (inner, immbb)) | |
2156 return false; | |
2157 } | |
2158 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2159 return true; |
0 | 2160 } |
2161 | |
2162 /* Return true if STMT can be put *after* the inner loop of LOOP. */ | |
2163 | |
2164 static bool | |
2165 can_put_after_inner_loop (struct loop *loop, gimple stmt) | |
2166 { | |
2167 imm_use_iterator imm_iter; | |
2168 use_operand_p use_p; | |
2169 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2170 if (gimple_vuse (stmt)) |
0 | 2171 return false; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2172 |
0 | 2173 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, gimple_assign_lhs (stmt)) |
2174 { | |
2175 if (!exit_phi_for_loop_p (loop, USE_STMT (use_p))) | |
2176 { | |
2177 basic_block immbb = gimple_bb (USE_STMT (use_p)); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2178 |
0 | 2179 if (!dominated_by_p (CDI_DOMINATORS, |
2180 immbb, | |
2181 loop->inner->header) | |
2182 && !can_put_in_inner_loop (loop->inner, stmt)) | |
2183 return false; | |
2184 } | |
2185 } | |
2186 return true; | |
2187 } | |
2188 | |
2189 /* Return true when the induction variable IV is simple enough to be | |
2190 re-synthesized. */ | |
2191 | |
2192 static bool | |
2193 can_duplicate_iv (tree iv, struct loop *loop) | |
2194 { | |
2195 tree scev = instantiate_parameters | |
2196 (loop, analyze_scalar_evolution (loop, iv)); | |
2197 | |
2198 if (!automatically_generated_chrec_p (scev)) | |
2199 { | |
2200 tree step = evolution_part_in_loop_num (scev, loop->num); | |
2201 | |
2202 if (step && step != chrec_dont_know && TREE_CODE (step) == INTEGER_CST) | |
2203 return true; | |
2204 } | |
2205 | |
2206 return false; | |
2207 } | |
2208 | |
2209 /* If this is a scalar operation that can be put back into the inner | |
2210 loop, or after the inner loop, through copying, then do so. This | |
2211 works on the theory that any amount of scalar code we have to | |
2212 reduplicate into or after the loops is less expensive that the win | |
2213 we get from rearranging the memory walk the loop is doing so that | |
2214 it has better cache behavior. */ | |
2215 | |
2216 static bool | |
2217 cannot_convert_modify_to_perfect_nest (gimple stmt, struct loop *loop) | |
2218 { | |
2219 use_operand_p use_a, use_b; | |
2220 imm_use_iterator imm_iter; | |
2221 ssa_op_iter op_iter, op_iter1; | |
2222 tree op0 = gimple_assign_lhs (stmt); | |
2223 | |
2224 /* The statement should not define a variable used in the inner | |
2225 loop. */ | |
2226 if (TREE_CODE (op0) == SSA_NAME | |
2227 && !can_duplicate_iv (op0, loop)) | |
2228 FOR_EACH_IMM_USE_FAST (use_a, imm_iter, op0) | |
2229 if (gimple_bb (USE_STMT (use_a))->loop_father == loop->inner) | |
2230 return true; | |
2231 | |
2232 FOR_EACH_SSA_USE_OPERAND (use_a, stmt, op_iter, SSA_OP_USE) | |
2233 { | |
2234 gimple node; | |
2235 tree op = USE_FROM_PTR (use_a); | |
2236 | |
2237 /* The variables should not be used in both loops. */ | |
2238 if (!can_duplicate_iv (op, loop)) | |
2239 FOR_EACH_IMM_USE_FAST (use_b, imm_iter, op) | |
2240 if (gimple_bb (USE_STMT (use_b))->loop_father == loop->inner) | |
2241 return true; | |
2242 | |
2243 /* The statement should not use the value of a scalar that was | |
2244 modified in the loop. */ | |
2245 node = SSA_NAME_DEF_STMT (op); | |
2246 if (gimple_code (node) == GIMPLE_PHI) | |
2247 FOR_EACH_PHI_ARG (use_b, node, op_iter1, SSA_OP_USE) | |
2248 { | |
2249 tree arg = USE_FROM_PTR (use_b); | |
2250 | |
2251 if (TREE_CODE (arg) == SSA_NAME) | |
2252 { | |
2253 gimple arg_stmt = SSA_NAME_DEF_STMT (arg); | |
2254 | |
2255 if (gimple_bb (arg_stmt) | |
2256 && (gimple_bb (arg_stmt)->loop_father == loop->inner)) | |
2257 return true; | |
2258 } | |
2259 } | |
2260 } | |
2261 | |
2262 return false; | |
2263 } | |
2264 /* Return true when BB contains statements that can harm the transform | |
2265 to a perfect loop nest. */ | |
2266 | |
2267 static bool | |
2268 cannot_convert_bb_to_perfect_nest (basic_block bb, struct loop *loop) | |
2269 { | |
2270 gimple_stmt_iterator bsi; | |
2271 gimple exit_condition = get_loop_exit_condition (loop); | |
2272 | |
2273 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi)) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2274 { |
0 | 2275 gimple stmt = gsi_stmt (bsi); |
2276 | |
2277 if (stmt == exit_condition | |
2278 || not_interesting_stmt (stmt) | |
2279 || stmt_is_bumper_for_loop (loop, stmt)) | |
2280 continue; | |
2281 | |
2282 if (is_gimple_assign (stmt)) | |
2283 { | |
2284 if (cannot_convert_modify_to_perfect_nest (stmt, loop)) | |
2285 return true; | |
2286 | |
2287 if (can_duplicate_iv (gimple_assign_lhs (stmt), loop)) | |
2288 continue; | |
2289 | |
2290 if (can_put_in_inner_loop (loop->inner, stmt) | |
2291 || can_put_after_inner_loop (loop, stmt)) | |
2292 continue; | |
2293 } | |
2294 | |
2295 /* If the bb of a statement we care about isn't dominated by the | |
2296 header of the inner loop, then we can't handle this case | |
2297 right now. This test ensures that the statement comes | |
2298 completely *after* the inner loop. */ | |
2299 if (!dominated_by_p (CDI_DOMINATORS, | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2300 gimple_bb (stmt), |
0 | 2301 loop->inner->header)) |
2302 return true; | |
2303 } | |
2304 | |
2305 return false; | |
2306 } | |
2307 | |
2308 | |
2309 /* Return TRUE if LOOP is an imperfect nest that we can convert to a | |
2310 perfect one. At the moment, we only handle imperfect nests of | |
2311 depth 2, where all of the statements occur after the inner loop. */ | |
2312 | |
2313 static bool | |
2314 can_convert_to_perfect_nest (struct loop *loop) | |
2315 { | |
2316 basic_block *bbs; | |
2317 size_t i; | |
2318 gimple_stmt_iterator si; | |
2319 | |
2320 /* Can't handle triply nested+ loops yet. */ | |
2321 if (!loop->inner || loop->inner->inner) | |
2322 return false; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2323 |
0 | 2324 bbs = get_loop_body (loop); |
2325 for (i = 0; i < loop->num_nodes; i++) | |
2326 if (bbs[i]->loop_father == loop | |
2327 && cannot_convert_bb_to_perfect_nest (bbs[i], loop)) | |
2328 goto fail; | |
2329 | |
2330 /* We also need to make sure the loop exit only has simple copy phis in it, | |
2331 otherwise we don't know how to transform it into a perfect nest. */ | |
2332 for (si = gsi_start_phis (single_exit (loop)->dest); | |
2333 !gsi_end_p (si); | |
2334 gsi_next (&si)) | |
2335 if (gimple_phi_num_args (gsi_stmt (si)) != 1) | |
2336 goto fail; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2337 |
0 | 2338 free (bbs); |
2339 return true; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2340 |
0 | 2341 fail: |
2342 free (bbs); | |
2343 return false; | |
2344 } | |
2345 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2346 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2347 DEF_VEC_I(source_location); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2348 DEF_VEC_ALLOC_I(source_location,heap); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2349 |
0 | 2350 /* Transform the loop nest into a perfect nest, if possible. |
2351 LOOP is the loop nest to transform into a perfect nest | |
2352 LBOUNDS are the lower bounds for the loops to transform | |
2353 UBOUNDS are the upper bounds for the loops to transform | |
2354 STEPS is the STEPS for the loops to transform. | |
2355 LOOPIVS is the induction variables for the loops to transform. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2356 |
0 | 2357 Basically, for the case of |
2358 | |
2359 FOR (i = 0; i < 50; i++) | |
2360 { | |
2361 FOR (j =0; j < 50; j++) | |
2362 { | |
2363 <whatever> | |
2364 } | |
2365 <some code> | |
2366 } | |
2367 | |
2368 This function will transform it into a perfect loop nest by splitting the | |
2369 outer loop into two loops, like so: | |
2370 | |
2371 FOR (i = 0; i < 50; i++) | |
2372 { | |
2373 FOR (j = 0; j < 50; j++) | |
2374 { | |
2375 <whatever> | |
2376 } | |
2377 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2378 |
0 | 2379 FOR (i = 0; i < 50; i ++) |
2380 { | |
2381 <some code> | |
2382 } | |
2383 | |
2384 Return FALSE if we can't make this loop into a perfect nest. */ | |
2385 | |
2386 static bool | |
2387 perfect_nestify (struct loop *loop, | |
2388 VEC(tree,heap) *lbounds, | |
2389 VEC(tree,heap) *ubounds, | |
2390 VEC(int,heap) *steps, | |
2391 VEC(tree,heap) *loopivs) | |
2392 { | |
2393 basic_block *bbs; | |
2394 gimple exit_condition; | |
2395 gimple cond_stmt; | |
2396 basic_block preheaderbb, headerbb, bodybb, latchbb, olddest; | |
2397 int i; | |
2398 gimple_stmt_iterator bsi, firstbsi; | |
2399 bool insert_after; | |
2400 edge e; | |
2401 struct loop *newloop; | |
2402 gimple phi; | |
2403 tree uboundvar; | |
2404 gimple stmt; | |
2405 tree oldivvar, ivvar, ivvarinced; | |
2406 VEC(tree,heap) *phis = NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2407 VEC(source_location,heap) *locations = NULL; |
0 | 2408 htab_t replacements = NULL; |
2409 | |
2410 /* Create the new loop. */ | |
2411 olddest = single_exit (loop)->dest; | |
2412 preheaderbb = split_edge (single_exit (loop)); | |
2413 headerbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2414 |
0 | 2415 /* Push the exit phi nodes that we are moving. */ |
2416 for (bsi = gsi_start_phis (olddest); !gsi_end_p (bsi); gsi_next (&bsi)) | |
2417 { | |
2418 phi = gsi_stmt (bsi); | |
2419 VEC_reserve (tree, heap, phis, 2); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2420 VEC_reserve (source_location, heap, locations, 1); |
0 | 2421 VEC_quick_push (tree, phis, PHI_RESULT (phi)); |
2422 VEC_quick_push (tree, phis, PHI_ARG_DEF (phi, 0)); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2423 VEC_quick_push (source_location, locations, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2424 gimple_phi_arg_location (phi, 0)); |
0 | 2425 } |
2426 e = redirect_edge_and_branch (single_succ_edge (preheaderbb), headerbb); | |
2427 | |
2428 /* Remove the exit phis from the old basic block. */ | |
2429 for (bsi = gsi_start_phis (olddest); !gsi_end_p (bsi); ) | |
2430 remove_phi_node (&bsi, false); | |
2431 | |
2432 /* and add them back to the new basic block. */ | |
2433 while (VEC_length (tree, phis) != 0) | |
2434 { | |
2435 tree def; | |
2436 tree phiname; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2437 source_location locus; |
0 | 2438 def = VEC_pop (tree, phis); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2439 phiname = VEC_pop (tree, phis); |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2440 locus = VEC_pop (source_location, locations); |
0 | 2441 phi = create_phi_node (phiname, preheaderbb); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2442 add_phi_arg (phi, def, single_pred_edge (preheaderbb), locus); |
0 | 2443 } |
2444 flush_pending_stmts (e); | |
2445 VEC_free (tree, heap, phis); | |
2446 | |
2447 bodybb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb); | |
2448 latchbb = create_empty_bb (EXIT_BLOCK_PTR->prev_bb); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2449 make_edge (headerbb, bodybb, EDGE_FALLTHRU); |
0 | 2450 cond_stmt = gimple_build_cond (NE_EXPR, integer_one_node, integer_zero_node, |
2451 NULL_TREE, NULL_TREE); | |
2452 bsi = gsi_start_bb (bodybb); | |
2453 gsi_insert_after (&bsi, cond_stmt, GSI_NEW_STMT); | |
2454 e = make_edge (bodybb, olddest, EDGE_FALSE_VALUE); | |
2455 make_edge (bodybb, latchbb, EDGE_TRUE_VALUE); | |
2456 make_edge (latchbb, headerbb, EDGE_FALLTHRU); | |
2457 | |
2458 /* Update the loop structures. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2459 newloop = duplicate_loop (loop, olddest->loop_father); |
0 | 2460 newloop->header = headerbb; |
2461 newloop->latch = latchbb; | |
2462 add_bb_to_loop (latchbb, newloop); | |
2463 add_bb_to_loop (bodybb, newloop); | |
2464 add_bb_to_loop (headerbb, newloop); | |
2465 set_immediate_dominator (CDI_DOMINATORS, bodybb, headerbb); | |
2466 set_immediate_dominator (CDI_DOMINATORS, headerbb, preheaderbb); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2467 set_immediate_dominator (CDI_DOMINATORS, preheaderbb, |
0 | 2468 single_exit (loop)->src); |
2469 set_immediate_dominator (CDI_DOMINATORS, latchbb, bodybb); | |
2470 set_immediate_dominator (CDI_DOMINATORS, olddest, | |
2471 recompute_dominator (CDI_DOMINATORS, olddest)); | |
2472 /* Create the new iv. */ | |
2473 oldivvar = VEC_index (tree, loopivs, 0); | |
2474 ivvar = create_tmp_var (TREE_TYPE (oldivvar), "perfectiv"); | |
2475 add_referenced_var (ivvar); | |
2476 standard_iv_increment_position (newloop, &bsi, &insert_after); | |
2477 create_iv (VEC_index (tree, lbounds, 0), | |
2478 build_int_cst (TREE_TYPE (oldivvar), VEC_index (int, steps, 0)), | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2479 ivvar, newloop, &bsi, insert_after, &ivvar, &ivvarinced); |
0 | 2480 |
2481 /* Create the new upper bound. This may be not just a variable, so we copy | |
2482 it to one just in case. */ | |
2483 | |
2484 exit_condition = get_loop_exit_condition (newloop); | |
2485 uboundvar = create_tmp_var (TREE_TYPE (VEC_index (tree, ubounds, 0)), | |
2486 "uboundvar"); | |
2487 add_referenced_var (uboundvar); | |
2488 stmt = gimple_build_assign (uboundvar, VEC_index (tree, ubounds, 0)); | |
2489 uboundvar = make_ssa_name (uboundvar, stmt); | |
2490 gimple_assign_set_lhs (stmt, uboundvar); | |
2491 | |
2492 if (insert_after) | |
2493 gsi_insert_after (&bsi, stmt, GSI_SAME_STMT); | |
2494 else | |
2495 gsi_insert_before (&bsi, stmt, GSI_SAME_STMT); | |
2496 update_stmt (stmt); | |
2497 gimple_cond_set_condition (exit_condition, GE_EXPR, uboundvar, ivvarinced); | |
2498 update_stmt (exit_condition); | |
2499 replacements = htab_create_ggc (20, tree_map_hash, | |
2500 tree_map_eq, NULL); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2501 bbs = get_loop_body_in_dom_order (loop); |
0 | 2502 /* Now move the statements, and replace the induction variable in the moved |
2503 statements with the correct loop induction variable. */ | |
2504 oldivvar = VEC_index (tree, loopivs, 0); | |
2505 firstbsi = gsi_start_bb (bodybb); | |
2506 for (i = loop->num_nodes - 1; i >= 0 ; i--) | |
2507 { | |
2508 gimple_stmt_iterator tobsi = gsi_last_bb (bodybb); | |
2509 if (bbs[i]->loop_father == loop) | |
2510 { | |
2511 /* If this is true, we are *before* the inner loop. | |
2512 If this isn't true, we are *after* it. | |
2513 | |
2514 The only time can_convert_to_perfect_nest returns true when we | |
2515 have statements before the inner loop is if they can be moved | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2516 into the inner loop. |
0 | 2517 |
2518 The only time can_convert_to_perfect_nest returns true when we | |
2519 have statements after the inner loop is if they can be moved into | |
2520 the new split loop. */ | |
2521 | |
2522 if (dominated_by_p (CDI_DOMINATORS, loop->inner->header, bbs[i])) | |
2523 { | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2524 gimple_stmt_iterator header_bsi |
0 | 2525 = gsi_after_labels (loop->inner->header); |
2526 | |
2527 for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi);) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2528 { |
0 | 2529 gimple stmt = gsi_stmt (bsi); |
2530 | |
2531 if (stmt == exit_condition | |
2532 || not_interesting_stmt (stmt) | |
2533 || stmt_is_bumper_for_loop (loop, stmt)) | |
2534 { | |
2535 gsi_next (&bsi); | |
2536 continue; | |
2537 } | |
2538 | |
2539 gsi_move_before (&bsi, &header_bsi); | |
2540 } | |
2541 } | |
2542 else | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2543 { |
0 | 2544 /* Note that the bsi only needs to be explicitly incremented |
2545 when we don't move something, since it is automatically | |
2546 incremented when we do. */ | |
2547 for (bsi = gsi_start_bb (bbs[i]); !gsi_end_p (bsi);) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2548 { |
0 | 2549 gimple stmt = gsi_stmt (bsi); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2550 |
0 | 2551 if (stmt == exit_condition |
2552 || not_interesting_stmt (stmt) | |
2553 || stmt_is_bumper_for_loop (loop, stmt)) | |
2554 { | |
2555 gsi_next (&bsi); | |
2556 continue; | |
2557 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2558 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2559 replace_uses_equiv_to_x_with_y |
0 | 2560 (loop, stmt, oldivvar, VEC_index (int, steps, 0), ivvar, |
2561 VEC_index (tree, lbounds, 0), replacements, &firstbsi); | |
2562 | |
2563 gsi_move_before (&bsi, &tobsi); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2564 |
0 | 2565 /* If the statement has any virtual operands, they may |
2566 need to be rewired because the original loop may | |
2567 still reference them. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2568 if (gimple_vuse (stmt)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2569 mark_sym_for_renaming (gimple_vop (cfun)); |
0 | 2570 } |
2571 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2572 |
0 | 2573 } |
2574 } | |
2575 | |
2576 free (bbs); | |
2577 htab_delete (replacements); | |
2578 return perfect_nest_p (loop); | |
2579 } | |
2580 | |
2581 /* Return true if TRANS is a legal transformation matrix that respects | |
2582 the dependence vectors in DISTS and DIRS. The conservative answer | |
2583 is false. | |
2584 | |
2585 "Wolfe proves that a unimodular transformation represented by the | |
2586 matrix T is legal when applied to a loop nest with a set of | |
2587 lexicographically non-negative distance vectors RDG if and only if | |
2588 for each vector d in RDG, (T.d >= 0) is lexicographically positive. | |
2589 i.e.: if and only if it transforms the lexicographically positive | |
2590 distance vectors to lexicographically positive vectors. Note that | |
2591 a unimodular matrix must transform the zero vector (and only it) to | |
2592 the zero vector." S.Muchnick. */ | |
2593 | |
2594 bool | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2595 lambda_transform_legal_p (lambda_trans_matrix trans, |
0 | 2596 int nb_loops, |
2597 VEC (ddr_p, heap) *dependence_relations) | |
2598 { | |
2599 unsigned int i, j; | |
2600 lambda_vector distres; | |
2601 struct data_dependence_relation *ddr; | |
2602 | |
2603 gcc_assert (LTM_COLSIZE (trans) == nb_loops | |
2604 && LTM_ROWSIZE (trans) == nb_loops); | |
2605 | |
2606 /* When there are no dependences, the transformation is correct. */ | |
2607 if (VEC_length (ddr_p, dependence_relations) == 0) | |
2608 return true; | |
2609 | |
2610 ddr = VEC_index (ddr_p, dependence_relations, 0); | |
2611 if (ddr == NULL) | |
2612 return true; | |
2613 | |
2614 /* When there is an unknown relation in the dependence_relations, we | |
2615 know that it is no worth looking at this loop nest: give up. */ | |
2616 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) | |
2617 return false; | |
2618 | |
2619 distres = lambda_vector_new (nb_loops); | |
2620 | |
2621 /* For each distance vector in the dependence graph. */ | |
2622 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++) | |
2623 { | |
2624 /* Don't care about relations for which we know that there is no | |
2625 dependence, nor about read-read (aka. output-dependences): | |
2626 these data accesses can happen in any order. */ | |
2627 if (DDR_ARE_DEPENDENT (ddr) == chrec_known | |
2628 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr)))) | |
2629 continue; | |
2630 | |
2631 /* Conservatively answer: "this transformation is not valid". */ | |
2632 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) | |
2633 return false; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2634 |
0 | 2635 /* If the dependence could not be captured by a distance vector, |
2636 conservatively answer that the transform is not valid. */ | |
2637 if (DDR_NUM_DIST_VECTS (ddr) == 0) | |
2638 return false; | |
2639 | |
2640 /* Compute trans.dist_vect */ | |
2641 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++) | |
2642 { | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2643 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops, |
0 | 2644 DDR_DIST_VECT (ddr, j), distres); |
2645 | |
2646 if (!lambda_vector_lexico_pos (distres, nb_loops)) | |
2647 return false; | |
2648 } | |
2649 } | |
2650 return true; | |
2651 } | |
2652 | |
2653 | |
2654 /* Collects parameters from affine function ACCESS_FUNCTION, and push | |
2655 them in PARAMETERS. */ | |
2656 | |
2657 static void | |
2658 lambda_collect_parameters_from_af (tree access_function, | |
2659 struct pointer_set_t *param_set, | |
2660 VEC (tree, heap) **parameters) | |
2661 { | |
2662 if (access_function == NULL) | |
2663 return; | |
2664 | |
2665 if (TREE_CODE (access_function) == SSA_NAME | |
2666 && pointer_set_contains (param_set, access_function) == 0) | |
2667 { | |
2668 pointer_set_insert (param_set, access_function); | |
2669 VEC_safe_push (tree, heap, *parameters, access_function); | |
2670 } | |
2671 else | |
2672 { | |
2673 int i, num_operands = tree_operand_length (access_function); | |
2674 | |
2675 for (i = 0; i < num_operands; i++) | |
2676 lambda_collect_parameters_from_af (TREE_OPERAND (access_function, i), | |
2677 param_set, parameters); | |
2678 } | |
2679 } | |
2680 | |
2681 /* Collects parameters from DATAREFS, and push them in PARAMETERS. */ | |
2682 | |
2683 void | |
2684 lambda_collect_parameters (VEC (data_reference_p, heap) *datarefs, | |
2685 VEC (tree, heap) **parameters) | |
2686 { | |
2687 unsigned i, j; | |
2688 struct pointer_set_t *parameter_set = pointer_set_create (); | |
2689 data_reference_p data_reference; | |
2690 | |
2691 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, data_reference); i++) | |
2692 for (j = 0; j < DR_NUM_DIMENSIONS (data_reference); j++) | |
2693 lambda_collect_parameters_from_af (DR_ACCESS_FN (data_reference, j), | |
2694 parameter_set, parameters); | |
2695 pointer_set_destroy (parameter_set); | |
2696 } | |
2697 | |
2698 /* Translates BASE_EXPR to vector CY. AM is needed for inferring | |
2699 indexing positions in the data access vector. CST is the analyzed | |
2700 integer constant. */ | |
2701 | |
2702 static bool | |
2703 av_for_af_base (tree base_expr, lambda_vector cy, struct access_matrix *am, | |
2704 int cst) | |
2705 { | |
2706 bool result = true; | |
2707 | |
2708 switch (TREE_CODE (base_expr)) | |
2709 { | |
2710 case INTEGER_CST: | |
2711 /* Constant part. */ | |
2712 cy[AM_CONST_COLUMN_INDEX (am)] += int_cst_value (base_expr) * cst; | |
2713 return true; | |
2714 | |
2715 case SSA_NAME: | |
2716 { | |
2717 int param_index = | |
2718 access_matrix_get_index_for_parameter (base_expr, am); | |
2719 | |
2720 if (param_index >= 0) | |
2721 { | |
2722 cy[param_index] = cst + cy[param_index]; | |
2723 return true; | |
2724 } | |
2725 | |
2726 return false; | |
2727 } | |
2728 | |
2729 case PLUS_EXPR: | |
2730 return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, cst) | |
2731 && av_for_af_base (TREE_OPERAND (base_expr, 1), cy, am, cst); | |
2732 | |
2733 case MINUS_EXPR: | |
2734 return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, cst) | |
2735 && av_for_af_base (TREE_OPERAND (base_expr, 1), cy, am, -1 * cst); | |
2736 | |
2737 case MULT_EXPR: | |
2738 if (TREE_CODE (TREE_OPERAND (base_expr, 0)) == INTEGER_CST) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
2739 result = av_for_af_base (TREE_OPERAND (base_expr, 1), |
0 | 2740 cy, am, cst * |
2741 int_cst_value (TREE_OPERAND (base_expr, 0))); | |
2742 else if (TREE_CODE (TREE_OPERAND (base_expr, 1)) == INTEGER_CST) | |
2743 result = av_for_af_base (TREE_OPERAND (base_expr, 0), | |
2744 cy, am, cst * | |
2745 int_cst_value (TREE_OPERAND (base_expr, 1))); | |
2746 else | |
2747 result = false; | |
2748 | |
2749 return result; | |
2750 | |
2751 case NEGATE_EXPR: | |
2752 return av_for_af_base (TREE_OPERAND (base_expr, 0), cy, am, -1 * cst); | |
2753 | |
2754 default: | |
2755 return false; | |
2756 } | |
2757 | |
2758 return result; | |
2759 } | |
2760 | |
2761 /* Translates ACCESS_FUN to vector CY. AM is needed for inferring | |
2762 indexing positions in the data access vector. */ | |
2763 | |
2764 static bool | |
2765 av_for_af (tree access_fun, lambda_vector cy, struct access_matrix *am) | |
2766 { | |
2767 switch (TREE_CODE (access_fun)) | |
2768 { | |
2769 case POLYNOMIAL_CHREC: | |
2770 { | |
2771 tree left = CHREC_LEFT (access_fun); | |
2772 tree right = CHREC_RIGHT (access_fun); | |
2773 unsigned var; | |
2774 | |
2775 if (TREE_CODE (right) != INTEGER_CST) | |
2776 return false; | |
2777 | |
2778 var = am_vector_index_for_loop (am, CHREC_VARIABLE (access_fun)); | |
2779 cy[var] = int_cst_value (right); | |
2780 | |
2781 if (TREE_CODE (left) == POLYNOMIAL_CHREC) | |
2782 return av_for_af (left, cy, am); | |
2783 else | |
2784 return av_for_af_base (left, cy, am, 1); | |
2785 } | |
2786 | |
2787 case INTEGER_CST: | |
2788 /* Constant part. */ | |
2789 return av_for_af_base (access_fun, cy, am, 1); | |
2790 | |
2791 default: | |
2792 return false; | |
2793 } | |
2794 } | |
2795 | |
2796 /* Initializes the access matrix for DATA_REFERENCE. */ | |
2797 | |
2798 static bool | |
2799 build_access_matrix (data_reference_p data_reference, | |
2800 VEC (tree, heap) *parameters, VEC (loop_p, heap) *nest) | |
2801 { | |
2802 struct access_matrix *am = GGC_NEW (struct access_matrix); | |
2803 unsigned i, ndim = DR_NUM_DIMENSIONS (data_reference); | |
2804 unsigned nivs = VEC_length (loop_p, nest); | |
2805 unsigned lambda_nb_columns; | |
2806 | |
2807 AM_LOOP_NEST (am) = nest; | |
2808 AM_NB_INDUCTION_VARS (am) = nivs; | |
2809 AM_PARAMETERS (am) = parameters; | |
2810 | |
2811 lambda_nb_columns = AM_NB_COLUMNS (am); | |
2812 AM_MATRIX (am) = VEC_alloc (lambda_vector, gc, ndim); | |
2813 | |
2814 for (i = 0; i < ndim; i++) | |
2815 { | |
2816 lambda_vector access_vector = lambda_vector_new (lambda_nb_columns); | |
2817 tree access_function = DR_ACCESS_FN (data_reference, i); | |
2818 | |
2819 if (!av_for_af (access_function, access_vector, am)) | |
2820 return false; | |
2821 | |
2822 VEC_quick_push (lambda_vector, AM_MATRIX (am), access_vector); | |
2823 } | |
2824 | |
2825 DR_ACCESS_MATRIX (data_reference) = am; | |
2826 return true; | |
2827 } | |
2828 | |
2829 /* Returns false when one of the access matrices cannot be built. */ | |
2830 | |
2831 bool | |
2832 lambda_compute_access_matrices (VEC (data_reference_p, heap) *datarefs, | |
2833 VEC (tree, heap) *parameters, | |
2834 VEC (loop_p, heap) *nest) | |
2835 { | |
2836 data_reference_p dataref; | |
2837 unsigned ix; | |
2838 | |
2839 for (ix = 0; VEC_iterate (data_reference_p, datarefs, ix, dataref); ix++) | |
2840 if (!build_access_matrix (dataref, parameters, nest)) | |
2841 return false; | |
2842 | |
2843 return true; | |
2844 } |