Mercurial > hg > CbC > CbC_gcc
comparison gcc/graphite-flattening.c @ 68:561a7518be6b
update gcc-4.6
author | Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Sun, 21 Aug 2011 07:07:55 +0900 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
67:f6334be47118 | 68:561a7518be6b |
---|---|
1 /* Loop flattening for Graphite. | |
2 Copyright (C) 2010 Free Software Foundation, Inc. | |
3 Contributed by Sebastian Pop <sebastian.pop@amd.com>. | |
4 | |
5 This file is part of GCC. | |
6 | |
7 GCC is free software; you can redistribute it and/or modify | |
8 it under the terms of the GNU General Public License as published by | |
9 the Free Software Foundation; either version 3, or (at your option) | |
10 any later version. | |
11 | |
12 GCC is distributed in the hope that it will be useful, | |
13 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 GNU General Public License for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with GCC; see the file COPYING3. If not see | |
19 <http://www.gnu.org/licenses/>. */ | |
20 | |
21 #include "config.h" | |
22 #include "system.h" | |
23 #include "coretypes.h" | |
24 #include "tree-flow.h" | |
25 #include "tree-dump.h" | |
26 #include "cfgloop.h" | |
27 #include "tree-chrec.h" | |
28 #include "tree-data-ref.h" | |
29 #include "tree-scalar-evolution.h" | |
30 #include "sese.h" | |
31 | |
32 #ifdef HAVE_cloog | |
33 #include "ppl_c.h" | |
34 #include "graphite-ppl.h" | |
35 #include "graphite-poly.h" | |
36 | |
37 /* The loop flattening pass transforms loop nests into a single loop, | |
38 removing the loop nesting structure. The auto-vectorization can | |
39 then apply on the full loop body, without needing the outer-loop | |
40 vectorization. | |
41 | |
42 The loop flattening pass that has been described in a very Fortran | |
43 specific way in the 1992 paper by Reinhard von Hanxleden and Ken | |
44 Kennedy: "Relaxing SIMD Control Flow Constraints using Loop | |
45 Transformations" available from | |
46 http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.5033 | |
47 | |
48 The canonical example is as follows: suppose that we have a loop | |
49 nest with known iteration counts | |
50 | |
51 | for (i = 1; i <= 6; i++) | |
52 | for (j = 1; j <= 6; j++) | |
53 | S1(i,j); | |
54 | |
55 The loop flattening is performed by linearizing the iteration space | |
56 using the function "f (x) = 6 * i + j". In this case, CLooG would | |
57 produce this code: | |
58 | |
59 | for (c1=7;c1<=42;c1++) { | |
60 | i = floord(c1-1,6); | |
61 | S1(i,c1-6*i); | |
62 | } | |
63 | |
64 There are several limitations for loop flattening that are linked | |
65 to the expressivity of the polyhedral model. One has to take an | |
66 upper bound approximation to deal with the parametric case of loop | |
67 flattening. For example, in the loop nest: | |
68 | |
69 | for (i = 1; i <= N; i++) | |
70 | for (j = 1; j <= M; j++) | |
71 | S1(i,j); | |
72 | |
73 One would like to flatten this loop using a linearization function | |
74 like this "f (x) = M * i + j". However CLooG's schedules are not | |
75 expressive enough to deal with this case, and so the parameter M | |
76 has to be replaced by an integer upper bound approximation. If we | |
77 further know in the context of the scop that "M <= 6", then it is | |
78 possible to linearize the loop with "f (x) = 6 * i + j". In this | |
79 case, CLooG would produce this code: | |
80 | |
81 | for (c1=7;c1<=6*M+N;c1++) { | |
82 | i = ceild(c1-N,6); | |
83 | if (i <= floord(c1-1,6)) { | |
84 | S1(i,c1-6*i); | |
85 | } | |
86 | } | |
87 | |
88 For an arbitrarily complex loop nest the algorithm proceeds in two | |
89 steps. First, the LST is flattened by removing the loops structure | |
90 and by inserting the statements in the order they appear in | |
91 depth-first order. Then, the scattering of each statement is | |
92 transformed accordingly. | |
93 | |
94 Supposing that the original program is represented by the following | |
95 LST: | |
96 | |
97 | (loop_1 | |
98 | stmt_1 | |
99 | (loop_2 stmt_3 | |
100 | (loop_3 stmt_4) | |
101 | (loop_4 stmt_5 stmt_6) | |
102 | stmt_7 | |
103 | ) | |
104 | stmt_2 | |
105 | ) | |
106 | |
107 Loop flattening traverses the LST in depth-first order, and | |
108 flattens pairs of loops successively by projecting the inner loops | |
109 in the iteration domain of the outer loops: | |
110 | |
111 lst_project_loop (loop_2, loop_3, stride) | |
112 | |
113 | (loop_1 | |
114 | stmt_1 | |
115 | (loop_2 stmt_3 stmt_4 | |
116 | (loop_4 stmt_5 stmt_6) | |
117 | stmt_7 | |
118 | ) | |
119 | stmt_2 | |
120 | ) | |
121 | |
122 lst_project_loop (loop_2, loop_4, stride) | |
123 | |
124 | (loop_1 | |
125 | stmt_1 | |
126 | (loop_2 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7) | |
127 | stmt_2 | |
128 | ) | |
129 | |
130 lst_project_loop (loop_1, loop_2, stride) | |
131 | |
132 | (loop_1 | |
133 | stmt_1 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7 stmt_2 | |
134 | ) | |
135 | |
136 At each step, the iteration domain of the outer loop is enlarged to | |
137 contain enough points to iterate over the inner loop domain. */ | |
138 | |
139 /* Initializes RES to the number of iterations of the linearized loop | |
140 LST. RES is the cardinal of the iteration domain of LST. */ | |
141 | |
142 static void | |
143 lst_linearized_niter (lst_p lst, mpz_t res) | |
144 { | |
145 int i; | |
146 lst_p l; | |
147 mpz_t n; | |
148 | |
149 mpz_init (n); | |
150 mpz_set_si (res, 0); | |
151 | |
152 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l) | |
153 if (LST_LOOP_P (l)) | |
154 { | |
155 lst_linearized_niter (l, n); | |
156 mpz_add (res, res, n); | |
157 } | |
158 | |
159 if (LST_LOOP_P (lst)) | |
160 { | |
161 lst_niter_for_loop (lst, n); | |
162 | |
163 if (mpz_cmp_si (res, 0) != 0) | |
164 mpz_mul (res, res, n); | |
165 else | |
166 mpz_set (res, n); | |
167 } | |
168 | |
169 mpz_clear (n); | |
170 } | |
171 | |
172 /* Applies the translation "f (x) = x + OFFSET" to the loop containing | |
173 STMT. */ | |
174 | |
175 static void | |
176 lst_offset (lst_p stmt, mpz_t offset) | |
177 { | |
178 lst_p inner = LST_LOOP_FATHER (stmt); | |
179 poly_bb_p pbb = LST_PBB (stmt); | |
180 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb); | |
181 int inner_depth = lst_depth (inner); | |
182 ppl_dimension_type inner_dim = psct_dynamic_dim (pbb, inner_depth); | |
183 ppl_Linear_Expression_t expr; | |
184 ppl_dimension_type dim; | |
185 ppl_Coefficient_t one; | |
186 mpz_t x; | |
187 | |
188 mpz_init (x); | |
189 mpz_set_si (x, 1); | |
190 ppl_new_Coefficient (&one); | |
191 ppl_assign_Coefficient_from_mpz_t (one, x); | |
192 | |
193 ppl_Polyhedron_space_dimension (poly, &dim); | |
194 ppl_new_Linear_Expression_with_dimension (&expr, dim); | |
195 | |
196 ppl_set_coef (expr, inner_dim, 1); | |
197 ppl_set_inhomogeneous_gmp (expr, offset); | |
198 ppl_Polyhedron_affine_image (poly, inner_dim, expr, one); | |
199 ppl_delete_Linear_Expression (expr); | |
200 ppl_delete_Coefficient (one); | |
201 } | |
202 | |
203 /* Scale by FACTOR the loop LST containing STMT. */ | |
204 | |
205 static void | |
206 lst_scale (lst_p lst, lst_p stmt, mpz_t factor) | |
207 { | |
208 mpz_t x; | |
209 ppl_Coefficient_t one; | |
210 int outer_depth = lst_depth (lst); | |
211 poly_bb_p pbb = LST_PBB (stmt); | |
212 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb); | |
213 ppl_dimension_type outer_dim = psct_dynamic_dim (pbb, outer_depth); | |
214 ppl_Linear_Expression_t expr; | |
215 ppl_dimension_type dim; | |
216 | |
217 mpz_init (x); | |
218 mpz_set_si (x, 1); | |
219 ppl_new_Coefficient (&one); | |
220 ppl_assign_Coefficient_from_mpz_t (one, x); | |
221 | |
222 ppl_Polyhedron_space_dimension (poly, &dim); | |
223 ppl_new_Linear_Expression_with_dimension (&expr, dim); | |
224 | |
225 /* outer_dim = factor * outer_dim. */ | |
226 ppl_set_coef_gmp (expr, outer_dim, factor); | |
227 ppl_Polyhedron_affine_image (poly, outer_dim, expr, one); | |
228 ppl_delete_Linear_Expression (expr); | |
229 | |
230 mpz_clear (x); | |
231 ppl_delete_Coefficient (one); | |
232 } | |
233 | |
234 /* Project the INNER loop into the iteration domain of the OUTER loop. | |
235 STRIDE is the number of iterations between two iterations of the | |
236 outer loop. */ | |
237 | |
238 static void | |
239 lst_project_loop (lst_p outer, lst_p inner, mpz_t stride) | |
240 { | |
241 int i; | |
242 lst_p stmt; | |
243 mpz_t x; | |
244 ppl_Coefficient_t one; | |
245 int outer_depth = lst_depth (outer); | |
246 int inner_depth = lst_depth (inner); | |
247 | |
248 mpz_init (x); | |
249 mpz_set_si (x, 1); | |
250 ppl_new_Coefficient (&one); | |
251 ppl_assign_Coefficient_from_mpz_t (one, x); | |
252 | |
253 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (inner), i, stmt) | |
254 { | |
255 poly_bb_p pbb = LST_PBB (stmt); | |
256 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb); | |
257 ppl_dimension_type outer_dim = psct_dynamic_dim (pbb, outer_depth); | |
258 ppl_dimension_type inner_dim = psct_dynamic_dim (pbb, inner_depth); | |
259 ppl_Linear_Expression_t expr; | |
260 ppl_dimension_type dim; | |
261 ppl_dimension_type *ds; | |
262 | |
263 /* There should be no loops under INNER. */ | |
264 gcc_assert (!LST_LOOP_P (stmt)); | |
265 ppl_Polyhedron_space_dimension (poly, &dim); | |
266 ppl_new_Linear_Expression_with_dimension (&expr, dim); | |
267 | |
268 /* outer_dim = outer_dim * stride + inner_dim. */ | |
269 ppl_set_coef (expr, inner_dim, 1); | |
270 ppl_set_coef_gmp (expr, outer_dim, stride); | |
271 ppl_Polyhedron_affine_image (poly, outer_dim, expr, one); | |
272 ppl_delete_Linear_Expression (expr); | |
273 | |
274 /* Project on inner_dim. */ | |
275 ppl_new_Linear_Expression_with_dimension (&expr, dim - 1); | |
276 ppl_Polyhedron_affine_image (poly, inner_dim, expr, one); | |
277 ppl_delete_Linear_Expression (expr); | |
278 | |
279 /* Remove inner loop and the static schedule of its body. */ | |
280 ds = XNEWVEC (ppl_dimension_type, 2); | |
281 ds[0] = inner_dim; | |
282 ds[1] = inner_dim + 1; | |
283 ppl_Polyhedron_remove_space_dimensions (poly, ds, 2); | |
284 PBB_NB_SCATTERING_TRANSFORM (pbb) -= 2; | |
285 free (ds); | |
286 } | |
287 | |
288 mpz_clear (x); | |
289 ppl_delete_Coefficient (one); | |
290 } | |
291 | |
292 /* Flattens the loop nest LST. Return true when something changed. | |
293 OFFSET is used to compute the number of iterations of the outermost | |
294 loop before the current LST is executed. */ | |
295 | |
296 static bool | |
297 lst_flatten_loop (lst_p lst, mpz_t init_offset) | |
298 { | |
299 int i; | |
300 lst_p l; | |
301 bool res = false; | |
302 mpz_t n, one, offset, stride; | |
303 | |
304 mpz_init (n); | |
305 mpz_init (one); | |
306 mpz_init (offset); | |
307 mpz_init (stride); | |
308 mpz_set (offset, init_offset); | |
309 mpz_set_si (one, 1); | |
310 | |
311 lst_linearized_niter (lst, stride); | |
312 lst_niter_for_loop (lst, n); | |
313 mpz_tdiv_q (stride, stride, n); | |
314 | |
315 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l) | |
316 if (LST_LOOP_P (l)) | |
317 { | |
318 res = true; | |
319 | |
320 lst_flatten_loop (l, offset); | |
321 lst_niter_for_loop (l, n); | |
322 | |
323 lst_project_loop (lst, l, stride); | |
324 | |
325 /* The offset is the number of iterations minus 1, as we want | |
326 to execute the next statements at the same iteration as the | |
327 last iteration of the loop. */ | |
328 mpz_sub (n, n, one); | |
329 mpz_add (offset, offset, n); | |
330 } | |
331 else | |
332 { | |
333 lst_scale (lst, l, stride); | |
334 if (mpz_cmp_si (offset, 0) != 0) | |
335 lst_offset (l, offset); | |
336 } | |
337 | |
338 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l) | |
339 if (LST_LOOP_P (l)) | |
340 lst_remove_loop_and_inline_stmts_in_loop_father (l); | |
341 | |
342 mpz_clear (n); | |
343 mpz_clear (one); | |
344 mpz_clear (offset); | |
345 mpz_clear (stride); | |
346 return res; | |
347 } | |
348 | |
349 /* Remove all but the first 3 dimensions of the scattering: | |
350 - dim0: the static schedule for the loop | |
351 - dim1: the dynamic schedule of the loop | |
352 - dim2: the static schedule for the loop body. */ | |
353 | |
354 static void | |
355 remove_unused_scattering_dimensions (lst_p lst) | |
356 { | |
357 int i; | |
358 lst_p stmt; | |
359 mpz_t x; | |
360 ppl_Coefficient_t one; | |
361 | |
362 mpz_init (x); | |
363 mpz_set_si (x, 1); | |
364 ppl_new_Coefficient (&one); | |
365 ppl_assign_Coefficient_from_mpz_t (one, x); | |
366 | |
367 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, stmt) | |
368 { | |
369 poly_bb_p pbb = LST_PBB (stmt); | |
370 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb); | |
371 int j, nb_dims_to_remove = PBB_NB_SCATTERING_TRANSFORM (pbb) - 3; | |
372 ppl_dimension_type *ds; | |
373 | |
374 /* There should be no loops inside LST after flattening. */ | |
375 gcc_assert (!LST_LOOP_P (stmt)); | |
376 | |
377 if (!nb_dims_to_remove) | |
378 continue; | |
379 | |
380 ds = XNEWVEC (ppl_dimension_type, nb_dims_to_remove); | |
381 for (j = 0; j < nb_dims_to_remove; j++) | |
382 ds[j] = j + 3; | |
383 | |
384 ppl_Polyhedron_remove_space_dimensions (poly, ds, nb_dims_to_remove); | |
385 PBB_NB_SCATTERING_TRANSFORM (pbb) -= nb_dims_to_remove; | |
386 free (ds); | |
387 } | |
388 | |
389 mpz_clear (x); | |
390 ppl_delete_Coefficient (one); | |
391 } | |
392 | |
393 /* Flattens all the loop nests of LST. Return true when something | |
394 changed. */ | |
395 | |
396 static bool | |
397 lst_do_flatten (lst_p lst) | |
398 { | |
399 int i; | |
400 lst_p l; | |
401 bool res = false; | |
402 mpz_t zero; | |
403 | |
404 if (!lst | |
405 || !LST_LOOP_P (lst)) | |
406 return false; | |
407 | |
408 mpz_init (zero); | |
409 mpz_set_si (zero, 0); | |
410 | |
411 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l) | |
412 if (LST_LOOP_P (l)) | |
413 { | |
414 res |= lst_flatten_loop (l, zero); | |
415 remove_unused_scattering_dimensions (l); | |
416 } | |
417 | |
418 lst_update_scattering (lst); | |
419 mpz_clear (zero); | |
420 return res; | |
421 } | |
422 | |
423 /* Flatten all the loop nests in SCOP. Returns true when something | |
424 changed. */ | |
425 | |
426 bool | |
427 flatten_all_loops (scop_p scop) | |
428 { | |
429 return lst_do_flatten (SCOP_TRANSFORMED_SCHEDULE (scop)); | |
430 } | |
431 | |
432 #endif |