comparison gcc/graphite-flattening.c @ 68:561a7518be6b

update gcc-4.6
author Nobuyasu Oshiro <dimolto@cr.ie.u-ryukyu.ac.jp>
date Sun, 21 Aug 2011 07:07:55 +0900
parents
children
comparison
equal deleted inserted replaced
67:f6334be47118 68:561a7518be6b
1 /* Loop flattening for Graphite.
2 Copyright (C) 2010 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <sebastian.pop@amd.com>.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tree-flow.h"
25 #include "tree-dump.h"
26 #include "cfgloop.h"
27 #include "tree-chrec.h"
28 #include "tree-data-ref.h"
29 #include "tree-scalar-evolution.h"
30 #include "sese.h"
31
32 #ifdef HAVE_cloog
33 #include "ppl_c.h"
34 #include "graphite-ppl.h"
35 #include "graphite-poly.h"
36
37 /* The loop flattening pass transforms loop nests into a single loop,
38 removing the loop nesting structure. The auto-vectorization can
39 then apply on the full loop body, without needing the outer-loop
40 vectorization.
41
42 The loop flattening pass that has been described in a very Fortran
43 specific way in the 1992 paper by Reinhard von Hanxleden and Ken
44 Kennedy: "Relaxing SIMD Control Flow Constraints using Loop
45 Transformations" available from
46 http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.54.5033
47
48 The canonical example is as follows: suppose that we have a loop
49 nest with known iteration counts
50
51 | for (i = 1; i <= 6; i++)
52 | for (j = 1; j <= 6; j++)
53 | S1(i,j);
54
55 The loop flattening is performed by linearizing the iteration space
56 using the function "f (x) = 6 * i + j". In this case, CLooG would
57 produce this code:
58
59 | for (c1=7;c1<=42;c1++) {
60 | i = floord(c1-1,6);
61 | S1(i,c1-6*i);
62 | }
63
64 There are several limitations for loop flattening that are linked
65 to the expressivity of the polyhedral model. One has to take an
66 upper bound approximation to deal with the parametric case of loop
67 flattening. For example, in the loop nest:
68
69 | for (i = 1; i <= N; i++)
70 | for (j = 1; j <= M; j++)
71 | S1(i,j);
72
73 One would like to flatten this loop using a linearization function
74 like this "f (x) = M * i + j". However CLooG's schedules are not
75 expressive enough to deal with this case, and so the parameter M
76 has to be replaced by an integer upper bound approximation. If we
77 further know in the context of the scop that "M <= 6", then it is
78 possible to linearize the loop with "f (x) = 6 * i + j". In this
79 case, CLooG would produce this code:
80
81 | for (c1=7;c1<=6*M+N;c1++) {
82 | i = ceild(c1-N,6);
83 | if (i <= floord(c1-1,6)) {
84 | S1(i,c1-6*i);
85 | }
86 | }
87
88 For an arbitrarily complex loop nest the algorithm proceeds in two
89 steps. First, the LST is flattened by removing the loops structure
90 and by inserting the statements in the order they appear in
91 depth-first order. Then, the scattering of each statement is
92 transformed accordingly.
93
94 Supposing that the original program is represented by the following
95 LST:
96
97 | (loop_1
98 | stmt_1
99 | (loop_2 stmt_3
100 | (loop_3 stmt_4)
101 | (loop_4 stmt_5 stmt_6)
102 | stmt_7
103 | )
104 | stmt_2
105 | )
106
107 Loop flattening traverses the LST in depth-first order, and
108 flattens pairs of loops successively by projecting the inner loops
109 in the iteration domain of the outer loops:
110
111 lst_project_loop (loop_2, loop_3, stride)
112
113 | (loop_1
114 | stmt_1
115 | (loop_2 stmt_3 stmt_4
116 | (loop_4 stmt_5 stmt_6)
117 | stmt_7
118 | )
119 | stmt_2
120 | )
121
122 lst_project_loop (loop_2, loop_4, stride)
123
124 | (loop_1
125 | stmt_1
126 | (loop_2 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7)
127 | stmt_2
128 | )
129
130 lst_project_loop (loop_1, loop_2, stride)
131
132 | (loop_1
133 | stmt_1 stmt_3 stmt_4 stmt_5 stmt_6 stmt_7 stmt_2
134 | )
135
136 At each step, the iteration domain of the outer loop is enlarged to
137 contain enough points to iterate over the inner loop domain. */
138
139 /* Initializes RES to the number of iterations of the linearized loop
140 LST. RES is the cardinal of the iteration domain of LST. */
141
142 static void
143 lst_linearized_niter (lst_p lst, mpz_t res)
144 {
145 int i;
146 lst_p l;
147 mpz_t n;
148
149 mpz_init (n);
150 mpz_set_si (res, 0);
151
152 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
153 if (LST_LOOP_P (l))
154 {
155 lst_linearized_niter (l, n);
156 mpz_add (res, res, n);
157 }
158
159 if (LST_LOOP_P (lst))
160 {
161 lst_niter_for_loop (lst, n);
162
163 if (mpz_cmp_si (res, 0) != 0)
164 mpz_mul (res, res, n);
165 else
166 mpz_set (res, n);
167 }
168
169 mpz_clear (n);
170 }
171
172 /* Applies the translation "f (x) = x + OFFSET" to the loop containing
173 STMT. */
174
175 static void
176 lst_offset (lst_p stmt, mpz_t offset)
177 {
178 lst_p inner = LST_LOOP_FATHER (stmt);
179 poly_bb_p pbb = LST_PBB (stmt);
180 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
181 int inner_depth = lst_depth (inner);
182 ppl_dimension_type inner_dim = psct_dynamic_dim (pbb, inner_depth);
183 ppl_Linear_Expression_t expr;
184 ppl_dimension_type dim;
185 ppl_Coefficient_t one;
186 mpz_t x;
187
188 mpz_init (x);
189 mpz_set_si (x, 1);
190 ppl_new_Coefficient (&one);
191 ppl_assign_Coefficient_from_mpz_t (one, x);
192
193 ppl_Polyhedron_space_dimension (poly, &dim);
194 ppl_new_Linear_Expression_with_dimension (&expr, dim);
195
196 ppl_set_coef (expr, inner_dim, 1);
197 ppl_set_inhomogeneous_gmp (expr, offset);
198 ppl_Polyhedron_affine_image (poly, inner_dim, expr, one);
199 ppl_delete_Linear_Expression (expr);
200 ppl_delete_Coefficient (one);
201 }
202
203 /* Scale by FACTOR the loop LST containing STMT. */
204
205 static void
206 lst_scale (lst_p lst, lst_p stmt, mpz_t factor)
207 {
208 mpz_t x;
209 ppl_Coefficient_t one;
210 int outer_depth = lst_depth (lst);
211 poly_bb_p pbb = LST_PBB (stmt);
212 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
213 ppl_dimension_type outer_dim = psct_dynamic_dim (pbb, outer_depth);
214 ppl_Linear_Expression_t expr;
215 ppl_dimension_type dim;
216
217 mpz_init (x);
218 mpz_set_si (x, 1);
219 ppl_new_Coefficient (&one);
220 ppl_assign_Coefficient_from_mpz_t (one, x);
221
222 ppl_Polyhedron_space_dimension (poly, &dim);
223 ppl_new_Linear_Expression_with_dimension (&expr, dim);
224
225 /* outer_dim = factor * outer_dim. */
226 ppl_set_coef_gmp (expr, outer_dim, factor);
227 ppl_Polyhedron_affine_image (poly, outer_dim, expr, one);
228 ppl_delete_Linear_Expression (expr);
229
230 mpz_clear (x);
231 ppl_delete_Coefficient (one);
232 }
233
234 /* Project the INNER loop into the iteration domain of the OUTER loop.
235 STRIDE is the number of iterations between two iterations of the
236 outer loop. */
237
238 static void
239 lst_project_loop (lst_p outer, lst_p inner, mpz_t stride)
240 {
241 int i;
242 lst_p stmt;
243 mpz_t x;
244 ppl_Coefficient_t one;
245 int outer_depth = lst_depth (outer);
246 int inner_depth = lst_depth (inner);
247
248 mpz_init (x);
249 mpz_set_si (x, 1);
250 ppl_new_Coefficient (&one);
251 ppl_assign_Coefficient_from_mpz_t (one, x);
252
253 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (inner), i, stmt)
254 {
255 poly_bb_p pbb = LST_PBB (stmt);
256 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
257 ppl_dimension_type outer_dim = psct_dynamic_dim (pbb, outer_depth);
258 ppl_dimension_type inner_dim = psct_dynamic_dim (pbb, inner_depth);
259 ppl_Linear_Expression_t expr;
260 ppl_dimension_type dim;
261 ppl_dimension_type *ds;
262
263 /* There should be no loops under INNER. */
264 gcc_assert (!LST_LOOP_P (stmt));
265 ppl_Polyhedron_space_dimension (poly, &dim);
266 ppl_new_Linear_Expression_with_dimension (&expr, dim);
267
268 /* outer_dim = outer_dim * stride + inner_dim. */
269 ppl_set_coef (expr, inner_dim, 1);
270 ppl_set_coef_gmp (expr, outer_dim, stride);
271 ppl_Polyhedron_affine_image (poly, outer_dim, expr, one);
272 ppl_delete_Linear_Expression (expr);
273
274 /* Project on inner_dim. */
275 ppl_new_Linear_Expression_with_dimension (&expr, dim - 1);
276 ppl_Polyhedron_affine_image (poly, inner_dim, expr, one);
277 ppl_delete_Linear_Expression (expr);
278
279 /* Remove inner loop and the static schedule of its body. */
280 ds = XNEWVEC (ppl_dimension_type, 2);
281 ds[0] = inner_dim;
282 ds[1] = inner_dim + 1;
283 ppl_Polyhedron_remove_space_dimensions (poly, ds, 2);
284 PBB_NB_SCATTERING_TRANSFORM (pbb) -= 2;
285 free (ds);
286 }
287
288 mpz_clear (x);
289 ppl_delete_Coefficient (one);
290 }
291
292 /* Flattens the loop nest LST. Return true when something changed.
293 OFFSET is used to compute the number of iterations of the outermost
294 loop before the current LST is executed. */
295
296 static bool
297 lst_flatten_loop (lst_p lst, mpz_t init_offset)
298 {
299 int i;
300 lst_p l;
301 bool res = false;
302 mpz_t n, one, offset, stride;
303
304 mpz_init (n);
305 mpz_init (one);
306 mpz_init (offset);
307 mpz_init (stride);
308 mpz_set (offset, init_offset);
309 mpz_set_si (one, 1);
310
311 lst_linearized_niter (lst, stride);
312 lst_niter_for_loop (lst, n);
313 mpz_tdiv_q (stride, stride, n);
314
315 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
316 if (LST_LOOP_P (l))
317 {
318 res = true;
319
320 lst_flatten_loop (l, offset);
321 lst_niter_for_loop (l, n);
322
323 lst_project_loop (lst, l, stride);
324
325 /* The offset is the number of iterations minus 1, as we want
326 to execute the next statements at the same iteration as the
327 last iteration of the loop. */
328 mpz_sub (n, n, one);
329 mpz_add (offset, offset, n);
330 }
331 else
332 {
333 lst_scale (lst, l, stride);
334 if (mpz_cmp_si (offset, 0) != 0)
335 lst_offset (l, offset);
336 }
337
338 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
339 if (LST_LOOP_P (l))
340 lst_remove_loop_and_inline_stmts_in_loop_father (l);
341
342 mpz_clear (n);
343 mpz_clear (one);
344 mpz_clear (offset);
345 mpz_clear (stride);
346 return res;
347 }
348
349 /* Remove all but the first 3 dimensions of the scattering:
350 - dim0: the static schedule for the loop
351 - dim1: the dynamic schedule of the loop
352 - dim2: the static schedule for the loop body. */
353
354 static void
355 remove_unused_scattering_dimensions (lst_p lst)
356 {
357 int i;
358 lst_p stmt;
359 mpz_t x;
360 ppl_Coefficient_t one;
361
362 mpz_init (x);
363 mpz_set_si (x, 1);
364 ppl_new_Coefficient (&one);
365 ppl_assign_Coefficient_from_mpz_t (one, x);
366
367 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, stmt)
368 {
369 poly_bb_p pbb = LST_PBB (stmt);
370 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb);
371 int j, nb_dims_to_remove = PBB_NB_SCATTERING_TRANSFORM (pbb) - 3;
372 ppl_dimension_type *ds;
373
374 /* There should be no loops inside LST after flattening. */
375 gcc_assert (!LST_LOOP_P (stmt));
376
377 if (!nb_dims_to_remove)
378 continue;
379
380 ds = XNEWVEC (ppl_dimension_type, nb_dims_to_remove);
381 for (j = 0; j < nb_dims_to_remove; j++)
382 ds[j] = j + 3;
383
384 ppl_Polyhedron_remove_space_dimensions (poly, ds, nb_dims_to_remove);
385 PBB_NB_SCATTERING_TRANSFORM (pbb) -= nb_dims_to_remove;
386 free (ds);
387 }
388
389 mpz_clear (x);
390 ppl_delete_Coefficient (one);
391 }
392
393 /* Flattens all the loop nests of LST. Return true when something
394 changed. */
395
396 static bool
397 lst_do_flatten (lst_p lst)
398 {
399 int i;
400 lst_p l;
401 bool res = false;
402 mpz_t zero;
403
404 if (!lst
405 || !LST_LOOP_P (lst))
406 return false;
407
408 mpz_init (zero);
409 mpz_set_si (zero, 0);
410
411 FOR_EACH_VEC_ELT (lst_p, LST_SEQ (lst), i, l)
412 if (LST_LOOP_P (l))
413 {
414 res |= lst_flatten_loop (l, zero);
415 remove_unused_scattering_dimensions (l);
416 }
417
418 lst_update_scattering (lst);
419 mpz_clear (zero);
420 return res;
421 }
422
423 /* Flatten all the loop nests in SCOP. Returns true when something
424 changed. */
425
426 bool
427 flatten_all_loops (scop_p scop)
428 {
429 return lst_do_flatten (SCOP_TRANSFORMED_SCHEDULE (scop));
430 }
431
432 #endif