Mercurial > hg > CbC > CbC_gcc
comparison gcc/graphite-interchange.c @ 55:77e2b8dfacca gcc-4.4.5
update it from 4.4.3 to 4.5.0
author | ryoma <e075725@ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 12 Feb 2010 23:39:51 +0900 |
parents | |
children | b7f97abdc517 |
comparison
equal
deleted
inserted
replaced
52:c156f1bd5cd9 | 55:77e2b8dfacca |
---|---|
1 /* Interchange heuristics and transform for loop interchange on | |
2 polyhedral representation. | |
3 | |
4 Copyright (C) 2009 Free Software Foundation, Inc. | |
5 Contributed by Sebastian Pop <sebastian.pop@amd.com> and | |
6 Harsha Jagasia <harsha.jagasia@amd.com>. | |
7 | |
8 This file is part of GCC. | |
9 | |
10 GCC is free software; you can redistribute it and/or modify | |
11 it under the terms of the GNU General Public License as published by | |
12 the Free Software Foundation; either version 3, or (at your option) | |
13 any later version. | |
14 | |
15 GCC is distributed in the hope that it will be useful, | |
16 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 GNU General Public License for more details. | |
19 | |
20 You should have received a copy of the GNU General Public License | |
21 along with GCC; see the file COPYING3. If not see | |
22 <http://www.gnu.org/licenses/>. */ | |
23 #include "config.h" | |
24 #include "system.h" | |
25 #include "coretypes.h" | |
26 #include "tm.h" | |
27 #include "ggc.h" | |
28 #include "tree.h" | |
29 #include "rtl.h" | |
30 #include "output.h" | |
31 #include "basic-block.h" | |
32 #include "diagnostic.h" | |
33 #include "tree-flow.h" | |
34 #include "toplev.h" | |
35 #include "tree-dump.h" | |
36 #include "timevar.h" | |
37 #include "cfgloop.h" | |
38 #include "tree-chrec.h" | |
39 #include "tree-data-ref.h" | |
40 #include "tree-scalar-evolution.h" | |
41 #include "tree-pass.h" | |
42 #include "domwalk.h" | |
43 #include "value-prof.h" | |
44 #include "pointer-set.h" | |
45 #include "gimple.h" | |
46 #include "params.h" | |
47 | |
48 #ifdef HAVE_cloog | |
49 #include "cloog/cloog.h" | |
50 #include "ppl_c.h" | |
51 #include "sese.h" | |
52 #include "graphite-ppl.h" | |
53 #include "graphite.h" | |
54 #include "graphite-poly.h" | |
55 | |
56 /* Builds a linear expression, of dimension DIM, representing PDR's | |
57 memory access: | |
58 | |
59 L = r_{n}*r_{n-1}*...*r_{1}*s_{0} + ... + r_{n}*s_{n-1} + s_{n}. | |
60 | |
61 For an array A[10][20] with two subscript locations s0 and s1, the | |
62 linear memory access is 20 * s0 + s1: a stride of 1 in subscript s0 | |
63 corresponds to a memory stride of 20. | |
64 | |
65 OFFSET is a number of dimensions to prepend before the | |
66 subscript dimensions: s_0, s_1, ..., s_n. | |
67 | |
68 Thus, the final linear expression has the following format: | |
69 0 .. 0_{offset} | 0 .. 0_{nit} | 0 .. 0_{gd} | 0 | c_0 c_1 ... c_n | |
70 where the expression itself is: | |
71 c_0 * s_0 + c_1 * s_1 + ... c_n * s_n. */ | |
72 | |
73 static ppl_Linear_Expression_t | |
74 build_linearized_memory_access (ppl_dimension_type offset, poly_dr_p pdr) | |
75 { | |
76 ppl_Linear_Expression_t res; | |
77 ppl_Linear_Expression_t le; | |
78 ppl_dimension_type i; | |
79 ppl_dimension_type first = pdr_subscript_dim (pdr, 0); | |
80 ppl_dimension_type last = pdr_subscript_dim (pdr, PDR_NB_SUBSCRIPTS (pdr)); | |
81 Value size, sub_size; | |
82 graphite_dim_t dim = offset + pdr_dim (pdr); | |
83 | |
84 ppl_new_Linear_Expression_with_dimension (&res, dim); | |
85 | |
86 value_init (size); | |
87 value_set_si (size, 1); | |
88 value_init (sub_size); | |
89 value_set_si (sub_size, 1); | |
90 | |
91 for (i = last - 1; i >= first; i--) | |
92 { | |
93 ppl_set_coef_gmp (res, i + offset, size); | |
94 | |
95 ppl_new_Linear_Expression_with_dimension (&le, dim - offset); | |
96 ppl_set_coef (le, i, 1); | |
97 ppl_max_for_le_pointset (PDR_ACCESSES (pdr), le, sub_size); | |
98 value_multiply (size, size, sub_size); | |
99 ppl_delete_Linear_Expression (le); | |
100 } | |
101 | |
102 value_clear (sub_size); | |
103 value_clear (size); | |
104 return res; | |
105 } | |
106 | |
107 /* Builds a partial difference equations and inserts them | |
108 into pointset powerset polyhedron P. Polyhedron is assumed | |
109 to have the format: T|I|T'|I'|G|S|S'|l1|l2. | |
110 | |
111 TIME_DEPTH is the time dimension w.r.t. which we are | |
112 differentiating. | |
113 OFFSET represents the number of dimensions between | |
114 columns t_{time_depth} and t'_{time_depth}. | |
115 DIM_SCTR is the number of scattering dimensions. It is | |
116 essentially the dimensionality of the T vector. | |
117 | |
118 The following equations are inserted into the polyhedron P: | |
119 | t_1 = t_1' | |
120 | ... | |
121 | t_{time_depth-1} = t'_{time_depth-1} | |
122 | t_{time_depth} = t'_{time_depth} + 1 | |
123 | t_{time_depth+1} = t'_{time_depth + 1} | |
124 | ... | |
125 | t_{dim_sctr} = t'_{dim_sctr}. */ | |
126 | |
127 static void | |
128 build_partial_difference (ppl_Pointset_Powerset_C_Polyhedron_t *p, | |
129 ppl_dimension_type time_depth, | |
130 ppl_dimension_type offset, | |
131 ppl_dimension_type dim_sctr) | |
132 { | |
133 ppl_Constraint_t new_cstr; | |
134 ppl_Linear_Expression_t le; | |
135 ppl_dimension_type i; | |
136 ppl_dimension_type dim; | |
137 ppl_Pointset_Powerset_C_Polyhedron_t temp; | |
138 | |
139 /* Add the equality: t_{time_depth} = t'_{time_depth} + 1. | |
140 This is the core part of this alogrithm, since this | |
141 constraint asks for the memory access stride (difference) | |
142 between two consecutive points in time dimensions. */ | |
143 | |
144 ppl_Pointset_Powerset_C_Polyhedron_space_dimension (*p, &dim); | |
145 ppl_new_Linear_Expression_with_dimension (&le, dim); | |
146 ppl_set_coef (le, time_depth, 1); | |
147 ppl_set_coef (le, time_depth + offset, -1); | |
148 ppl_set_inhomogeneous (le, 1); | |
149 ppl_new_Constraint (&new_cstr, le, PPL_CONSTRAINT_TYPE_EQUAL); | |
150 ppl_Pointset_Powerset_C_Polyhedron_add_constraint (*p, new_cstr); | |
151 ppl_delete_Linear_Expression (le); | |
152 ppl_delete_Constraint (new_cstr); | |
153 | |
154 /* Add equalities: | |
155 | t1 = t1' | |
156 | ... | |
157 | t_{time_depth-1} = t'_{time_depth-1} | |
158 | t_{time_depth+1} = t'_{time_depth+1} | |
159 | ... | |
160 | t_{dim_sctr} = t'_{dim_sctr} | |
161 | |
162 This means that all the time dimensions are equal except for | |
163 time_depth, where the constraint is t_{depth} = t'_{depth} + 1 | |
164 step. More to this: we should be carefull not to add equalities | |
165 to the 'coupled' dimensions, which happens when the one dimension | |
166 is stripmined dimension, and the other dimension corresponds | |
167 to the point loop inside stripmined dimension. */ | |
168 | |
169 ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron (&temp, *p); | |
170 | |
171 for (i = 0; i < dim_sctr; i++) | |
172 if (i != time_depth) | |
173 { | |
174 ppl_new_Linear_Expression_with_dimension (&le, dim); | |
175 ppl_set_coef (le, i, 1); | |
176 ppl_set_coef (le, i + offset, -1); | |
177 ppl_new_Constraint (&new_cstr, le, PPL_CONSTRAINT_TYPE_EQUAL); | |
178 ppl_Pointset_Powerset_C_Polyhedron_add_constraint (temp, new_cstr); | |
179 | |
180 if (ppl_Pointset_Powerset_C_Polyhedron_is_empty (temp)) | |
181 { | |
182 ppl_delete_Pointset_Powerset_C_Polyhedron (temp); | |
183 ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron (&temp, *p); | |
184 } | |
185 else | |
186 ppl_Pointset_Powerset_C_Polyhedron_add_constraint (*p, new_cstr); | |
187 ppl_delete_Linear_Expression (le); | |
188 ppl_delete_Constraint (new_cstr); | |
189 } | |
190 | |
191 ppl_delete_Pointset_Powerset_C_Polyhedron (temp); | |
192 } | |
193 | |
194 | |
195 /* Set STRIDE to the stride of PDR in memory by advancing by one in | |
196 the loop at DEPTH. */ | |
197 | |
198 static void | |
199 memory_stride_in_loop (Value stride, graphite_dim_t depth, poly_dr_p pdr) | |
200 { | |
201 ppl_dimension_type time_depth; | |
202 ppl_Linear_Expression_t le, lma; | |
203 ppl_Constraint_t new_cstr; | |
204 ppl_dimension_type i, *map; | |
205 ppl_Pointset_Powerset_C_Polyhedron_t p1, p2, sctr; | |
206 graphite_dim_t nb_subscripts = PDR_NB_SUBSCRIPTS (pdr) + 1; | |
207 poly_bb_p pbb = PDR_PBB (pdr); | |
208 ppl_dimension_type offset = pbb_nb_scattering_transform (pbb) | |
209 + pbb_nb_local_vars (pbb) | |
210 + pbb_dim_iter_domain (pbb); | |
211 ppl_dimension_type offsetg = offset + pbb_nb_params (pbb); | |
212 ppl_dimension_type dim_sctr = pbb_nb_scattering_transform (pbb) | |
213 + pbb_nb_local_vars (pbb); | |
214 ppl_dimension_type dim_L1 = offset + offsetg + 2 * nb_subscripts; | |
215 ppl_dimension_type dim_L2 = offset + offsetg + 2 * nb_subscripts + 1; | |
216 ppl_dimension_type new_dim = offset + offsetg + 2 * nb_subscripts + 2; | |
217 | |
218 /* The resulting polyhedron should have the following format: | |
219 T|I|T'|I'|G|S|S'|l1|l2 | |
220 where: | |
221 | T = t_1..t_{dim_sctr} | |
222 | I = i_1..i_{dim_iter_domain} | |
223 | T'= t'_1..t'_{dim_sctr} | |
224 | I'= i'_1..i'_{dim_iter_domain} | |
225 | G = g_1..g_{nb_params} | |
226 | S = s_1..s_{nb_subscripts} | |
227 | S'= s'_1..s'_{nb_subscripts} | |
228 | l1 and l2 are scalars. | |
229 | |
230 Some invariants: | |
231 offset = dim_sctr + dim_iter_domain + nb_local_vars | |
232 offsetg = dim_sctr + dim_iter_domain + nb_local_vars + nb_params. */ | |
233 | |
234 /* Construct the T|I|0|0|G|0|0|0|0 part. */ | |
235 { | |
236 ppl_new_Pointset_Powerset_C_Polyhedron_from_C_Polyhedron | |
237 (&sctr, PBB_TRANSFORMED_SCATTERING (pbb)); | |
238 ppl_Pointset_Powerset_C_Polyhedron_add_space_dimensions_and_embed | |
239 (sctr, 2 * nb_subscripts + 2); | |
240 ppl_insert_dimensions_pointset (sctr, offset, offset); | |
241 } | |
242 | |
243 /* Construct the 0|I|0|0|G|S|0|0|0 part. */ | |
244 { | |
245 ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron | |
246 (&p1, PDR_ACCESSES (pdr)); | |
247 ppl_Pointset_Powerset_C_Polyhedron_add_space_dimensions_and_embed | |
248 (p1, nb_subscripts + 2); | |
249 ppl_insert_dimensions_pointset (p1, 0, dim_sctr); | |
250 ppl_insert_dimensions_pointset (p1, offset, offset); | |
251 } | |
252 | |
253 /* Construct the 0|0|0|0|0|S|0|l1|0 part. */ | |
254 { | |
255 lma = build_linearized_memory_access (offset + dim_sctr, pdr); | |
256 ppl_set_coef (lma, dim_L1, -1); | |
257 ppl_new_Constraint (&new_cstr, lma, PPL_CONSTRAINT_TYPE_EQUAL); | |
258 ppl_Pointset_Powerset_C_Polyhedron_add_constraint (p1, new_cstr); | |
259 ppl_delete_Linear_Expression (lma); | |
260 ppl_delete_Constraint (new_cstr); | |
261 } | |
262 | |
263 /* Now intersect all the parts to get the polyhedron P1: | |
264 T|I|0|0|G|0|0|0 |0 | |
265 0|I|0|0|G|S|0|0 |0 | |
266 0|0|0|0|0|S|0|l1|0 | |
267 ------------------ | |
268 T|I|0|0|G|S|0|l1|0. */ | |
269 | |
270 ppl_Pointset_Powerset_C_Polyhedron_intersection_assign (p1, sctr); | |
271 ppl_delete_Pointset_Powerset_C_Polyhedron (sctr); | |
272 | |
273 /* Build P2, which would have the following form: | |
274 0|0|T'|I'|G|0|S'|0|l2 | |
275 | |
276 P2 is built, by remapping the P1 polyhedron: | |
277 T|I|0|0|G|S|0|l1|0 | |
278 | |
279 using the following mapping: | |
280 T->T' | |
281 I->I' | |
282 S->S' | |
283 l1->l2. */ | |
284 { | |
285 ppl_new_Pointset_Powerset_C_Polyhedron_from_Pointset_Powerset_C_Polyhedron | |
286 (&p2, p1); | |
287 | |
288 map = ppl_new_id_map (new_dim); | |
289 | |
290 /* TI -> T'I'. */ | |
291 for (i = 0; i < offset; i++) | |
292 ppl_interchange (map, i, i + offset); | |
293 | |
294 /* l1 -> l2. */ | |
295 ppl_interchange (map, dim_L1, dim_L2); | |
296 | |
297 /* S -> S'. */ | |
298 for (i = 0; i < nb_subscripts; i++) | |
299 ppl_interchange (map, offset + offsetg + i, | |
300 offset + offsetg + nb_subscripts + i); | |
301 | |
302 ppl_Pointset_Powerset_C_Polyhedron_map_space_dimensions (p2, map, new_dim); | |
303 free (map); | |
304 } | |
305 | |
306 time_depth = psct_dynamic_dim (pbb, depth); | |
307 | |
308 /* P1 = P1 inter P2. */ | |
309 ppl_Pointset_Powerset_C_Polyhedron_intersection_assign (p1, p2); | |
310 build_partial_difference (&p1, time_depth, offset, dim_sctr); | |
311 | |
312 /* Maximise the expression L2 - L1. */ | |
313 { | |
314 ppl_new_Linear_Expression_with_dimension (&le, new_dim); | |
315 ppl_set_coef (le, dim_L2, 1); | |
316 ppl_set_coef (le, dim_L1, -1); | |
317 ppl_max_for_le_pointset (p1, le, stride); | |
318 } | |
319 | |
320 if (dump_file && (dump_flags & TDF_DETAILS)) | |
321 { | |
322 fprintf (dump_file, "\nStride in BB_%d, DR_%d, depth %d:", | |
323 pbb_index (pbb), PDR_ID (pdr), (int) depth); | |
324 value_print (dump_file, " %s ", stride); | |
325 } | |
326 | |
327 ppl_delete_Pointset_Powerset_C_Polyhedron (p1); | |
328 ppl_delete_Pointset_Powerset_C_Polyhedron (p2); | |
329 ppl_delete_Linear_Expression (le); | |
330 } | |
331 | |
332 /* Sets STRIDES to the sum of all the strides of the data references accessed */ | |
333 | |
334 static void | |
335 memory_strides_in_loop_depth (poly_bb_p pbb, graphite_dim_t depth, Value strides) | |
336 { | |
337 int i; | |
338 poly_dr_p pdr; | |
339 Value s, n; | |
340 | |
341 value_set_si (strides, 0); | |
342 value_init (s); | |
343 value_init (n); | |
344 | |
345 for (i = 0; VEC_iterate (poly_dr_p, PBB_DRS (pbb), i, pdr); i++) | |
346 { | |
347 value_set_si (n, PDR_NB_REFS (pdr)); | |
348 | |
349 memory_stride_in_loop (s, depth, pdr); | |
350 value_multiply (s, s, n); | |
351 value_addto (strides, strides, s); | |
352 } | |
353 | |
354 value_clear (s); | |
355 value_clear (n); | |
356 } | |
357 | |
358 /* Returns true when it is profitable to interchange time dimensions DEPTH1 | |
359 and DEPTH2 with DEPTH1 < DEPTH2 for PBB. | |
360 | |
361 Example: | |
362 | |
363 | int a[100][100]; | |
364 | | |
365 | int | |
366 | foo (int N) | |
367 | { | |
368 | int j; | |
369 | int i; | |
370 | | |
371 | for (i = 0; i < N; i++) | |
372 | for (j = 0; j < N; j++) | |
373 | a[j][2 * i] += 1; | |
374 | | |
375 | return a[N][12]; | |
376 | } | |
377 | |
378 The data access A[j][i] is described like this: | |
379 | |
380 | i j N a s0 s1 1 | |
381 | 0 0 0 1 0 0 -5 = 0 | |
382 | 0 -1 0 0 1 0 0 = 0 | |
383 |-2 0 0 0 0 1 0 = 0 | |
384 | 0 0 0 0 1 0 0 >= 0 | |
385 | 0 0 0 0 0 1 0 >= 0 | |
386 | 0 0 0 0 -1 0 100 >= 0 | |
387 | 0 0 0 0 0 -1 100 >= 0 | |
388 | |
389 The linearized memory access L to A[100][100] is: | |
390 | |
391 | i j N a s0 s1 1 | |
392 | 0 0 0 0 100 1 0 | |
393 | |
394 TODO: the shown format is not valid as it does not show the fact | |
395 that the iteration domain "i j" is transformed using the scattering. | |
396 | |
397 Next, to measure the impact of iterating once in loop "i", we build | |
398 a maximization problem: first, we add to DR accesses the dimensions | |
399 k, s2, s3, L1 = 100 * s0 + s1, L2, and D1: this is the polyhedron P1. | |
400 L1 and L2 are the linearized memory access functions. | |
401 | |
402 | i j N a s0 s1 k s2 s3 L1 L2 D1 1 | |
403 | 0 0 0 1 0 0 0 0 0 0 0 0 -5 = 0 alias = 5 | |
404 | 0 -1 0 0 1 0 0 0 0 0 0 0 0 = 0 s0 = j | |
405 |-2 0 0 0 0 1 0 0 0 0 0 0 0 = 0 s1 = 2 * i | |
406 | 0 0 0 0 1 0 0 0 0 0 0 0 0 >= 0 | |
407 | 0 0 0 0 0 1 0 0 0 0 0 0 0 >= 0 | |
408 | 0 0 0 0 -1 0 0 0 0 0 0 0 100 >= 0 | |
409 | 0 0 0 0 0 -1 0 0 0 0 0 0 100 >= 0 | |
410 | 0 0 0 0 100 1 0 0 0 -1 0 0 0 = 0 L1 = 100 * s0 + s1 | |
411 | |
412 Then, we generate the polyhedron P2 by interchanging the dimensions | |
413 (s0, s2), (s1, s3), (L1, L2), (k, i) | |
414 | |
415 | i j N a s0 s1 k s2 s3 L1 L2 D1 1 | |
416 | 0 0 0 1 0 0 0 0 0 0 0 0 -5 = 0 alias = 5 | |
417 | 0 -1 0 0 0 0 0 1 0 0 0 0 0 = 0 s2 = j | |
418 | 0 0 0 0 0 0 -2 0 1 0 0 0 0 = 0 s3 = 2 * k | |
419 | 0 0 0 0 0 0 0 1 0 0 0 0 0 >= 0 | |
420 | 0 0 0 0 0 0 0 0 1 0 0 0 0 >= 0 | |
421 | 0 0 0 0 0 0 0 -1 0 0 0 0 100 >= 0 | |
422 | 0 0 0 0 0 0 0 0 -1 0 0 0 100 >= 0 | |
423 | 0 0 0 0 0 0 0 100 1 0 -1 0 0 = 0 L2 = 100 * s2 + s3 | |
424 | |
425 then we add to P2 the equality k = i + 1: | |
426 | |
427 |-1 0 0 0 0 0 1 0 0 0 0 0 -1 = 0 k = i + 1 | |
428 | |
429 and finally we maximize the expression "D1 = max (P1 inter P2, L2 - L1)". | |
430 | |
431 Similarly, to determine the impact of one iteration on loop "j", we | |
432 interchange (k, j), we add "k = j + 1", and we compute D2 the | |
433 maximal value of the difference. | |
434 | |
435 Finally, the profitability test is D1 < D2: if in the outer loop | |
436 the strides are smaller than in the inner loop, then it is | |
437 profitable to interchange the loops at DEPTH1 and DEPTH2. */ | |
438 | |
439 static bool | |
440 pbb_interchange_profitable_p (graphite_dim_t depth1, graphite_dim_t depth2, | |
441 poly_bb_p pbb) | |
442 { | |
443 Value d1, d2; | |
444 bool res; | |
445 | |
446 gcc_assert (depth1 < depth2); | |
447 | |
448 value_init (d1); | |
449 value_init (d2); | |
450 | |
451 memory_strides_in_loop_depth (pbb, depth1, d1); | |
452 memory_strides_in_loop_depth (pbb, depth2, d2); | |
453 | |
454 res = value_lt (d1, d2); | |
455 | |
456 value_clear (d1); | |
457 value_clear (d2); | |
458 | |
459 return res; | |
460 } | |
461 | |
462 /* Interchanges the loops at DEPTH1 and DEPTH2 of the original | |
463 scattering and assigns the resulting polyhedron to the transformed | |
464 scattering. */ | |
465 | |
466 static void | |
467 pbb_interchange_loop_depths (graphite_dim_t depth1, graphite_dim_t depth2, | |
468 poly_bb_p pbb) | |
469 { | |
470 ppl_dimension_type i, dim; | |
471 ppl_dimension_type *map; | |
472 ppl_Polyhedron_t poly = PBB_TRANSFORMED_SCATTERING (pbb); | |
473 ppl_dimension_type dim1 = psct_dynamic_dim (pbb, depth1); | |
474 ppl_dimension_type dim2 = psct_dynamic_dim (pbb, depth2); | |
475 | |
476 ppl_Polyhedron_space_dimension (poly, &dim); | |
477 map = (ppl_dimension_type *) XNEWVEC (ppl_dimension_type, dim); | |
478 | |
479 for (i = 0; i < dim; i++) | |
480 map[i] = i; | |
481 | |
482 map[dim1] = dim2; | |
483 map[dim2] = dim1; | |
484 | |
485 ppl_Polyhedron_map_space_dimensions (poly, map, dim); | |
486 free (map); | |
487 } | |
488 | |
489 /* Apply the interchange of loops at depths DEPTH1 and DEPTH2 to all | |
490 the statements below LST. */ | |
491 | |
492 static void | |
493 lst_apply_interchange (lst_p lst, int depth1, int depth2) | |
494 { | |
495 if (!lst) | |
496 return; | |
497 | |
498 if (LST_LOOP_P (lst)) | |
499 { | |
500 int i; | |
501 lst_p l; | |
502 | |
503 for (i = 0; VEC_iterate (lst_p, LST_SEQ (lst), i, l); i++) | |
504 lst_apply_interchange (l, depth1, depth2); | |
505 } | |
506 else | |
507 pbb_interchange_loop_depths (depth1, depth2, LST_PBB (lst)); | |
508 } | |
509 | |
510 /* Return true when the interchange of loops at depths DEPTH1 and | |
511 DEPTH2 to all the statements below LST is profitable. */ | |
512 | |
513 static bool | |
514 lst_interchange_profitable_p (lst_p lst, int depth1, int depth2) | |
515 { | |
516 if (!lst) | |
517 return false; | |
518 | |
519 if (LST_LOOP_P (lst)) | |
520 { | |
521 int i; | |
522 lst_p l; | |
523 bool res = false; | |
524 | |
525 for (i = 0; VEC_iterate (lst_p, LST_SEQ (lst), i, l); i++) | |
526 { | |
527 bool profitable = lst_interchange_profitable_p (l, depth1, depth2); | |
528 | |
529 if (profitable && !LST_LOOP_P (lst) | |
530 && dump_file && (dump_flags & TDF_DETAILS)) | |
531 fprintf (dump_file, | |
532 "Interchanging loops at depths %d and %d is profitable for stmt_%d.\n", | |
533 depth1, depth2, pbb_index (LST_PBB (lst))); | |
534 | |
535 res |= profitable; | |
536 } | |
537 | |
538 return res; | |
539 } | |
540 else | |
541 return pbb_interchange_profitable_p (depth1, depth2, LST_PBB (lst)); | |
542 } | |
543 | |
544 /* Return true when the nest starting at LOOP1 and ending on LOOP2 is | |
545 perfect: i.e. there are no sequence of statements. */ | |
546 | |
547 static bool | |
548 lst_perfectly_nested_p (lst_p loop1, lst_p loop2) | |
549 { | |
550 if (loop1 == loop2) | |
551 return true; | |
552 | |
553 if (!LST_LOOP_P (loop1)) | |
554 return false; | |
555 | |
556 return VEC_length (lst_p, LST_SEQ (loop1)) == 1 | |
557 && lst_perfectly_nested_p (VEC_index (lst_p, LST_SEQ (loop1), 0), loop2); | |
558 } | |
559 | |
560 /* Transform the loop nest between LOOP1 and LOOP2 into a perfect | |
561 nest. To continue the naming tradition, this function is called | |
562 after perfect_nestify. NEST is set to the perfectly nested loop | |
563 that is created. BEFORE/AFTER are set to the loops distributed | |
564 before/after the loop NEST. */ | |
565 | |
566 static void | |
567 lst_perfect_nestify (lst_p loop1, lst_p loop2, lst_p *before, | |
568 lst_p *nest, lst_p *after) | |
569 { | |
570 poly_bb_p first, last; | |
571 | |
572 gcc_assert (loop1 && loop2 | |
573 && loop1 != loop2 | |
574 && LST_LOOP_P (loop1) && LST_LOOP_P (loop2)); | |
575 | |
576 first = LST_PBB (lst_find_first_pbb (loop2)); | |
577 last = LST_PBB (lst_find_last_pbb (loop2)); | |
578 | |
579 *before = copy_lst (loop1); | |
580 *nest = copy_lst (loop1); | |
581 *after = copy_lst (loop1); | |
582 | |
583 lst_remove_all_before_including_pbb (*before, first, false); | |
584 lst_remove_all_before_including_pbb (*after, last, true); | |
585 | |
586 lst_remove_all_before_excluding_pbb (*nest, first, true); | |
587 lst_remove_all_before_excluding_pbb (*nest, last, false); | |
588 | |
589 if (lst_empty_p (*before)) | |
590 *before = NULL; | |
591 if (lst_empty_p (*after)) | |
592 *after = NULL; | |
593 if (lst_empty_p (*nest)) | |
594 *nest = NULL; | |
595 } | |
596 | |
597 /* Try to interchange LOOP1 with LOOP2 for all the statements of the | |
598 body of LOOP2. LOOP1 contains LOOP2. Return true if it did the | |
599 interchange. CREATED_LOOP_BEFORE/CREATED_LOOP_AFTER are set to | |
600 true if the loop distribution created a loop before/after LOOP1. */ | |
601 | |
602 static bool | |
603 lst_try_interchange_loops (scop_p scop, lst_p loop1, lst_p loop2, | |
604 lst_p *before, lst_p *nest, lst_p *after) | |
605 { | |
606 int depth1 = lst_depth (loop1); | |
607 int depth2 = lst_depth (loop2); | |
608 lst_p transformed; | |
609 | |
610 *before = NULL; | |
611 *after = NULL; | |
612 *nest = NULL; | |
613 | |
614 if (!lst_interchange_profitable_p (loop2, depth1, depth2)) | |
615 return false; | |
616 | |
617 if (!lst_perfectly_nested_p (loop1, loop2)) | |
618 lst_perfect_nestify (loop1, loop2, before, nest, after); | |
619 | |
620 lst_apply_interchange (loop2, depth1, depth2); | |
621 | |
622 /* Sync the transformed LST information and the PBB scatterings | |
623 before using the scatterings in the data dependence analysis. */ | |
624 if (*before || *nest || *after) | |
625 { | |
626 transformed = lst_substitute_3 (SCOP_TRANSFORMED_SCHEDULE (scop), loop1, | |
627 *before, *nest, *after); | |
628 lst_update_scattering (transformed); | |
629 free_lst (transformed); | |
630 } | |
631 | |
632 if (graphite_legal_transform (scop)) | |
633 { | |
634 if (dump_file && (dump_flags & TDF_DETAILS)) | |
635 fprintf (dump_file, | |
636 "Loops at depths %d and %d will be interchanged.\n", | |
637 depth1, depth2); | |
638 | |
639 /* Transform the SCOP_TRANSFORMED_SCHEDULE of the SCOP. */ | |
640 lst_insert_in_sequence (*before, loop1, true); | |
641 lst_insert_in_sequence (*after, loop1, false); | |
642 | |
643 if (*nest) | |
644 { | |
645 lst_replace (loop1, *nest); | |
646 free_lst (loop1); | |
647 } | |
648 | |
649 return true; | |
650 } | |
651 | |
652 /* Undo the transform. */ | |
653 lst_apply_interchange (loop2, depth2, depth1); | |
654 *before = NULL; | |
655 *after = NULL; | |
656 *nest = NULL; | |
657 return false; | |
658 } | |
659 | |
660 static bool lst_interchange_select_inner (scop_p, lst_p, int, lst_p); | |
661 | |
662 /* Try to interchange loop OUTER of LST_SEQ (OUTER_FATHER) with all | |
663 the loop INNER and with all the loops contained in the body of | |
664 INNER. Return true if it did interchanged some loops. */ | |
665 | |
666 static bool | |
667 lst_try_interchange (scop_p scop, lst_p outer_father, int outer, lst_p inner) | |
668 { | |
669 lst_p before, nest, after; | |
670 bool res; | |
671 lst_p loop1 = VEC_index (lst_p, LST_SEQ (outer_father), outer); | |
672 lst_p loop2 = inner; | |
673 | |
674 gcc_assert (LST_LOOP_P (loop1) | |
675 && LST_LOOP_P (loop2)); | |
676 | |
677 res = lst_try_interchange_loops (scop, loop1, loop2, &before, &nest, &after); | |
678 | |
679 if (before) | |
680 res |= lst_interchange_select_inner (scop, outer_father, outer, before); | |
681 else if (nest) | |
682 res |= lst_interchange_select_inner (scop, outer_father, outer, nest); | |
683 else | |
684 res |= lst_interchange_select_inner (scop, outer_father, outer, loop2); | |
685 | |
686 return res; | |
687 } | |
688 | |
689 /* Selects the inner loop in LST_SEQ (INNER_FATHER) to be interchanged | |
690 with the loop OUTER in LST_SEQ (OUTER_FATHER). */ | |
691 | |
692 static bool | |
693 lst_interchange_select_inner (scop_p scop, lst_p outer_father, int outer, | |
694 lst_p inner_father) | |
695 { | |
696 lst_p l; | |
697 bool res = false; | |
698 int inner; | |
699 | |
700 gcc_assert (outer_father | |
701 && LST_LOOP_P (outer_father) | |
702 && LST_LOOP_P (VEC_index (lst_p, LST_SEQ (outer_father), outer)) | |
703 && inner_father | |
704 && LST_LOOP_P (inner_father)); | |
705 | |
706 for (inner = 0; VEC_iterate (lst_p, LST_SEQ (inner_father), inner, l); inner++) | |
707 if (LST_LOOP_P (l)) | |
708 res |= lst_try_interchange (scop, outer_father, outer, l); | |
709 | |
710 return res; | |
711 } | |
712 | |
713 /* Interchanges all the loops of LOOP and the loops of its body that | |
714 are considered profitable to interchange. Return true if it did | |
715 interchanged some loops. OUTER is the index in LST_SEQ (LOOP) that | |
716 points to the next outer loop to be considered for interchange. */ | |
717 | |
718 static bool | |
719 lst_interchange_select_outer (scop_p scop, lst_p loop, int outer) | |
720 { | |
721 lst_p l; | |
722 bool res = false; | |
723 int i = 0; | |
724 lst_p father; | |
725 | |
726 if (!loop || !LST_LOOP_P (loop)) | |
727 return false; | |
728 | |
729 father = LST_LOOP_FATHER (loop); | |
730 if (father) | |
731 { | |
732 res = lst_interchange_select_inner (scop, father, outer, loop); | |
733 | |
734 if (VEC_length (lst_p, LST_SEQ (father)) <= (unsigned) outer) | |
735 return res; | |
736 | |
737 loop = VEC_index (lst_p, LST_SEQ (father), outer); | |
738 } | |
739 | |
740 if (LST_LOOP_P (loop)) | |
741 for (i = 0; VEC_iterate (lst_p, LST_SEQ (loop), i, l); i++) | |
742 if (LST_LOOP_P (l)) | |
743 res |= lst_interchange_select_outer (scop, l, i); | |
744 | |
745 return res; | |
746 } | |
747 | |
748 /* Interchanges all the loop depths that are considered profitable for SCOP. */ | |
749 | |
750 bool | |
751 scop_do_interchange (scop_p scop) | |
752 { | |
753 bool res = lst_interchange_select_outer | |
754 (scop, SCOP_TRANSFORMED_SCHEDULE (scop), 0); | |
755 | |
756 lst_update_scattering (SCOP_TRANSFORMED_SCHEDULE (scop)); | |
757 | |
758 return res; | |
759 } | |
760 | |
761 | |
762 #endif | |
763 |