Mercurial > hg > CbC > CbC_gcc
annotate gcc/tree-loop-linear.c @ 60:bd49c42ec43e
remove unnecessary files
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 15 Feb 2010 17:39:45 +0900 |
parents | 77e2b8dfacca |
children | b7f97abdc517 |
rev | line source |
---|---|
0 | 1 /* Linear Loop transforms |
2 Copyright (C) 2003, 2004, 2005, 2007, 2008, 2009 | |
3 Free Software Foundation, Inc. | |
4 Contributed by Daniel Berlin <dberlin@dberlin.org>. | |
5 | |
6 This file is part of GCC. | |
7 | |
8 GCC is free software; you can redistribute it and/or modify it under | |
9 the terms of the GNU General Public License as published by the Free | |
10 Software Foundation; either version 3, or (at your option) any later | |
11 version. | |
12 | |
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
16 for more details. | |
17 | |
18 You should have received a copy of the GNU General Public License | |
19 along with GCC; see the file COPYING3. If not see | |
20 <http://www.gnu.org/licenses/>. */ | |
21 | |
22 | |
23 #include "config.h" | |
24 #include "system.h" | |
25 #include "coretypes.h" | |
26 #include "tm.h" | |
27 #include "ggc.h" | |
28 #include "tree.h" | |
29 #include "target.h" | |
30 | |
31 #include "rtl.h" | |
32 #include "basic-block.h" | |
33 #include "diagnostic.h" | |
34 #include "obstack.h" | |
35 #include "tree-flow.h" | |
36 #include "tree-dump.h" | |
37 #include "timevar.h" | |
38 #include "cfgloop.h" | |
39 #include "expr.h" | |
40 #include "optabs.h" | |
41 #include "tree-chrec.h" | |
42 #include "tree-data-ref.h" | |
43 #include "tree-scalar-evolution.h" | |
44 #include "tree-pass.h" | |
45 #include "lambda.h" | |
46 | |
47 /* Linear loop transforms include any composition of interchange, | |
48 scaling, skewing, and reversal. They are used to change the | |
49 iteration order of loop nests in order to optimize data locality of | |
50 traversals, or remove dependences that prevent | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
51 parallelization/vectorization/etc. |
0 | 52 |
53 TODO: Determine reuse vectors/matrix and use it to determine optimal | |
54 transform matrix for locality purposes. | |
55 TODO: Completion of partial transforms. */ | |
56 | |
57 /* Gather statistics for loop interchange. LOOP is the loop being | |
58 considered. The first loop in the considered loop nest is | |
59 FIRST_LOOP, and consequently, the index of the considered loop is | |
60 obtained by LOOP->DEPTH - FIRST_LOOP->DEPTH | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
61 |
0 | 62 Initializes: |
63 - DEPENDENCE_STEPS the sum of all the data dependence distances | |
64 carried by loop LOOP, | |
65 | |
66 - NB_DEPS_NOT_CARRIED_BY_LOOP the number of dependence relations | |
67 for which the loop LOOP is not carrying any dependence, | |
68 | |
69 - ACCESS_STRIDES the sum of all the strides in LOOP. | |
70 | |
71 Example: for the following loop, | |
72 | |
73 | loop_1 runs 1335 times | |
74 | loop_2 runs 1335 times | |
75 | A[{{0, +, 1}_1, +, 1335}_2] | |
76 | B[{{0, +, 1}_1, +, 1335}_2] | |
77 | endloop_2 | |
78 | A[{0, +, 1336}_1] | |
79 | endloop_1 | |
80 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
81 gather_interchange_stats (in loop_1) will return |
0 | 82 DEPENDENCE_STEPS = 3002 |
83 NB_DEPS_NOT_CARRIED_BY_LOOP = 5 | |
84 ACCESS_STRIDES = 10694 | |
85 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
86 gather_interchange_stats (in loop_2) will return |
0 | 87 DEPENDENCE_STEPS = 3000 |
88 NB_DEPS_NOT_CARRIED_BY_LOOP = 7 | |
89 ACCESS_STRIDES = 8010 | |
90 */ | |
91 | |
92 static void | |
93 gather_interchange_stats (VEC (ddr_p, heap) *dependence_relations ATTRIBUTE_UNUSED, | |
94 VEC (data_reference_p, heap) *datarefs ATTRIBUTE_UNUSED, | |
95 struct loop *loop ATTRIBUTE_UNUSED, | |
96 struct loop *first_loop ATTRIBUTE_UNUSED, | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
97 unsigned int *dependence_steps ATTRIBUTE_UNUSED, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
98 unsigned int *nb_deps_not_carried_by_loop ATTRIBUTE_UNUSED, |
0 | 99 double_int *access_strides ATTRIBUTE_UNUSED) |
100 { | |
101 unsigned int i, j; | |
102 struct data_dependence_relation *ddr; | |
103 struct data_reference *dr; | |
104 | |
105 *dependence_steps = 0; | |
106 *nb_deps_not_carried_by_loop = 0; | |
107 *access_strides = double_int_zero; | |
108 | |
109 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++) | |
110 { | |
111 /* If we don't know anything about this dependence, or the distance | |
112 vector is NULL, or there is no dependence, then there is no reuse of | |
113 data. */ | |
114 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know | |
115 || DDR_ARE_DEPENDENT (ddr) == chrec_known | |
116 || DDR_NUM_DIST_VECTS (ddr) == 0) | |
117 continue; | |
118 | |
119 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++) | |
120 { | |
121 int dist = DDR_DIST_VECT (ddr, j)[loop_depth (loop) - loop_depth (first_loop)]; | |
122 | |
123 if (dist == 0) | |
124 (*nb_deps_not_carried_by_loop) += 1; | |
125 | |
126 else if (dist < 0) | |
127 (*dependence_steps) += -dist; | |
128 | |
129 else | |
130 (*dependence_steps) += dist; | |
131 } | |
132 } | |
133 | |
134 /* Compute the access strides. */ | |
135 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++) | |
136 { | |
137 unsigned int it; | |
138 tree ref = DR_REF (dr); | |
139 gimple stmt = DR_STMT (dr); | |
140 struct loop *stmt_loop = loop_containing_stmt (stmt); | |
141 struct loop *inner_loop = first_loop->inner; | |
142 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
143 if (inner_loop != stmt_loop |
0 | 144 && !flow_loop_nested_p (inner_loop, stmt_loop)) |
145 continue; | |
146 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
147 for (it = 0; it < DR_NUM_DIMENSIONS (dr); |
0 | 148 it++, ref = TREE_OPERAND (ref, 0)) |
149 { | |
150 int num = am_vector_index_for_loop (DR_ACCESS_MATRIX (dr), loop->num); | |
151 int istride = AM_GET_ACCESS_MATRIX_ELEMENT (DR_ACCESS_MATRIX (dr), it, num); | |
152 tree array_size = TYPE_SIZE (TREE_TYPE (ref)); | |
153 double_int dstride; | |
154 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
155 if (array_size == NULL_TREE |
0 | 156 || TREE_CODE (array_size) != INTEGER_CST) |
157 continue; | |
158 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
159 dstride = double_int_mul (tree_to_double_int (array_size), |
0 | 160 shwi_to_double_int (istride)); |
161 (*access_strides) = double_int_add (*access_strides, dstride); | |
162 } | |
163 } | |
164 } | |
165 | |
166 /* Attempt to apply interchange transformations to TRANS to maximize the | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
167 spatial and temporal locality of the loop. |
0 | 168 Returns the new transform matrix. The smaller the reuse vector |
169 distances in the inner loops, the fewer the cache misses. | |
170 FIRST_LOOP is the loop->num of the first loop in the analyzed loop | |
171 nest. */ | |
172 | |
173 | |
174 static lambda_trans_matrix | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
175 try_interchange_loops (lambda_trans_matrix trans, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
176 unsigned int depth, |
0 | 177 VEC (ddr_p, heap) *dependence_relations, |
178 VEC (data_reference_p, heap) *datarefs, | |
179 struct loop *first_loop) | |
180 { | |
181 bool res; | |
182 struct loop *loop_i; | |
183 struct loop *loop_j; | |
184 unsigned int dependence_steps_i, dependence_steps_j; | |
185 double_int access_strides_i, access_strides_j; | |
186 double_int small, large, nb_iter; | |
187 double_int l1_cache_size, l2_cache_size; | |
188 int cmp; | |
189 unsigned int nb_deps_not_carried_by_i, nb_deps_not_carried_by_j; | |
190 struct data_dependence_relation *ddr; | |
191 | |
192 if (VEC_length (ddr_p, dependence_relations) == 0) | |
193 return trans; | |
194 | |
195 /* When there is an unknown relation in the dependence_relations, we | |
196 know that it is no worth looking at this loop nest: give up. */ | |
197 ddr = VEC_index (ddr_p, dependence_relations, 0); | |
198 if (ddr == NULL || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know) | |
199 return trans; | |
200 | |
201 l1_cache_size = uhwi_to_double_int (L1_CACHE_SIZE * 1024); | |
202 l2_cache_size = uhwi_to_double_int (L2_CACHE_SIZE * 1024); | |
203 | |
204 /* LOOP_I is always the outer loop. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
205 for (loop_j = first_loop->inner; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
206 loop_j; |
0 | 207 loop_j = loop_j->inner) |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
208 for (loop_i = first_loop; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
209 loop_depth (loop_i) < loop_depth (loop_j); |
0 | 210 loop_i = loop_i->inner) |
211 { | |
212 gather_interchange_stats (dependence_relations, datarefs, | |
213 loop_i, first_loop, | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
214 &dependence_steps_i, |
0 | 215 &nb_deps_not_carried_by_i, |
216 &access_strides_i); | |
217 gather_interchange_stats (dependence_relations, datarefs, | |
218 loop_j, first_loop, | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
219 &dependence_steps_j, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
220 &nb_deps_not_carried_by_j, |
0 | 221 &access_strides_j); |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
222 |
0 | 223 /* Heuristics for loop interchange profitability: |
224 | |
225 0. Don't transform if the smallest stride is larger than | |
226 the L2 cache, or if the largest stride multiplied by the | |
227 number of iterations is smaller than the L1 cache. | |
228 | |
229 1. (spatial locality) Inner loops should have smallest | |
230 dependence steps. | |
231 | |
232 2. (spatial locality) Inner loops should contain more | |
233 dependence relations not carried by the loop. | |
234 | |
235 3. (temporal locality) Inner loops should have smallest | |
236 array access strides. | |
237 */ | |
238 | |
239 cmp = double_int_ucmp (access_strides_i, access_strides_j); | |
240 small = cmp < 0 ? access_strides_i : access_strides_j; | |
241 large = cmp < 0 ? access_strides_j : access_strides_i; | |
242 | |
243 if (double_int_ucmp (small, l2_cache_size) > 0) | |
244 continue; | |
245 | |
246 res = cmp < 0 ? | |
247 estimated_loop_iterations (loop_j, false, &nb_iter): | |
248 estimated_loop_iterations (loop_i, false, &nb_iter); | |
249 large = double_int_mul (large, nb_iter); | |
250 | |
251 if (res && double_int_ucmp (large, l1_cache_size) < 0) | |
252 continue; | |
253 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
254 if (dependence_steps_i < dependence_steps_j |
0 | 255 || nb_deps_not_carried_by_i > nb_deps_not_carried_by_j |
256 || cmp < 0) | |
257 { | |
258 lambda_matrix_row_exchange (LTM_MATRIX (trans), | |
259 loop_depth (loop_i) - loop_depth (first_loop), | |
260 loop_depth (loop_j) - loop_depth (first_loop)); | |
261 /* Validate the resulting matrix. When the transformation | |
262 is not valid, reverse to the previous transformation. */ | |
263 if (!lambda_transform_legal_p (trans, depth, dependence_relations)) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
264 lambda_matrix_row_exchange (LTM_MATRIX (trans), |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
265 loop_depth (loop_i) - loop_depth (first_loop), |
0 | 266 loop_depth (loop_j) - loop_depth (first_loop)); |
267 } | |
268 } | |
269 | |
270 return trans; | |
271 } | |
272 | |
273 /* Return the number of nested loops in LOOP_NEST, or 0 if the loops | |
274 are not perfectly nested. */ | |
275 | |
276 unsigned int | |
277 perfect_loop_nest_depth (struct loop *loop_nest) | |
278 { | |
279 struct loop *temp; | |
280 unsigned int depth = 1; | |
281 | |
282 /* If it's not a loop nest, we don't want it. We also don't handle | |
283 sibling loops properly, which are loops of the following form: | |
284 | |
285 | for (i = 0; i < 50; i++) | |
286 | { | |
287 | for (j = 0; j < 50; j++) | |
288 | { | |
289 | ... | |
290 | } | |
291 | for (j = 0; j < 50; j++) | |
292 | { | |
293 | ... | |
294 | } | |
295 | } | |
296 */ | |
297 | |
298 if (!loop_nest->inner || !single_exit (loop_nest)) | |
299 return 0; | |
300 | |
301 for (temp = loop_nest->inner; temp; temp = temp->inner) | |
302 { | |
303 /* If we have a sibling loop or multiple exit edges, jump ship. */ | |
304 if (temp->next || !single_exit (temp)) | |
305 return 0; | |
306 | |
307 depth++; | |
308 } | |
309 | |
310 return depth; | |
311 } | |
312 | |
313 /* Perform a set of linear transforms on loops. */ | |
314 | |
315 void | |
316 linear_transform_loops (void) | |
317 { | |
318 bool modified = false; | |
319 loop_iterator li; | |
320 VEC(tree,heap) *oldivs = NULL; | |
321 VEC(tree,heap) *invariants = NULL; | |
322 VEC(tree,heap) *lambda_parameters = NULL; | |
323 VEC(gimple,heap) *remove_ivs = VEC_alloc (gimple, heap, 3); | |
324 struct loop *loop_nest; | |
325 gimple oldiv_stmt; | |
326 unsigned i; | |
327 | |
328 FOR_EACH_LOOP (li, loop_nest, 0) | |
329 { | |
330 unsigned int depth = 0; | |
331 VEC (ddr_p, heap) *dependence_relations; | |
332 VEC (data_reference_p, heap) *datarefs; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
333 |
0 | 334 lambda_loopnest before, after; |
335 lambda_trans_matrix trans; | |
336 struct obstack lambda_obstack; | |
337 struct loop *loop; | |
338 VEC(loop_p,heap) *nest; | |
339 | |
340 depth = perfect_loop_nest_depth (loop_nest); | |
341 if (depth == 0) | |
342 continue; | |
343 | |
344 nest = VEC_alloc (loop_p, heap, 3); | |
345 for (loop = loop_nest; loop; loop = loop->inner) | |
346 VEC_safe_push (loop_p, heap, nest, loop); | |
347 | |
348 gcc_obstack_init (&lambda_obstack); | |
349 VEC_truncate (tree, oldivs, 0); | |
350 VEC_truncate (tree, invariants, 0); | |
351 VEC_truncate (tree, lambda_parameters, 0); | |
352 | |
353 datarefs = VEC_alloc (data_reference_p, heap, 10); | |
354 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10); | |
355 if (!compute_data_dependences_for_loop (loop_nest, true, &datarefs, | |
356 &dependence_relations)) | |
357 goto free_and_continue; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
358 |
0 | 359 lambda_collect_parameters (datarefs, &lambda_parameters); |
360 if (!lambda_compute_access_matrices (datarefs, lambda_parameters, nest)) | |
361 goto free_and_continue; | |
362 | |
363 if (dump_file && (dump_flags & TDF_DETAILS)) | |
364 dump_ddrs (dump_file, dependence_relations); | |
365 | |
366 /* Build the transformation matrix. */ | |
367 trans = lambda_trans_matrix_new (depth, depth); | |
368 lambda_matrix_id (LTM_MATRIX (trans), depth); | |
369 trans = try_interchange_loops (trans, depth, dependence_relations, | |
370 datarefs, loop_nest); | |
371 | |
372 if (lambda_trans_matrix_id_p (trans)) | |
373 { | |
374 if (dump_file) | |
375 fprintf (dump_file, "Won't transform loop. Optimal transform is the identity transform\n"); | |
376 goto free_and_continue; | |
377 } | |
378 | |
379 /* Check whether the transformation is legal. */ | |
380 if (!lambda_transform_legal_p (trans, depth, dependence_relations)) | |
381 { | |
382 if (dump_file) | |
383 fprintf (dump_file, "Can't transform loop, transform is illegal:\n"); | |
384 goto free_and_continue; | |
385 } | |
386 | |
387 before = gcc_loopnest_to_lambda_loopnest (loop_nest, &oldivs, | |
388 &invariants, &lambda_obstack); | |
389 | |
390 if (!before) | |
391 goto free_and_continue; | |
392 | |
393 if (dump_file) | |
394 { | |
395 fprintf (dump_file, "Before:\n"); | |
396 print_lambda_loopnest (dump_file, before, 'i'); | |
397 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
398 |
0 | 399 after = lambda_loopnest_transform (before, trans, &lambda_obstack); |
400 | |
401 if (dump_file) | |
402 { | |
403 fprintf (dump_file, "After:\n"); | |
404 print_lambda_loopnest (dump_file, after, 'u'); | |
405 } | |
406 | |
407 lambda_loopnest_to_gcc_loopnest (loop_nest, oldivs, invariants, | |
408 &remove_ivs, | |
409 after, trans, &lambda_obstack); | |
410 modified = true; | |
411 | |
412 if (dump_file) | |
413 fprintf (dump_file, "Successfully transformed loop.\n"); | |
414 | |
415 free_and_continue: | |
416 obstack_free (&lambda_obstack, NULL); | |
417 free_dependence_relations (dependence_relations); | |
418 free_data_refs (datarefs); | |
419 VEC_free (loop_p, heap, nest); | |
420 } | |
421 | |
422 for (i = 0; VEC_iterate (gimple, remove_ivs, i, oldiv_stmt); i++) | |
423 remove_iv (oldiv_stmt); | |
424 | |
425 VEC_free (tree, heap, oldivs); | |
426 VEC_free (tree, heap, invariants); | |
427 VEC_free (gimple, heap, remove_ivs); | |
428 scev_reset (); | |
429 | |
430 if (modified) | |
431 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa_full_phi); | |
432 } |