comparison gcc/tree-data-ref.c @ 0:a06113de4d67

first commit
author kent <kent@cr.ie.u-ryukyu.ac.jp>
date Fri, 17 Jul 2009 14:47:48 +0900
parents
children 77e2b8dfacca
comparison
equal deleted inserted replaced
-1:000000000000 0:a06113de4d67
1 /* Data references and dependences detectors.
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc.
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify it under
9 the terms of the GNU General Public License as published by the Free
10 Software Foundation; either version 3, or (at your option) any later
11 version.
12
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 /* This pass walks a given loop structure searching for array
23 references. The information about the array accesses is recorded
24 in DATA_REFERENCE structures.
25
26 The basic test for determining the dependences is:
27 given two access functions chrec1 and chrec2 to a same array, and
28 x and y two vectors from the iteration domain, the same element of
29 the array is accessed twice at iterations x and y if and only if:
30 | chrec1 (x) == chrec2 (y).
31
32 The goals of this analysis are:
33
34 - to determine the independence: the relation between two
35 independent accesses is qualified with the chrec_known (this
36 information allows a loop parallelization),
37
38 - when two data references access the same data, to qualify the
39 dependence relation with classic dependence representations:
40
41 - distance vectors
42 - direction vectors
43 - loop carried level dependence
44 - polyhedron dependence
45 or with the chains of recurrences based representation,
46
47 - to define a knowledge base for storing the data dependence
48 information,
49
50 - to define an interface to access this data.
51
52
53 Definitions:
54
55 - subscript: given two array accesses a subscript is the tuple
56 composed of the access functions for a given dimension. Example:
57 Given A[f1][f2][f3] and B[g1][g2][g3], there are three subscripts:
58 (f1, g1), (f2, g2), (f3, g3).
59
60 - Diophantine equation: an equation whose coefficients and
61 solutions are integer constants, for example the equation
62 | 3*x + 2*y = 1
63 has an integer solution x = 1 and y = -1.
64
65 References:
66
67 - "Advanced Compilation for High Performance Computing" by Randy
68 Allen and Ken Kennedy.
69 http://citeseer.ist.psu.edu/goff91practical.html
70
71 - "Loop Transformations for Restructuring Compilers - The Foundations"
72 by Utpal Banerjee.
73
74
75 */
76
77 #include "config.h"
78 #include "system.h"
79 #include "coretypes.h"
80 #include "tm.h"
81 #include "ggc.h"
82 #include "tree.h"
83
84 /* These RTL headers are needed for basic-block.h. */
85 #include "rtl.h"
86 #include "basic-block.h"
87 #include "diagnostic.h"
88 #include "tree-flow.h"
89 #include "tree-dump.h"
90 #include "timevar.h"
91 #include "cfgloop.h"
92 #include "tree-data-ref.h"
93 #include "tree-scalar-evolution.h"
94 #include "tree-pass.h"
95 #include "langhooks.h"
96
97 static struct datadep_stats
98 {
99 int num_dependence_tests;
100 int num_dependence_dependent;
101 int num_dependence_independent;
102 int num_dependence_undetermined;
103
104 int num_subscript_tests;
105 int num_subscript_undetermined;
106 int num_same_subscript_function;
107
108 int num_ziv;
109 int num_ziv_independent;
110 int num_ziv_dependent;
111 int num_ziv_unimplemented;
112
113 int num_siv;
114 int num_siv_independent;
115 int num_siv_dependent;
116 int num_siv_unimplemented;
117
118 int num_miv;
119 int num_miv_independent;
120 int num_miv_dependent;
121 int num_miv_unimplemented;
122 } dependence_stats;
123
124 static bool subscript_dependence_tester_1 (struct data_dependence_relation *,
125 struct data_reference *,
126 struct data_reference *,
127 struct loop *);
128 /* Returns true iff A divides B. */
129
130 static inline bool
131 tree_fold_divides_p (const_tree a, const_tree b)
132 {
133 gcc_assert (TREE_CODE (a) == INTEGER_CST);
134 gcc_assert (TREE_CODE (b) == INTEGER_CST);
135 return integer_zerop (int_const_binop (TRUNC_MOD_EXPR, b, a, 0));
136 }
137
138 /* Returns true iff A divides B. */
139
140 static inline bool
141 int_divides_p (int a, int b)
142 {
143 return ((b % a) == 0);
144 }
145
146
147
148 /* Dump into FILE all the data references from DATAREFS. */
149
150 void
151 dump_data_references (FILE *file, VEC (data_reference_p, heap) *datarefs)
152 {
153 unsigned int i;
154 struct data_reference *dr;
155
156 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
157 dump_data_reference (file, dr);
158 }
159
160 /* Dump to STDERR all the dependence relations from DDRS. */
161
162 void
163 debug_data_dependence_relations (VEC (ddr_p, heap) *ddrs)
164 {
165 dump_data_dependence_relations (stderr, ddrs);
166 }
167
168 /* Dump into FILE all the dependence relations from DDRS. */
169
170 void
171 dump_data_dependence_relations (FILE *file,
172 VEC (ddr_p, heap) *ddrs)
173 {
174 unsigned int i;
175 struct data_dependence_relation *ddr;
176
177 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
178 dump_data_dependence_relation (file, ddr);
179 }
180
181 /* Dump function for a DATA_REFERENCE structure. */
182
183 void
184 dump_data_reference (FILE *outf,
185 struct data_reference *dr)
186 {
187 unsigned int i;
188
189 fprintf (outf, "(Data Ref: \n stmt: ");
190 print_gimple_stmt (outf, DR_STMT (dr), 0, 0);
191 fprintf (outf, " ref: ");
192 print_generic_stmt (outf, DR_REF (dr), 0);
193 fprintf (outf, " base_object: ");
194 print_generic_stmt (outf, DR_BASE_OBJECT (dr), 0);
195
196 for (i = 0; i < DR_NUM_DIMENSIONS (dr); i++)
197 {
198 fprintf (outf, " Access function %d: ", i);
199 print_generic_stmt (outf, DR_ACCESS_FN (dr, i), 0);
200 }
201 fprintf (outf, ")\n");
202 }
203
204 /* Dumps the affine function described by FN to the file OUTF. */
205
206 static void
207 dump_affine_function (FILE *outf, affine_fn fn)
208 {
209 unsigned i;
210 tree coef;
211
212 print_generic_expr (outf, VEC_index (tree, fn, 0), TDF_SLIM);
213 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
214 {
215 fprintf (outf, " + ");
216 print_generic_expr (outf, coef, TDF_SLIM);
217 fprintf (outf, " * x_%u", i);
218 }
219 }
220
221 /* Dumps the conflict function CF to the file OUTF. */
222
223 static void
224 dump_conflict_function (FILE *outf, conflict_function *cf)
225 {
226 unsigned i;
227
228 if (cf->n == NO_DEPENDENCE)
229 fprintf (outf, "no dependence\n");
230 else if (cf->n == NOT_KNOWN)
231 fprintf (outf, "not known\n");
232 else
233 {
234 for (i = 0; i < cf->n; i++)
235 {
236 fprintf (outf, "[");
237 dump_affine_function (outf, cf->fns[i]);
238 fprintf (outf, "]\n");
239 }
240 }
241 }
242
243 /* Dump function for a SUBSCRIPT structure. */
244
245 void
246 dump_subscript (FILE *outf, struct subscript *subscript)
247 {
248 conflict_function *cf = SUB_CONFLICTS_IN_A (subscript);
249
250 fprintf (outf, "\n (subscript \n");
251 fprintf (outf, " iterations_that_access_an_element_twice_in_A: ");
252 dump_conflict_function (outf, cf);
253 if (CF_NONTRIVIAL_P (cf))
254 {
255 tree last_iteration = SUB_LAST_CONFLICT (subscript);
256 fprintf (outf, " last_conflict: ");
257 print_generic_stmt (outf, last_iteration, 0);
258 }
259
260 cf = SUB_CONFLICTS_IN_B (subscript);
261 fprintf (outf, " iterations_that_access_an_element_twice_in_B: ");
262 dump_conflict_function (outf, cf);
263 if (CF_NONTRIVIAL_P (cf))
264 {
265 tree last_iteration = SUB_LAST_CONFLICT (subscript);
266 fprintf (outf, " last_conflict: ");
267 print_generic_stmt (outf, last_iteration, 0);
268 }
269
270 fprintf (outf, " (Subscript distance: ");
271 print_generic_stmt (outf, SUB_DISTANCE (subscript), 0);
272 fprintf (outf, " )\n");
273 fprintf (outf, " )\n");
274 }
275
276 /* Print the classic direction vector DIRV to OUTF. */
277
278 void
279 print_direction_vector (FILE *outf,
280 lambda_vector dirv,
281 int length)
282 {
283 int eq;
284
285 for (eq = 0; eq < length; eq++)
286 {
287 enum data_dependence_direction dir = dirv[eq];
288
289 switch (dir)
290 {
291 case dir_positive:
292 fprintf (outf, " +");
293 break;
294 case dir_negative:
295 fprintf (outf, " -");
296 break;
297 case dir_equal:
298 fprintf (outf, " =");
299 break;
300 case dir_positive_or_equal:
301 fprintf (outf, " +=");
302 break;
303 case dir_positive_or_negative:
304 fprintf (outf, " +-");
305 break;
306 case dir_negative_or_equal:
307 fprintf (outf, " -=");
308 break;
309 case dir_star:
310 fprintf (outf, " *");
311 break;
312 default:
313 fprintf (outf, "indep");
314 break;
315 }
316 }
317 fprintf (outf, "\n");
318 }
319
320 /* Print a vector of direction vectors. */
321
322 void
323 print_dir_vectors (FILE *outf, VEC (lambda_vector, heap) *dir_vects,
324 int length)
325 {
326 unsigned j;
327 lambda_vector v;
328
329 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, v); j++)
330 print_direction_vector (outf, v, length);
331 }
332
333 /* Print a vector of distance vectors. */
334
335 void
336 print_dist_vectors (FILE *outf, VEC (lambda_vector, heap) *dist_vects,
337 int length)
338 {
339 unsigned j;
340 lambda_vector v;
341
342 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, v); j++)
343 print_lambda_vector (outf, v, length);
344 }
345
346 /* Debug version. */
347
348 void
349 debug_data_dependence_relation (struct data_dependence_relation *ddr)
350 {
351 dump_data_dependence_relation (stderr, ddr);
352 }
353
354 /* Dump function for a DATA_DEPENDENCE_RELATION structure. */
355
356 void
357 dump_data_dependence_relation (FILE *outf,
358 struct data_dependence_relation *ddr)
359 {
360 struct data_reference *dra, *drb;
361
362 fprintf (outf, "(Data Dep: \n");
363
364 if (!ddr || DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
365 {
366 fprintf (outf, " (don't know)\n)\n");
367 return;
368 }
369
370 dra = DDR_A (ddr);
371 drb = DDR_B (ddr);
372 dump_data_reference (outf, dra);
373 dump_data_reference (outf, drb);
374
375 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
376 fprintf (outf, " (no dependence)\n");
377
378 else if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
379 {
380 unsigned int i;
381 struct loop *loopi;
382
383 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
384 {
385 fprintf (outf, " access_fn_A: ");
386 print_generic_stmt (outf, DR_ACCESS_FN (dra, i), 0);
387 fprintf (outf, " access_fn_B: ");
388 print_generic_stmt (outf, DR_ACCESS_FN (drb, i), 0);
389 dump_subscript (outf, DDR_SUBSCRIPT (ddr, i));
390 }
391
392 fprintf (outf, " inner loop index: %d\n", DDR_INNER_LOOP (ddr));
393 fprintf (outf, " loop nest: (");
394 for (i = 0; VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
395 fprintf (outf, "%d ", loopi->num);
396 fprintf (outf, ")\n");
397
398 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
399 {
400 fprintf (outf, " distance_vector: ");
401 print_lambda_vector (outf, DDR_DIST_VECT (ddr, i),
402 DDR_NB_LOOPS (ddr));
403 }
404
405 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
406 {
407 fprintf (outf, " direction_vector: ");
408 print_direction_vector (outf, DDR_DIR_VECT (ddr, i),
409 DDR_NB_LOOPS (ddr));
410 }
411 }
412
413 fprintf (outf, ")\n");
414 }
415
416 /* Dump function for a DATA_DEPENDENCE_DIRECTION structure. */
417
418 void
419 dump_data_dependence_direction (FILE *file,
420 enum data_dependence_direction dir)
421 {
422 switch (dir)
423 {
424 case dir_positive:
425 fprintf (file, "+");
426 break;
427
428 case dir_negative:
429 fprintf (file, "-");
430 break;
431
432 case dir_equal:
433 fprintf (file, "=");
434 break;
435
436 case dir_positive_or_negative:
437 fprintf (file, "+-");
438 break;
439
440 case dir_positive_or_equal:
441 fprintf (file, "+=");
442 break;
443
444 case dir_negative_or_equal:
445 fprintf (file, "-=");
446 break;
447
448 case dir_star:
449 fprintf (file, "*");
450 break;
451
452 default:
453 break;
454 }
455 }
456
457 /* Dumps the distance and direction vectors in FILE. DDRS contains
458 the dependence relations, and VECT_SIZE is the size of the
459 dependence vectors, or in other words the number of loops in the
460 considered nest. */
461
462 void
463 dump_dist_dir_vectors (FILE *file, VEC (ddr_p, heap) *ddrs)
464 {
465 unsigned int i, j;
466 struct data_dependence_relation *ddr;
467 lambda_vector v;
468
469 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
470 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE && DDR_AFFINE_P (ddr))
471 {
472 for (j = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), j, v); j++)
473 {
474 fprintf (file, "DISTANCE_V (");
475 print_lambda_vector (file, v, DDR_NB_LOOPS (ddr));
476 fprintf (file, ")\n");
477 }
478
479 for (j = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), j, v); j++)
480 {
481 fprintf (file, "DIRECTION_V (");
482 print_direction_vector (file, v, DDR_NB_LOOPS (ddr));
483 fprintf (file, ")\n");
484 }
485 }
486
487 fprintf (file, "\n\n");
488 }
489
490 /* Dumps the data dependence relations DDRS in FILE. */
491
492 void
493 dump_ddrs (FILE *file, VEC (ddr_p, heap) *ddrs)
494 {
495 unsigned int i;
496 struct data_dependence_relation *ddr;
497
498 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
499 dump_data_dependence_relation (file, ddr);
500
501 fprintf (file, "\n\n");
502 }
503
504 /* Helper function for split_constant_offset. Expresses OP0 CODE OP1
505 (the type of the result is TYPE) as VAR + OFF, where OFF is a nonzero
506 constant of type ssizetype, and returns true. If we cannot do this
507 with OFF nonzero, OFF and VAR are set to NULL_TREE instead and false
508 is returned. */
509
510 static bool
511 split_constant_offset_1 (tree type, tree op0, enum tree_code code, tree op1,
512 tree *var, tree *off)
513 {
514 tree var0, var1;
515 tree off0, off1;
516 enum tree_code ocode = code;
517
518 *var = NULL_TREE;
519 *off = NULL_TREE;
520
521 switch (code)
522 {
523 case INTEGER_CST:
524 *var = build_int_cst (type, 0);
525 *off = fold_convert (ssizetype, op0);
526 return true;
527
528 case POINTER_PLUS_EXPR:
529 ocode = PLUS_EXPR;
530 /* FALLTHROUGH */
531 case PLUS_EXPR:
532 case MINUS_EXPR:
533 split_constant_offset (op0, &var0, &off0);
534 split_constant_offset (op1, &var1, &off1);
535 *var = fold_build2 (code, type, var0, var1);
536 *off = size_binop (ocode, off0, off1);
537 return true;
538
539 case MULT_EXPR:
540 if (TREE_CODE (op1) != INTEGER_CST)
541 return false;
542
543 split_constant_offset (op0, &var0, &off0);
544 *var = fold_build2 (MULT_EXPR, type, var0, op1);
545 *off = size_binop (MULT_EXPR, off0, fold_convert (ssizetype, op1));
546 return true;
547
548 case ADDR_EXPR:
549 {
550 tree base, poffset;
551 HOST_WIDE_INT pbitsize, pbitpos;
552 enum machine_mode pmode;
553 int punsignedp, pvolatilep;
554
555 op0 = TREE_OPERAND (op0, 0);
556 if (!handled_component_p (op0))
557 return false;
558
559 base = get_inner_reference (op0, &pbitsize, &pbitpos, &poffset,
560 &pmode, &punsignedp, &pvolatilep, false);
561
562 if (pbitpos % BITS_PER_UNIT != 0)
563 return false;
564 base = build_fold_addr_expr (base);
565 off0 = ssize_int (pbitpos / BITS_PER_UNIT);
566
567 if (poffset)
568 {
569 split_constant_offset (poffset, &poffset, &off1);
570 off0 = size_binop (PLUS_EXPR, off0, off1);
571 if (POINTER_TYPE_P (TREE_TYPE (base)))
572 base = fold_build2 (POINTER_PLUS_EXPR, TREE_TYPE (base),
573 base, fold_convert (sizetype, poffset));
574 else
575 base = fold_build2 (PLUS_EXPR, TREE_TYPE (base), base,
576 fold_convert (TREE_TYPE (base), poffset));
577 }
578
579 var0 = fold_convert (type, base);
580
581 /* If variable length types are involved, punt, otherwise casts
582 might be converted into ARRAY_REFs in gimplify_conversion.
583 To compute that ARRAY_REF's element size TYPE_SIZE_UNIT, which
584 possibly no longer appears in current GIMPLE, might resurface.
585 This perhaps could run
586 if (CONVERT_EXPR_P (var0))
587 {
588 gimplify_conversion (&var0);
589 // Attempt to fill in any within var0 found ARRAY_REF's
590 // element size from corresponding op embedded ARRAY_REF,
591 // if unsuccessful, just punt.
592 } */
593 while (POINTER_TYPE_P (type))
594 type = TREE_TYPE (type);
595 if (int_size_in_bytes (type) < 0)
596 return false;
597
598 *var = var0;
599 *off = off0;
600 return true;
601 }
602
603 case SSA_NAME:
604 {
605 gimple def_stmt = SSA_NAME_DEF_STMT (op0);
606 enum tree_code subcode;
607
608 if (gimple_code (def_stmt) != GIMPLE_ASSIGN)
609 return false;
610
611 var0 = gimple_assign_rhs1 (def_stmt);
612 subcode = gimple_assign_rhs_code (def_stmt);
613 var1 = gimple_assign_rhs2 (def_stmt);
614
615 return split_constant_offset_1 (type, var0, subcode, var1, var, off);
616 }
617
618 default:
619 return false;
620 }
621 }
622
623 /* Expresses EXP as VAR + OFF, where off is a constant. The type of OFF
624 will be ssizetype. */
625
626 void
627 split_constant_offset (tree exp, tree *var, tree *off)
628 {
629 tree type = TREE_TYPE (exp), otype, op0, op1, e, o;
630 enum tree_code code;
631
632 *var = exp;
633 *off = ssize_int (0);
634 STRIP_NOPS (exp);
635
636 if (automatically_generated_chrec_p (exp))
637 return;
638
639 otype = TREE_TYPE (exp);
640 code = TREE_CODE (exp);
641 extract_ops_from_tree (exp, &code, &op0, &op1);
642 if (split_constant_offset_1 (otype, op0, code, op1, &e, &o))
643 {
644 *var = fold_convert (type, e);
645 *off = o;
646 }
647 }
648
649 /* Returns the address ADDR of an object in a canonical shape (without nop
650 casts, and with type of pointer to the object). */
651
652 static tree
653 canonicalize_base_object_address (tree addr)
654 {
655 tree orig = addr;
656
657 STRIP_NOPS (addr);
658
659 /* The base address may be obtained by casting from integer, in that case
660 keep the cast. */
661 if (!POINTER_TYPE_P (TREE_TYPE (addr)))
662 return orig;
663
664 if (TREE_CODE (addr) != ADDR_EXPR)
665 return addr;
666
667 return build_fold_addr_expr (TREE_OPERAND (addr, 0));
668 }
669
670 /* Analyzes the behavior of the memory reference DR in the innermost loop that
671 contains it. Returns true if analysis succeed or false otherwise. */
672
673 bool
674 dr_analyze_innermost (struct data_reference *dr)
675 {
676 gimple stmt = DR_STMT (dr);
677 struct loop *loop = loop_containing_stmt (stmt);
678 tree ref = DR_REF (dr);
679 HOST_WIDE_INT pbitsize, pbitpos;
680 tree base, poffset;
681 enum machine_mode pmode;
682 int punsignedp, pvolatilep;
683 affine_iv base_iv, offset_iv;
684 tree init, dinit, step;
685
686 if (dump_file && (dump_flags & TDF_DETAILS))
687 fprintf (dump_file, "analyze_innermost: ");
688
689 base = get_inner_reference (ref, &pbitsize, &pbitpos, &poffset,
690 &pmode, &punsignedp, &pvolatilep, false);
691 gcc_assert (base != NULL_TREE);
692
693 if (pbitpos % BITS_PER_UNIT != 0)
694 {
695 if (dump_file && (dump_flags & TDF_DETAILS))
696 fprintf (dump_file, "failed: bit offset alignment.\n");
697 return false;
698 }
699
700 base = build_fold_addr_expr (base);
701 if (!simple_iv (loop, loop_containing_stmt (stmt), base, &base_iv, false))
702 {
703 if (dump_file && (dump_flags & TDF_DETAILS))
704 fprintf (dump_file, "failed: evolution of base is not affine.\n");
705 return false;
706 }
707 if (!poffset)
708 {
709 offset_iv.base = ssize_int (0);
710 offset_iv.step = ssize_int (0);
711 }
712 else if (!simple_iv (loop, loop_containing_stmt (stmt),
713 poffset, &offset_iv, false))
714 {
715 if (dump_file && (dump_flags & TDF_DETAILS))
716 fprintf (dump_file, "failed: evolution of offset is not affine.\n");
717 return false;
718 }
719
720 init = ssize_int (pbitpos / BITS_PER_UNIT);
721 split_constant_offset (base_iv.base, &base_iv.base, &dinit);
722 init = size_binop (PLUS_EXPR, init, dinit);
723 split_constant_offset (offset_iv.base, &offset_iv.base, &dinit);
724 init = size_binop (PLUS_EXPR, init, dinit);
725
726 step = size_binop (PLUS_EXPR,
727 fold_convert (ssizetype, base_iv.step),
728 fold_convert (ssizetype, offset_iv.step));
729
730 DR_BASE_ADDRESS (dr) = canonicalize_base_object_address (base_iv.base);
731
732 DR_OFFSET (dr) = fold_convert (ssizetype, offset_iv.base);
733 DR_INIT (dr) = init;
734 DR_STEP (dr) = step;
735
736 DR_ALIGNED_TO (dr) = size_int (highest_pow2_factor (offset_iv.base));
737
738 if (dump_file && (dump_flags & TDF_DETAILS))
739 fprintf (dump_file, "success.\n");
740
741 return true;
742 }
743
744 /* Determines the base object and the list of indices of memory reference
745 DR, analyzed in loop nest NEST. */
746
747 static void
748 dr_analyze_indices (struct data_reference *dr, struct loop *nest)
749 {
750 gimple stmt = DR_STMT (dr);
751 struct loop *loop = loop_containing_stmt (stmt);
752 VEC (tree, heap) *access_fns = NULL;
753 tree ref = unshare_expr (DR_REF (dr)), aref = ref, op;
754 tree base, off, access_fn;
755 basic_block before_loop = block_before_loop (nest);
756
757 while (handled_component_p (aref))
758 {
759 if (TREE_CODE (aref) == ARRAY_REF)
760 {
761 op = TREE_OPERAND (aref, 1);
762 access_fn = analyze_scalar_evolution (loop, op);
763 access_fn = instantiate_scev (before_loop, loop, access_fn);
764 VEC_safe_push (tree, heap, access_fns, access_fn);
765
766 TREE_OPERAND (aref, 1) = build_int_cst (TREE_TYPE (op), 0);
767 }
768
769 aref = TREE_OPERAND (aref, 0);
770 }
771
772 if (INDIRECT_REF_P (aref))
773 {
774 op = TREE_OPERAND (aref, 0);
775 access_fn = analyze_scalar_evolution (loop, op);
776 access_fn = instantiate_scev (before_loop, loop, access_fn);
777 base = initial_condition (access_fn);
778 split_constant_offset (base, &base, &off);
779 access_fn = chrec_replace_initial_condition (access_fn,
780 fold_convert (TREE_TYPE (base), off));
781
782 TREE_OPERAND (aref, 0) = base;
783 VEC_safe_push (tree, heap, access_fns, access_fn);
784 }
785
786 DR_BASE_OBJECT (dr) = ref;
787 DR_ACCESS_FNS (dr) = access_fns;
788 }
789
790 /* Extracts the alias analysis information from the memory reference DR. */
791
792 static void
793 dr_analyze_alias (struct data_reference *dr)
794 {
795 gimple stmt = DR_STMT (dr);
796 tree ref = DR_REF (dr);
797 tree base = get_base_address (ref), addr, smt = NULL_TREE;
798 ssa_op_iter it;
799 tree op;
800 bitmap vops;
801
802 if (DECL_P (base))
803 smt = base;
804 else if (INDIRECT_REF_P (base))
805 {
806 addr = TREE_OPERAND (base, 0);
807 if (TREE_CODE (addr) == SSA_NAME)
808 {
809 smt = symbol_mem_tag (SSA_NAME_VAR (addr));
810 DR_PTR_INFO (dr) = SSA_NAME_PTR_INFO (addr);
811 }
812 }
813
814 DR_SYMBOL_TAG (dr) = smt;
815
816 vops = BITMAP_ALLOC (NULL);
817 FOR_EACH_SSA_TREE_OPERAND (op, stmt, it, SSA_OP_VIRTUAL_USES)
818 {
819 bitmap_set_bit (vops, DECL_UID (SSA_NAME_VAR (op)));
820 }
821
822 DR_VOPS (dr) = vops;
823 }
824
825 /* Returns true if the address of DR is invariant. */
826
827 static bool
828 dr_address_invariant_p (struct data_reference *dr)
829 {
830 unsigned i;
831 tree idx;
832
833 for (i = 0; VEC_iterate (tree, DR_ACCESS_FNS (dr), i, idx); i++)
834 if (tree_contains_chrecs (idx, NULL))
835 return false;
836
837 return true;
838 }
839
840 /* Frees data reference DR. */
841
842 void
843 free_data_ref (data_reference_p dr)
844 {
845 BITMAP_FREE (DR_VOPS (dr));
846 VEC_free (tree, heap, DR_ACCESS_FNS (dr));
847 free (dr);
848 }
849
850 /* Analyzes memory reference MEMREF accessed in STMT. The reference
851 is read if IS_READ is true, write otherwise. Returns the
852 data_reference description of MEMREF. NEST is the outermost loop of the
853 loop nest in that the reference should be analyzed. */
854
855 struct data_reference *
856 create_data_ref (struct loop *nest, tree memref, gimple stmt, bool is_read)
857 {
858 struct data_reference *dr;
859
860 if (dump_file && (dump_flags & TDF_DETAILS))
861 {
862 fprintf (dump_file, "Creating dr for ");
863 print_generic_expr (dump_file, memref, TDF_SLIM);
864 fprintf (dump_file, "\n");
865 }
866
867 dr = XCNEW (struct data_reference);
868 DR_STMT (dr) = stmt;
869 DR_REF (dr) = memref;
870 DR_IS_READ (dr) = is_read;
871
872 dr_analyze_innermost (dr);
873 dr_analyze_indices (dr, nest);
874 dr_analyze_alias (dr);
875
876 if (dump_file && (dump_flags & TDF_DETAILS))
877 {
878 fprintf (dump_file, "\tbase_address: ");
879 print_generic_expr (dump_file, DR_BASE_ADDRESS (dr), TDF_SLIM);
880 fprintf (dump_file, "\n\toffset from base address: ");
881 print_generic_expr (dump_file, DR_OFFSET (dr), TDF_SLIM);
882 fprintf (dump_file, "\n\tconstant offset from base address: ");
883 print_generic_expr (dump_file, DR_INIT (dr), TDF_SLIM);
884 fprintf (dump_file, "\n\tstep: ");
885 print_generic_expr (dump_file, DR_STEP (dr), TDF_SLIM);
886 fprintf (dump_file, "\n\taligned to: ");
887 print_generic_expr (dump_file, DR_ALIGNED_TO (dr), TDF_SLIM);
888 fprintf (dump_file, "\n\tbase_object: ");
889 print_generic_expr (dump_file, DR_BASE_OBJECT (dr), TDF_SLIM);
890 fprintf (dump_file, "\n\tsymbol tag: ");
891 print_generic_expr (dump_file, DR_SYMBOL_TAG (dr), TDF_SLIM);
892 fprintf (dump_file, "\n");
893 }
894
895 return dr;
896 }
897
898 /* Returns true if FNA == FNB. */
899
900 static bool
901 affine_function_equal_p (affine_fn fna, affine_fn fnb)
902 {
903 unsigned i, n = VEC_length (tree, fna);
904
905 if (n != VEC_length (tree, fnb))
906 return false;
907
908 for (i = 0; i < n; i++)
909 if (!operand_equal_p (VEC_index (tree, fna, i),
910 VEC_index (tree, fnb, i), 0))
911 return false;
912
913 return true;
914 }
915
916 /* If all the functions in CF are the same, returns one of them,
917 otherwise returns NULL. */
918
919 static affine_fn
920 common_affine_function (conflict_function *cf)
921 {
922 unsigned i;
923 affine_fn comm;
924
925 if (!CF_NONTRIVIAL_P (cf))
926 return NULL;
927
928 comm = cf->fns[0];
929
930 for (i = 1; i < cf->n; i++)
931 if (!affine_function_equal_p (comm, cf->fns[i]))
932 return NULL;
933
934 return comm;
935 }
936
937 /* Returns the base of the affine function FN. */
938
939 static tree
940 affine_function_base (affine_fn fn)
941 {
942 return VEC_index (tree, fn, 0);
943 }
944
945 /* Returns true if FN is a constant. */
946
947 static bool
948 affine_function_constant_p (affine_fn fn)
949 {
950 unsigned i;
951 tree coef;
952
953 for (i = 1; VEC_iterate (tree, fn, i, coef); i++)
954 if (!integer_zerop (coef))
955 return false;
956
957 return true;
958 }
959
960 /* Returns true if FN is the zero constant function. */
961
962 static bool
963 affine_function_zero_p (affine_fn fn)
964 {
965 return (integer_zerop (affine_function_base (fn))
966 && affine_function_constant_p (fn));
967 }
968
969 /* Returns a signed integer type with the largest precision from TA
970 and TB. */
971
972 static tree
973 signed_type_for_types (tree ta, tree tb)
974 {
975 if (TYPE_PRECISION (ta) > TYPE_PRECISION (tb))
976 return signed_type_for (ta);
977 else
978 return signed_type_for (tb);
979 }
980
981 /* Applies operation OP on affine functions FNA and FNB, and returns the
982 result. */
983
984 static affine_fn
985 affine_fn_op (enum tree_code op, affine_fn fna, affine_fn fnb)
986 {
987 unsigned i, n, m;
988 affine_fn ret;
989 tree coef;
990
991 if (VEC_length (tree, fnb) > VEC_length (tree, fna))
992 {
993 n = VEC_length (tree, fna);
994 m = VEC_length (tree, fnb);
995 }
996 else
997 {
998 n = VEC_length (tree, fnb);
999 m = VEC_length (tree, fna);
1000 }
1001
1002 ret = VEC_alloc (tree, heap, m);
1003 for (i = 0; i < n; i++)
1004 {
1005 tree type = signed_type_for_types (TREE_TYPE (VEC_index (tree, fna, i)),
1006 TREE_TYPE (VEC_index (tree, fnb, i)));
1007
1008 VEC_quick_push (tree, ret,
1009 fold_build2 (op, type,
1010 VEC_index (tree, fna, i),
1011 VEC_index (tree, fnb, i)));
1012 }
1013
1014 for (; VEC_iterate (tree, fna, i, coef); i++)
1015 VEC_quick_push (tree, ret,
1016 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1017 coef, integer_zero_node));
1018 for (; VEC_iterate (tree, fnb, i, coef); i++)
1019 VEC_quick_push (tree, ret,
1020 fold_build2 (op, signed_type_for (TREE_TYPE (coef)),
1021 integer_zero_node, coef));
1022
1023 return ret;
1024 }
1025
1026 /* Returns the sum of affine functions FNA and FNB. */
1027
1028 static affine_fn
1029 affine_fn_plus (affine_fn fna, affine_fn fnb)
1030 {
1031 return affine_fn_op (PLUS_EXPR, fna, fnb);
1032 }
1033
1034 /* Returns the difference of affine functions FNA and FNB. */
1035
1036 static affine_fn
1037 affine_fn_minus (affine_fn fna, affine_fn fnb)
1038 {
1039 return affine_fn_op (MINUS_EXPR, fna, fnb);
1040 }
1041
1042 /* Frees affine function FN. */
1043
1044 static void
1045 affine_fn_free (affine_fn fn)
1046 {
1047 VEC_free (tree, heap, fn);
1048 }
1049
1050 /* Determine for each subscript in the data dependence relation DDR
1051 the distance. */
1052
1053 static void
1054 compute_subscript_distance (struct data_dependence_relation *ddr)
1055 {
1056 conflict_function *cf_a, *cf_b;
1057 affine_fn fn_a, fn_b, diff;
1058
1059 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
1060 {
1061 unsigned int i;
1062
1063 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
1064 {
1065 struct subscript *subscript;
1066
1067 subscript = DDR_SUBSCRIPT (ddr, i);
1068 cf_a = SUB_CONFLICTS_IN_A (subscript);
1069 cf_b = SUB_CONFLICTS_IN_B (subscript);
1070
1071 fn_a = common_affine_function (cf_a);
1072 fn_b = common_affine_function (cf_b);
1073 if (!fn_a || !fn_b)
1074 {
1075 SUB_DISTANCE (subscript) = chrec_dont_know;
1076 return;
1077 }
1078 diff = affine_fn_minus (fn_a, fn_b);
1079
1080 if (affine_function_constant_p (diff))
1081 SUB_DISTANCE (subscript) = affine_function_base (diff);
1082 else
1083 SUB_DISTANCE (subscript) = chrec_dont_know;
1084
1085 affine_fn_free (diff);
1086 }
1087 }
1088 }
1089
1090 /* Returns the conflict function for "unknown". */
1091
1092 static conflict_function *
1093 conflict_fn_not_known (void)
1094 {
1095 conflict_function *fn = XCNEW (conflict_function);
1096 fn->n = NOT_KNOWN;
1097
1098 return fn;
1099 }
1100
1101 /* Returns the conflict function for "independent". */
1102
1103 static conflict_function *
1104 conflict_fn_no_dependence (void)
1105 {
1106 conflict_function *fn = XCNEW (conflict_function);
1107 fn->n = NO_DEPENDENCE;
1108
1109 return fn;
1110 }
1111
1112 /* Returns true if the address of OBJ is invariant in LOOP. */
1113
1114 static bool
1115 object_address_invariant_in_loop_p (const struct loop *loop, const_tree obj)
1116 {
1117 while (handled_component_p (obj))
1118 {
1119 if (TREE_CODE (obj) == ARRAY_REF)
1120 {
1121 /* Index of the ARRAY_REF was zeroed in analyze_indices, thus we only
1122 need to check the stride and the lower bound of the reference. */
1123 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1124 loop->num)
1125 || chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 3),
1126 loop->num))
1127 return false;
1128 }
1129 else if (TREE_CODE (obj) == COMPONENT_REF)
1130 {
1131 if (chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 2),
1132 loop->num))
1133 return false;
1134 }
1135 obj = TREE_OPERAND (obj, 0);
1136 }
1137
1138 if (!INDIRECT_REF_P (obj))
1139 return true;
1140
1141 return !chrec_contains_symbols_defined_in_loop (TREE_OPERAND (obj, 0),
1142 loop->num);
1143 }
1144
1145 /* Returns true if A and B are accesses to different objects, or to different
1146 fields of the same object. */
1147
1148 static bool
1149 disjoint_objects_p (tree a, tree b)
1150 {
1151 tree base_a, base_b;
1152 VEC (tree, heap) *comp_a = NULL, *comp_b = NULL;
1153 bool ret;
1154
1155 base_a = get_base_address (a);
1156 base_b = get_base_address (b);
1157
1158 if (DECL_P (base_a)
1159 && DECL_P (base_b)
1160 && base_a != base_b)
1161 return true;
1162
1163 if (!operand_equal_p (base_a, base_b, 0))
1164 return false;
1165
1166 /* Compare the component references of A and B. We must start from the inner
1167 ones, so record them to the vector first. */
1168 while (handled_component_p (a))
1169 {
1170 VEC_safe_push (tree, heap, comp_a, a);
1171 a = TREE_OPERAND (a, 0);
1172 }
1173 while (handled_component_p (b))
1174 {
1175 VEC_safe_push (tree, heap, comp_b, b);
1176 b = TREE_OPERAND (b, 0);
1177 }
1178
1179 ret = false;
1180 while (1)
1181 {
1182 if (VEC_length (tree, comp_a) == 0
1183 || VEC_length (tree, comp_b) == 0)
1184 break;
1185
1186 a = VEC_pop (tree, comp_a);
1187 b = VEC_pop (tree, comp_b);
1188
1189 /* Real and imaginary part of a variable do not alias. */
1190 if ((TREE_CODE (a) == REALPART_EXPR
1191 && TREE_CODE (b) == IMAGPART_EXPR)
1192 || (TREE_CODE (a) == IMAGPART_EXPR
1193 && TREE_CODE (b) == REALPART_EXPR))
1194 {
1195 ret = true;
1196 break;
1197 }
1198
1199 if (TREE_CODE (a) != TREE_CODE (b))
1200 break;
1201
1202 /* Nothing to do for ARRAY_REFs, as the indices of array_refs in
1203 DR_BASE_OBJECT are always zero. */
1204 if (TREE_CODE (a) == ARRAY_REF)
1205 continue;
1206 else if (TREE_CODE (a) == COMPONENT_REF)
1207 {
1208 if (operand_equal_p (TREE_OPERAND (a, 1), TREE_OPERAND (b, 1), 0))
1209 continue;
1210
1211 /* Different fields of unions may overlap. */
1212 base_a = TREE_OPERAND (a, 0);
1213 if (TREE_CODE (TREE_TYPE (base_a)) == UNION_TYPE)
1214 break;
1215
1216 /* Different fields of structures cannot. */
1217 ret = true;
1218 break;
1219 }
1220 else
1221 break;
1222 }
1223
1224 VEC_free (tree, heap, comp_a);
1225 VEC_free (tree, heap, comp_b);
1226
1227 return ret;
1228 }
1229
1230 /* Returns false if we can prove that data references A and B do not alias,
1231 true otherwise. */
1232
1233 bool
1234 dr_may_alias_p (const struct data_reference *a, const struct data_reference *b)
1235 {
1236 const_tree addr_a = DR_BASE_ADDRESS (a);
1237 const_tree addr_b = DR_BASE_ADDRESS (b);
1238 const_tree type_a, type_b;
1239 const_tree decl_a = NULL_TREE, decl_b = NULL_TREE;
1240
1241 /* If the sets of virtual operands are disjoint, the memory references do not
1242 alias. */
1243 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
1244 return false;
1245
1246 /* If the accessed objects are disjoint, the memory references do not
1247 alias. */
1248 if (disjoint_objects_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b)))
1249 return false;
1250
1251 if (!addr_a || !addr_b)
1252 return true;
1253
1254 /* If the references are based on different static objects, they cannot alias
1255 (PTA should be able to disambiguate such accesses, but often it fails to,
1256 since currently we cannot distinguish between pointer and offset in pointer
1257 arithmetics). */
1258 if (TREE_CODE (addr_a) == ADDR_EXPR
1259 && TREE_CODE (addr_b) == ADDR_EXPR)
1260 return TREE_OPERAND (addr_a, 0) == TREE_OPERAND (addr_b, 0);
1261
1262 /* An instruction writing through a restricted pointer is "independent" of any
1263 instruction reading or writing through a different restricted pointer,
1264 in the same block/scope. */
1265
1266 type_a = TREE_TYPE (addr_a);
1267 type_b = TREE_TYPE (addr_b);
1268 gcc_assert (POINTER_TYPE_P (type_a) && POINTER_TYPE_P (type_b));
1269
1270 if (TREE_CODE (addr_a) == SSA_NAME)
1271 decl_a = SSA_NAME_VAR (addr_a);
1272 if (TREE_CODE (addr_b) == SSA_NAME)
1273 decl_b = SSA_NAME_VAR (addr_b);
1274
1275 if (TYPE_RESTRICT (type_a) && TYPE_RESTRICT (type_b)
1276 && (!DR_IS_READ (a) || !DR_IS_READ (b))
1277 && decl_a && DECL_P (decl_a)
1278 && decl_b && DECL_P (decl_b)
1279 && decl_a != decl_b
1280 && TREE_CODE (DECL_CONTEXT (decl_a)) == FUNCTION_DECL
1281 && DECL_CONTEXT (decl_a) == DECL_CONTEXT (decl_b))
1282 return false;
1283
1284 return true;
1285 }
1286
1287 static void compute_self_dependence (struct data_dependence_relation *);
1288
1289 /* Initialize a data dependence relation between data accesses A and
1290 B. NB_LOOPS is the number of loops surrounding the references: the
1291 size of the classic distance/direction vectors. */
1292
1293 static struct data_dependence_relation *
1294 initialize_data_dependence_relation (struct data_reference *a,
1295 struct data_reference *b,
1296 VEC (loop_p, heap) *loop_nest)
1297 {
1298 struct data_dependence_relation *res;
1299 unsigned int i;
1300
1301 res = XNEW (struct data_dependence_relation);
1302 DDR_A (res) = a;
1303 DDR_B (res) = b;
1304 DDR_LOOP_NEST (res) = NULL;
1305 DDR_REVERSED_P (res) = false;
1306 DDR_SUBSCRIPTS (res) = NULL;
1307 DDR_DIR_VECTS (res) = NULL;
1308 DDR_DIST_VECTS (res) = NULL;
1309
1310 if (a == NULL || b == NULL)
1311 {
1312 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1313 return res;
1314 }
1315
1316 /* If the data references do not alias, then they are independent. */
1317 if (!dr_may_alias_p (a, b))
1318 {
1319 DDR_ARE_DEPENDENT (res) = chrec_known;
1320 return res;
1321 }
1322
1323 /* When the references are exactly the same, don't spend time doing
1324 the data dependence tests, just initialize the ddr and return. */
1325 if (operand_equal_p (DR_REF (a), DR_REF (b), 0))
1326 {
1327 DDR_AFFINE_P (res) = true;
1328 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1329 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1330 DDR_LOOP_NEST (res) = loop_nest;
1331 DDR_INNER_LOOP (res) = 0;
1332 DDR_SELF_REFERENCE (res) = true;
1333 compute_self_dependence (res);
1334 return res;
1335 }
1336
1337 /* If the references do not access the same object, we do not know
1338 whether they alias or not. */
1339 if (!operand_equal_p (DR_BASE_OBJECT (a), DR_BASE_OBJECT (b), 0))
1340 {
1341 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1342 return res;
1343 }
1344
1345 /* If the base of the object is not invariant in the loop nest, we cannot
1346 analyze it. TODO -- in fact, it would suffice to record that there may
1347 be arbitrary dependences in the loops where the base object varies. */
1348 if (!object_address_invariant_in_loop_p (VEC_index (loop_p, loop_nest, 0),
1349 DR_BASE_OBJECT (a)))
1350 {
1351 DDR_ARE_DEPENDENT (res) = chrec_dont_know;
1352 return res;
1353 }
1354
1355 gcc_assert (DR_NUM_DIMENSIONS (a) == DR_NUM_DIMENSIONS (b));
1356
1357 DDR_AFFINE_P (res) = true;
1358 DDR_ARE_DEPENDENT (res) = NULL_TREE;
1359 DDR_SUBSCRIPTS (res) = VEC_alloc (subscript_p, heap, DR_NUM_DIMENSIONS (a));
1360 DDR_LOOP_NEST (res) = loop_nest;
1361 DDR_INNER_LOOP (res) = 0;
1362 DDR_SELF_REFERENCE (res) = false;
1363
1364 for (i = 0; i < DR_NUM_DIMENSIONS (a); i++)
1365 {
1366 struct subscript *subscript;
1367
1368 subscript = XNEW (struct subscript);
1369 SUB_CONFLICTS_IN_A (subscript) = conflict_fn_not_known ();
1370 SUB_CONFLICTS_IN_B (subscript) = conflict_fn_not_known ();
1371 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
1372 SUB_DISTANCE (subscript) = chrec_dont_know;
1373 VEC_safe_push (subscript_p, heap, DDR_SUBSCRIPTS (res), subscript);
1374 }
1375
1376 return res;
1377 }
1378
1379 /* Frees memory used by the conflict function F. */
1380
1381 static void
1382 free_conflict_function (conflict_function *f)
1383 {
1384 unsigned i;
1385
1386 if (CF_NONTRIVIAL_P (f))
1387 {
1388 for (i = 0; i < f->n; i++)
1389 affine_fn_free (f->fns[i]);
1390 }
1391 free (f);
1392 }
1393
1394 /* Frees memory used by SUBSCRIPTS. */
1395
1396 static void
1397 free_subscripts (VEC (subscript_p, heap) *subscripts)
1398 {
1399 unsigned i;
1400 subscript_p s;
1401
1402 for (i = 0; VEC_iterate (subscript_p, subscripts, i, s); i++)
1403 {
1404 free_conflict_function (s->conflicting_iterations_in_a);
1405 free_conflict_function (s->conflicting_iterations_in_b);
1406 free (s);
1407 }
1408 VEC_free (subscript_p, heap, subscripts);
1409 }
1410
1411 /* Set DDR_ARE_DEPENDENT to CHREC and finalize the subscript overlap
1412 description. */
1413
1414 static inline void
1415 finalize_ddr_dependent (struct data_dependence_relation *ddr,
1416 tree chrec)
1417 {
1418 if (dump_file && (dump_flags & TDF_DETAILS))
1419 {
1420 fprintf (dump_file, "(dependence classified: ");
1421 print_generic_expr (dump_file, chrec, 0);
1422 fprintf (dump_file, ")\n");
1423 }
1424
1425 DDR_ARE_DEPENDENT (ddr) = chrec;
1426 free_subscripts (DDR_SUBSCRIPTS (ddr));
1427 DDR_SUBSCRIPTS (ddr) = NULL;
1428 }
1429
1430 /* The dependence relation DDR cannot be represented by a distance
1431 vector. */
1432
1433 static inline void
1434 non_affine_dependence_relation (struct data_dependence_relation *ddr)
1435 {
1436 if (dump_file && (dump_flags & TDF_DETAILS))
1437 fprintf (dump_file, "(Dependence relation cannot be represented by distance vector.) \n");
1438
1439 DDR_AFFINE_P (ddr) = false;
1440 }
1441
1442
1443
1444 /* This section contains the classic Banerjee tests. */
1445
1446 /* Returns true iff CHREC_A and CHREC_B are not dependent on any index
1447 variables, i.e., if the ZIV (Zero Index Variable) test is true. */
1448
1449 static inline bool
1450 ziv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1451 {
1452 return (evolution_function_is_constant_p (chrec_a)
1453 && evolution_function_is_constant_p (chrec_b));
1454 }
1455
1456 /* Returns true iff CHREC_A and CHREC_B are dependent on an index
1457 variable, i.e., if the SIV (Single Index Variable) test is true. */
1458
1459 static bool
1460 siv_subscript_p (const_tree chrec_a, const_tree chrec_b)
1461 {
1462 if ((evolution_function_is_constant_p (chrec_a)
1463 && evolution_function_is_univariate_p (chrec_b))
1464 || (evolution_function_is_constant_p (chrec_b)
1465 && evolution_function_is_univariate_p (chrec_a)))
1466 return true;
1467
1468 if (evolution_function_is_univariate_p (chrec_a)
1469 && evolution_function_is_univariate_p (chrec_b))
1470 {
1471 switch (TREE_CODE (chrec_a))
1472 {
1473 case POLYNOMIAL_CHREC:
1474 switch (TREE_CODE (chrec_b))
1475 {
1476 case POLYNOMIAL_CHREC:
1477 if (CHREC_VARIABLE (chrec_a) != CHREC_VARIABLE (chrec_b))
1478 return false;
1479
1480 default:
1481 return true;
1482 }
1483
1484 default:
1485 return true;
1486 }
1487 }
1488
1489 return false;
1490 }
1491
1492 /* Creates a conflict function with N dimensions. The affine functions
1493 in each dimension follow. */
1494
1495 static conflict_function *
1496 conflict_fn (unsigned n, ...)
1497 {
1498 unsigned i;
1499 conflict_function *ret = XCNEW (conflict_function);
1500 va_list ap;
1501
1502 gcc_assert (0 < n && n <= MAX_DIM);
1503 va_start(ap, n);
1504
1505 ret->n = n;
1506 for (i = 0; i < n; i++)
1507 ret->fns[i] = va_arg (ap, affine_fn);
1508 va_end(ap);
1509
1510 return ret;
1511 }
1512
1513 /* Returns constant affine function with value CST. */
1514
1515 static affine_fn
1516 affine_fn_cst (tree cst)
1517 {
1518 affine_fn fn = VEC_alloc (tree, heap, 1);
1519 VEC_quick_push (tree, fn, cst);
1520 return fn;
1521 }
1522
1523 /* Returns affine function with single variable, CST + COEF * x_DIM. */
1524
1525 static affine_fn
1526 affine_fn_univar (tree cst, unsigned dim, tree coef)
1527 {
1528 affine_fn fn = VEC_alloc (tree, heap, dim + 1);
1529 unsigned i;
1530
1531 gcc_assert (dim > 0);
1532 VEC_quick_push (tree, fn, cst);
1533 for (i = 1; i < dim; i++)
1534 VEC_quick_push (tree, fn, integer_zero_node);
1535 VEC_quick_push (tree, fn, coef);
1536 return fn;
1537 }
1538
1539 /* Analyze a ZIV (Zero Index Variable) subscript. *OVERLAPS_A and
1540 *OVERLAPS_B are initialized to the functions that describe the
1541 relation between the elements accessed twice by CHREC_A and
1542 CHREC_B. For k >= 0, the following property is verified:
1543
1544 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1545
1546 static void
1547 analyze_ziv_subscript (tree chrec_a,
1548 tree chrec_b,
1549 conflict_function **overlaps_a,
1550 conflict_function **overlaps_b,
1551 tree *last_conflicts)
1552 {
1553 tree type, difference;
1554 dependence_stats.num_ziv++;
1555
1556 if (dump_file && (dump_flags & TDF_DETAILS))
1557 fprintf (dump_file, "(analyze_ziv_subscript \n");
1558
1559 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1560 chrec_a = chrec_convert (type, chrec_a, NULL);
1561 chrec_b = chrec_convert (type, chrec_b, NULL);
1562 difference = chrec_fold_minus (type, chrec_a, chrec_b);
1563
1564 switch (TREE_CODE (difference))
1565 {
1566 case INTEGER_CST:
1567 if (integer_zerop (difference))
1568 {
1569 /* The difference is equal to zero: the accessed index
1570 overlaps for each iteration in the loop. */
1571 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1572 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
1573 *last_conflicts = chrec_dont_know;
1574 dependence_stats.num_ziv_dependent++;
1575 }
1576 else
1577 {
1578 /* The accesses do not overlap. */
1579 *overlaps_a = conflict_fn_no_dependence ();
1580 *overlaps_b = conflict_fn_no_dependence ();
1581 *last_conflicts = integer_zero_node;
1582 dependence_stats.num_ziv_independent++;
1583 }
1584 break;
1585
1586 default:
1587 /* We're not sure whether the indexes overlap. For the moment,
1588 conservatively answer "don't know". */
1589 if (dump_file && (dump_flags & TDF_DETAILS))
1590 fprintf (dump_file, "ziv test failed: difference is non-integer.\n");
1591
1592 *overlaps_a = conflict_fn_not_known ();
1593 *overlaps_b = conflict_fn_not_known ();
1594 *last_conflicts = chrec_dont_know;
1595 dependence_stats.num_ziv_unimplemented++;
1596 break;
1597 }
1598
1599 if (dump_file && (dump_flags & TDF_DETAILS))
1600 fprintf (dump_file, ")\n");
1601 }
1602
1603 /* Sets NIT to the estimated number of executions of the statements in
1604 LOOP. If CONSERVATIVE is true, we must be sure that NIT is at least as
1605 large as the number of iterations. If we have no reliable estimate,
1606 the function returns false, otherwise returns true. */
1607
1608 bool
1609 estimated_loop_iterations (struct loop *loop, bool conservative,
1610 double_int *nit)
1611 {
1612 estimate_numbers_of_iterations_loop (loop);
1613 if (conservative)
1614 {
1615 if (!loop->any_upper_bound)
1616 return false;
1617
1618 *nit = loop->nb_iterations_upper_bound;
1619 }
1620 else
1621 {
1622 if (!loop->any_estimate)
1623 return false;
1624
1625 *nit = loop->nb_iterations_estimate;
1626 }
1627
1628 return true;
1629 }
1630
1631 /* Similar to estimated_loop_iterations, but returns the estimate only
1632 if it fits to HOST_WIDE_INT. If this is not the case, or the estimate
1633 on the number of iterations of LOOP could not be derived, returns -1. */
1634
1635 HOST_WIDE_INT
1636 estimated_loop_iterations_int (struct loop *loop, bool conservative)
1637 {
1638 double_int nit;
1639 HOST_WIDE_INT hwi_nit;
1640
1641 if (!estimated_loop_iterations (loop, conservative, &nit))
1642 return -1;
1643
1644 if (!double_int_fits_in_shwi_p (nit))
1645 return -1;
1646 hwi_nit = double_int_to_shwi (nit);
1647
1648 return hwi_nit < 0 ? -1 : hwi_nit;
1649 }
1650
1651 /* Similar to estimated_loop_iterations, but returns the estimate as a tree,
1652 and only if it fits to the int type. If this is not the case, or the
1653 estimate on the number of iterations of LOOP could not be derived, returns
1654 chrec_dont_know. */
1655
1656 static tree
1657 estimated_loop_iterations_tree (struct loop *loop, bool conservative)
1658 {
1659 double_int nit;
1660 tree type;
1661
1662 if (!estimated_loop_iterations (loop, conservative, &nit))
1663 return chrec_dont_know;
1664
1665 type = lang_hooks.types.type_for_size (INT_TYPE_SIZE, true);
1666 if (!double_int_fits_to_tree_p (type, nit))
1667 return chrec_dont_know;
1668
1669 return double_int_to_tree (type, nit);
1670 }
1671
1672 /* Analyze a SIV (Single Index Variable) subscript where CHREC_A is a
1673 constant, and CHREC_B is an affine function. *OVERLAPS_A and
1674 *OVERLAPS_B are initialized to the functions that describe the
1675 relation between the elements accessed twice by CHREC_A and
1676 CHREC_B. For k >= 0, the following property is verified:
1677
1678 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
1679
1680 static void
1681 analyze_siv_subscript_cst_affine (tree chrec_a,
1682 tree chrec_b,
1683 conflict_function **overlaps_a,
1684 conflict_function **overlaps_b,
1685 tree *last_conflicts)
1686 {
1687 bool value0, value1, value2;
1688 tree type, difference, tmp;
1689
1690 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
1691 chrec_a = chrec_convert (type, chrec_a, NULL);
1692 chrec_b = chrec_convert (type, chrec_b, NULL);
1693 difference = chrec_fold_minus (type, initial_condition (chrec_b), chrec_a);
1694
1695 if (!chrec_is_positive (initial_condition (difference), &value0))
1696 {
1697 if (dump_file && (dump_flags & TDF_DETAILS))
1698 fprintf (dump_file, "siv test failed: chrec is not positive.\n");
1699
1700 dependence_stats.num_siv_unimplemented++;
1701 *overlaps_a = conflict_fn_not_known ();
1702 *overlaps_b = conflict_fn_not_known ();
1703 *last_conflicts = chrec_dont_know;
1704 return;
1705 }
1706 else
1707 {
1708 if (value0 == false)
1709 {
1710 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value1))
1711 {
1712 if (dump_file && (dump_flags & TDF_DETAILS))
1713 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1714
1715 *overlaps_a = conflict_fn_not_known ();
1716 *overlaps_b = conflict_fn_not_known ();
1717 *last_conflicts = chrec_dont_know;
1718 dependence_stats.num_siv_unimplemented++;
1719 return;
1720 }
1721 else
1722 {
1723 if (value1 == true)
1724 {
1725 /* Example:
1726 chrec_a = 12
1727 chrec_b = {10, +, 1}
1728 */
1729
1730 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1731 {
1732 HOST_WIDE_INT numiter;
1733 struct loop *loop = get_chrec_loop (chrec_b);
1734
1735 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1736 tmp = fold_build2 (EXACT_DIV_EXPR, type,
1737 fold_build1 (ABS_EXPR, type, difference),
1738 CHREC_RIGHT (chrec_b));
1739 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1740 *last_conflicts = integer_one_node;
1741
1742
1743 /* Perform weak-zero siv test to see if overlap is
1744 outside the loop bounds. */
1745 numiter = estimated_loop_iterations_int (loop, false);
1746
1747 if (numiter >= 0
1748 && compare_tree_int (tmp, numiter) > 0)
1749 {
1750 free_conflict_function (*overlaps_a);
1751 free_conflict_function (*overlaps_b);
1752 *overlaps_a = conflict_fn_no_dependence ();
1753 *overlaps_b = conflict_fn_no_dependence ();
1754 *last_conflicts = integer_zero_node;
1755 dependence_stats.num_siv_independent++;
1756 return;
1757 }
1758 dependence_stats.num_siv_dependent++;
1759 return;
1760 }
1761
1762 /* When the step does not divide the difference, there are
1763 no overlaps. */
1764 else
1765 {
1766 *overlaps_a = conflict_fn_no_dependence ();
1767 *overlaps_b = conflict_fn_no_dependence ();
1768 *last_conflicts = integer_zero_node;
1769 dependence_stats.num_siv_independent++;
1770 return;
1771 }
1772 }
1773
1774 else
1775 {
1776 /* Example:
1777 chrec_a = 12
1778 chrec_b = {10, +, -1}
1779
1780 In this case, chrec_a will not overlap with chrec_b. */
1781 *overlaps_a = conflict_fn_no_dependence ();
1782 *overlaps_b = conflict_fn_no_dependence ();
1783 *last_conflicts = integer_zero_node;
1784 dependence_stats.num_siv_independent++;
1785 return;
1786 }
1787 }
1788 }
1789 else
1790 {
1791 if (!chrec_is_positive (CHREC_RIGHT (chrec_b), &value2))
1792 {
1793 if (dump_file && (dump_flags & TDF_DETAILS))
1794 fprintf (dump_file, "siv test failed: chrec not positive.\n");
1795
1796 *overlaps_a = conflict_fn_not_known ();
1797 *overlaps_b = conflict_fn_not_known ();
1798 *last_conflicts = chrec_dont_know;
1799 dependence_stats.num_siv_unimplemented++;
1800 return;
1801 }
1802 else
1803 {
1804 if (value2 == false)
1805 {
1806 /* Example:
1807 chrec_a = 3
1808 chrec_b = {10, +, -1}
1809 */
1810 if (tree_fold_divides_p (CHREC_RIGHT (chrec_b), difference))
1811 {
1812 HOST_WIDE_INT numiter;
1813 struct loop *loop = get_chrec_loop (chrec_b);
1814
1815 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
1816 tmp = fold_build2 (EXACT_DIV_EXPR, type, difference,
1817 CHREC_RIGHT (chrec_b));
1818 *overlaps_b = conflict_fn (1, affine_fn_cst (tmp));
1819 *last_conflicts = integer_one_node;
1820
1821 /* Perform weak-zero siv test to see if overlap is
1822 outside the loop bounds. */
1823 numiter = estimated_loop_iterations_int (loop, false);
1824
1825 if (numiter >= 0
1826 && compare_tree_int (tmp, numiter) > 0)
1827 {
1828 free_conflict_function (*overlaps_a);
1829 free_conflict_function (*overlaps_b);
1830 *overlaps_a = conflict_fn_no_dependence ();
1831 *overlaps_b = conflict_fn_no_dependence ();
1832 *last_conflicts = integer_zero_node;
1833 dependence_stats.num_siv_independent++;
1834 return;
1835 }
1836 dependence_stats.num_siv_dependent++;
1837 return;
1838 }
1839
1840 /* When the step does not divide the difference, there
1841 are no overlaps. */
1842 else
1843 {
1844 *overlaps_a = conflict_fn_no_dependence ();
1845 *overlaps_b = conflict_fn_no_dependence ();
1846 *last_conflicts = integer_zero_node;
1847 dependence_stats.num_siv_independent++;
1848 return;
1849 }
1850 }
1851 else
1852 {
1853 /* Example:
1854 chrec_a = 3
1855 chrec_b = {4, +, 1}
1856
1857 In this case, chrec_a will not overlap with chrec_b. */
1858 *overlaps_a = conflict_fn_no_dependence ();
1859 *overlaps_b = conflict_fn_no_dependence ();
1860 *last_conflicts = integer_zero_node;
1861 dependence_stats.num_siv_independent++;
1862 return;
1863 }
1864 }
1865 }
1866 }
1867 }
1868
1869 /* Helper recursive function for initializing the matrix A. Returns
1870 the initial value of CHREC. */
1871
1872 static tree
1873 initialize_matrix_A (lambda_matrix A, tree chrec, unsigned index, int mult)
1874 {
1875 gcc_assert (chrec);
1876
1877 switch (TREE_CODE (chrec))
1878 {
1879 case POLYNOMIAL_CHREC:
1880 gcc_assert (TREE_CODE (CHREC_RIGHT (chrec)) == INTEGER_CST);
1881
1882 A[index][0] = mult * int_cst_value (CHREC_RIGHT (chrec));
1883 return initialize_matrix_A (A, CHREC_LEFT (chrec), index + 1, mult);
1884
1885 case PLUS_EXPR:
1886 case MULT_EXPR:
1887 case MINUS_EXPR:
1888 {
1889 tree op0 = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1890 tree op1 = initialize_matrix_A (A, TREE_OPERAND (chrec, 1), index, mult);
1891
1892 return chrec_fold_op (TREE_CODE (chrec), chrec_type (chrec), op0, op1);
1893 }
1894
1895 case NOP_EXPR:
1896 {
1897 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1898 return chrec_convert (chrec_type (chrec), op, NULL);
1899 }
1900
1901 case BIT_NOT_EXPR:
1902 {
1903 /* Handle ~X as -1 - X. */
1904 tree op = initialize_matrix_A (A, TREE_OPERAND (chrec, 0), index, mult);
1905 return chrec_fold_op (MINUS_EXPR, chrec_type (chrec),
1906 build_int_cst (TREE_TYPE (chrec), -1), op);
1907 }
1908
1909 case INTEGER_CST:
1910 return chrec;
1911
1912 default:
1913 gcc_unreachable ();
1914 return NULL_TREE;
1915 }
1916 }
1917
1918 #define FLOOR_DIV(x,y) ((x) / (y))
1919
1920 /* Solves the special case of the Diophantine equation:
1921 | {0, +, STEP_A}_x (OVERLAPS_A) = {0, +, STEP_B}_y (OVERLAPS_B)
1922
1923 Computes the descriptions OVERLAPS_A and OVERLAPS_B. NITER is the
1924 number of iterations that loops X and Y run. The overlaps will be
1925 constructed as evolutions in dimension DIM. */
1926
1927 static void
1928 compute_overlap_steps_for_affine_univar (int niter, int step_a, int step_b,
1929 affine_fn *overlaps_a,
1930 affine_fn *overlaps_b,
1931 tree *last_conflicts, int dim)
1932 {
1933 if (((step_a > 0 && step_b > 0)
1934 || (step_a < 0 && step_b < 0)))
1935 {
1936 int step_overlaps_a, step_overlaps_b;
1937 int gcd_steps_a_b, last_conflict, tau2;
1938
1939 gcd_steps_a_b = gcd (step_a, step_b);
1940 step_overlaps_a = step_b / gcd_steps_a_b;
1941 step_overlaps_b = step_a / gcd_steps_a_b;
1942
1943 if (niter > 0)
1944 {
1945 tau2 = FLOOR_DIV (niter, step_overlaps_a);
1946 tau2 = MIN (tau2, FLOOR_DIV (niter, step_overlaps_b));
1947 last_conflict = tau2;
1948 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
1949 }
1950 else
1951 *last_conflicts = chrec_dont_know;
1952
1953 *overlaps_a = affine_fn_univar (integer_zero_node, dim,
1954 build_int_cst (NULL_TREE,
1955 step_overlaps_a));
1956 *overlaps_b = affine_fn_univar (integer_zero_node, dim,
1957 build_int_cst (NULL_TREE,
1958 step_overlaps_b));
1959 }
1960
1961 else
1962 {
1963 *overlaps_a = affine_fn_cst (integer_zero_node);
1964 *overlaps_b = affine_fn_cst (integer_zero_node);
1965 *last_conflicts = integer_zero_node;
1966 }
1967 }
1968
1969 /* Solves the special case of a Diophantine equation where CHREC_A is
1970 an affine bivariate function, and CHREC_B is an affine univariate
1971 function. For example,
1972
1973 | {{0, +, 1}_x, +, 1335}_y = {0, +, 1336}_z
1974
1975 has the following overlapping functions:
1976
1977 | x (t, u, v) = {{0, +, 1336}_t, +, 1}_v
1978 | y (t, u, v) = {{0, +, 1336}_u, +, 1}_v
1979 | z (t, u, v) = {{{0, +, 1}_t, +, 1335}_u, +, 1}_v
1980
1981 FORNOW: This is a specialized implementation for a case occurring in
1982 a common benchmark. Implement the general algorithm. */
1983
1984 static void
1985 compute_overlap_steps_for_affine_1_2 (tree chrec_a, tree chrec_b,
1986 conflict_function **overlaps_a,
1987 conflict_function **overlaps_b,
1988 tree *last_conflicts)
1989 {
1990 bool xz_p, yz_p, xyz_p;
1991 int step_x, step_y, step_z;
1992 HOST_WIDE_INT niter_x, niter_y, niter_z, niter;
1993 affine_fn overlaps_a_xz, overlaps_b_xz;
1994 affine_fn overlaps_a_yz, overlaps_b_yz;
1995 affine_fn overlaps_a_xyz, overlaps_b_xyz;
1996 affine_fn ova1, ova2, ovb;
1997 tree last_conflicts_xz, last_conflicts_yz, last_conflicts_xyz;
1998
1999 step_x = int_cst_value (CHREC_RIGHT (CHREC_LEFT (chrec_a)));
2000 step_y = int_cst_value (CHREC_RIGHT (chrec_a));
2001 step_z = int_cst_value (CHREC_RIGHT (chrec_b));
2002
2003 niter_x =
2004 estimated_loop_iterations_int (get_chrec_loop (CHREC_LEFT (chrec_a)),
2005 false);
2006 niter_y = estimated_loop_iterations_int (get_chrec_loop (chrec_a), false);
2007 niter_z = estimated_loop_iterations_int (get_chrec_loop (chrec_b), false);
2008
2009 if (niter_x < 0 || niter_y < 0 || niter_z < 0)
2010 {
2011 if (dump_file && (dump_flags & TDF_DETAILS))
2012 fprintf (dump_file, "overlap steps test failed: no iteration counts.\n");
2013
2014 *overlaps_a = conflict_fn_not_known ();
2015 *overlaps_b = conflict_fn_not_known ();
2016 *last_conflicts = chrec_dont_know;
2017 return;
2018 }
2019
2020 niter = MIN (niter_x, niter_z);
2021 compute_overlap_steps_for_affine_univar (niter, step_x, step_z,
2022 &overlaps_a_xz,
2023 &overlaps_b_xz,
2024 &last_conflicts_xz, 1);
2025 niter = MIN (niter_y, niter_z);
2026 compute_overlap_steps_for_affine_univar (niter, step_y, step_z,
2027 &overlaps_a_yz,
2028 &overlaps_b_yz,
2029 &last_conflicts_yz, 2);
2030 niter = MIN (niter_x, niter_z);
2031 niter = MIN (niter_y, niter);
2032 compute_overlap_steps_for_affine_univar (niter, step_x + step_y, step_z,
2033 &overlaps_a_xyz,
2034 &overlaps_b_xyz,
2035 &last_conflicts_xyz, 3);
2036
2037 xz_p = !integer_zerop (last_conflicts_xz);
2038 yz_p = !integer_zerop (last_conflicts_yz);
2039 xyz_p = !integer_zerop (last_conflicts_xyz);
2040
2041 if (xz_p || yz_p || xyz_p)
2042 {
2043 ova1 = affine_fn_cst (integer_zero_node);
2044 ova2 = affine_fn_cst (integer_zero_node);
2045 ovb = affine_fn_cst (integer_zero_node);
2046 if (xz_p)
2047 {
2048 affine_fn t0 = ova1;
2049 affine_fn t2 = ovb;
2050
2051 ova1 = affine_fn_plus (ova1, overlaps_a_xz);
2052 ovb = affine_fn_plus (ovb, overlaps_b_xz);
2053 affine_fn_free (t0);
2054 affine_fn_free (t2);
2055 *last_conflicts = last_conflicts_xz;
2056 }
2057 if (yz_p)
2058 {
2059 affine_fn t0 = ova2;
2060 affine_fn t2 = ovb;
2061
2062 ova2 = affine_fn_plus (ova2, overlaps_a_yz);
2063 ovb = affine_fn_plus (ovb, overlaps_b_yz);
2064 affine_fn_free (t0);
2065 affine_fn_free (t2);
2066 *last_conflicts = last_conflicts_yz;
2067 }
2068 if (xyz_p)
2069 {
2070 affine_fn t0 = ova1;
2071 affine_fn t2 = ova2;
2072 affine_fn t4 = ovb;
2073
2074 ova1 = affine_fn_plus (ova1, overlaps_a_xyz);
2075 ova2 = affine_fn_plus (ova2, overlaps_a_xyz);
2076 ovb = affine_fn_plus (ovb, overlaps_b_xyz);
2077 affine_fn_free (t0);
2078 affine_fn_free (t2);
2079 affine_fn_free (t4);
2080 *last_conflicts = last_conflicts_xyz;
2081 }
2082 *overlaps_a = conflict_fn (2, ova1, ova2);
2083 *overlaps_b = conflict_fn (1, ovb);
2084 }
2085 else
2086 {
2087 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2088 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2089 *last_conflicts = integer_zero_node;
2090 }
2091
2092 affine_fn_free (overlaps_a_xz);
2093 affine_fn_free (overlaps_b_xz);
2094 affine_fn_free (overlaps_a_yz);
2095 affine_fn_free (overlaps_b_yz);
2096 affine_fn_free (overlaps_a_xyz);
2097 affine_fn_free (overlaps_b_xyz);
2098 }
2099
2100 /* Determines the overlapping elements due to accesses CHREC_A and
2101 CHREC_B, that are affine functions. This function cannot handle
2102 symbolic evolution functions, ie. when initial conditions are
2103 parameters, because it uses lambda matrices of integers. */
2104
2105 static void
2106 analyze_subscript_affine_affine (tree chrec_a,
2107 tree chrec_b,
2108 conflict_function **overlaps_a,
2109 conflict_function **overlaps_b,
2110 tree *last_conflicts)
2111 {
2112 unsigned nb_vars_a, nb_vars_b, dim;
2113 HOST_WIDE_INT init_a, init_b, gamma, gcd_alpha_beta;
2114 lambda_matrix A, U, S;
2115
2116 if (eq_evolutions_p (chrec_a, chrec_b))
2117 {
2118 /* The accessed index overlaps for each iteration in the
2119 loop. */
2120 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2121 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2122 *last_conflicts = chrec_dont_know;
2123 return;
2124 }
2125 if (dump_file && (dump_flags & TDF_DETAILS))
2126 fprintf (dump_file, "(analyze_subscript_affine_affine \n");
2127
2128 /* For determining the initial intersection, we have to solve a
2129 Diophantine equation. This is the most time consuming part.
2130
2131 For answering to the question: "Is there a dependence?" we have
2132 to prove that there exists a solution to the Diophantine
2133 equation, and that the solution is in the iteration domain,
2134 i.e. the solution is positive or zero, and that the solution
2135 happens before the upper bound loop.nb_iterations. Otherwise
2136 there is no dependence. This function outputs a description of
2137 the iterations that hold the intersections. */
2138
2139 nb_vars_a = nb_vars_in_chrec (chrec_a);
2140 nb_vars_b = nb_vars_in_chrec (chrec_b);
2141
2142 dim = nb_vars_a + nb_vars_b;
2143 U = lambda_matrix_new (dim, dim);
2144 A = lambda_matrix_new (dim, 1);
2145 S = lambda_matrix_new (dim, 1);
2146
2147 init_a = int_cst_value (initialize_matrix_A (A, chrec_a, 0, 1));
2148 init_b = int_cst_value (initialize_matrix_A (A, chrec_b, nb_vars_a, -1));
2149 gamma = init_b - init_a;
2150
2151 /* Don't do all the hard work of solving the Diophantine equation
2152 when we already know the solution: for example,
2153 | {3, +, 1}_1
2154 | {3, +, 4}_2
2155 | gamma = 3 - 3 = 0.
2156 Then the first overlap occurs during the first iterations:
2157 | {3, +, 1}_1 ({0, +, 4}_x) = {3, +, 4}_2 ({0, +, 1}_x)
2158 */
2159 if (gamma == 0)
2160 {
2161 if (nb_vars_a == 1 && nb_vars_b == 1)
2162 {
2163 HOST_WIDE_INT step_a, step_b;
2164 HOST_WIDE_INT niter, niter_a, niter_b;
2165 affine_fn ova, ovb;
2166
2167 niter_a = estimated_loop_iterations_int (get_chrec_loop (chrec_a),
2168 false);
2169 niter_b = estimated_loop_iterations_int (get_chrec_loop (chrec_b),
2170 false);
2171 niter = MIN (niter_a, niter_b);
2172 step_a = int_cst_value (CHREC_RIGHT (chrec_a));
2173 step_b = int_cst_value (CHREC_RIGHT (chrec_b));
2174
2175 compute_overlap_steps_for_affine_univar (niter, step_a, step_b,
2176 &ova, &ovb,
2177 last_conflicts, 1);
2178 *overlaps_a = conflict_fn (1, ova);
2179 *overlaps_b = conflict_fn (1, ovb);
2180 }
2181
2182 else if (nb_vars_a == 2 && nb_vars_b == 1)
2183 compute_overlap_steps_for_affine_1_2
2184 (chrec_a, chrec_b, overlaps_a, overlaps_b, last_conflicts);
2185
2186 else if (nb_vars_a == 1 && nb_vars_b == 2)
2187 compute_overlap_steps_for_affine_1_2
2188 (chrec_b, chrec_a, overlaps_b, overlaps_a, last_conflicts);
2189
2190 else
2191 {
2192 if (dump_file && (dump_flags & TDF_DETAILS))
2193 fprintf (dump_file, "affine-affine test failed: too many variables.\n");
2194 *overlaps_a = conflict_fn_not_known ();
2195 *overlaps_b = conflict_fn_not_known ();
2196 *last_conflicts = chrec_dont_know;
2197 }
2198 goto end_analyze_subs_aa;
2199 }
2200
2201 /* U.A = S */
2202 lambda_matrix_right_hermite (A, dim, 1, S, U);
2203
2204 if (S[0][0] < 0)
2205 {
2206 S[0][0] *= -1;
2207 lambda_matrix_row_negate (U, dim, 0);
2208 }
2209 gcd_alpha_beta = S[0][0];
2210
2211 /* Something went wrong: for example in {1, +, 0}_5 vs. {0, +, 0}_5,
2212 but that is a quite strange case. Instead of ICEing, answer
2213 don't know. */
2214 if (gcd_alpha_beta == 0)
2215 {
2216 *overlaps_a = conflict_fn_not_known ();
2217 *overlaps_b = conflict_fn_not_known ();
2218 *last_conflicts = chrec_dont_know;
2219 goto end_analyze_subs_aa;
2220 }
2221
2222 /* The classic "gcd-test". */
2223 if (!int_divides_p (gcd_alpha_beta, gamma))
2224 {
2225 /* The "gcd-test" has determined that there is no integer
2226 solution, i.e. there is no dependence. */
2227 *overlaps_a = conflict_fn_no_dependence ();
2228 *overlaps_b = conflict_fn_no_dependence ();
2229 *last_conflicts = integer_zero_node;
2230 }
2231
2232 /* Both access functions are univariate. This includes SIV and MIV cases. */
2233 else if (nb_vars_a == 1 && nb_vars_b == 1)
2234 {
2235 /* Both functions should have the same evolution sign. */
2236 if (((A[0][0] > 0 && -A[1][0] > 0)
2237 || (A[0][0] < 0 && -A[1][0] < 0)))
2238 {
2239 /* The solutions are given by:
2240 |
2241 | [GAMMA/GCD_ALPHA_BETA t].[u11 u12] = [x0]
2242 | [u21 u22] [y0]
2243
2244 For a given integer t. Using the following variables,
2245
2246 | i0 = u11 * gamma / gcd_alpha_beta
2247 | j0 = u12 * gamma / gcd_alpha_beta
2248 | i1 = u21
2249 | j1 = u22
2250
2251 the solutions are:
2252
2253 | x0 = i0 + i1 * t,
2254 | y0 = j0 + j1 * t. */
2255 HOST_WIDE_INT i0, j0, i1, j1;
2256
2257 i0 = U[0][0] * gamma / gcd_alpha_beta;
2258 j0 = U[0][1] * gamma / gcd_alpha_beta;
2259 i1 = U[1][0];
2260 j1 = U[1][1];
2261
2262 if ((i1 == 0 && i0 < 0)
2263 || (j1 == 0 && j0 < 0))
2264 {
2265 /* There is no solution.
2266 FIXME: The case "i0 > nb_iterations, j0 > nb_iterations"
2267 falls in here, but for the moment we don't look at the
2268 upper bound of the iteration domain. */
2269 *overlaps_a = conflict_fn_no_dependence ();
2270 *overlaps_b = conflict_fn_no_dependence ();
2271 *last_conflicts = integer_zero_node;
2272 goto end_analyze_subs_aa;
2273 }
2274
2275 if (i1 > 0 && j1 > 0)
2276 {
2277 HOST_WIDE_INT niter_a = estimated_loop_iterations_int
2278 (get_chrec_loop (chrec_a), false);
2279 HOST_WIDE_INT niter_b = estimated_loop_iterations_int
2280 (get_chrec_loop (chrec_b), false);
2281 HOST_WIDE_INT niter = MIN (niter_a, niter_b);
2282
2283 /* (X0, Y0) is a solution of the Diophantine equation:
2284 "chrec_a (X0) = chrec_b (Y0)". */
2285 HOST_WIDE_INT tau1 = MAX (CEIL (-i0, i1),
2286 CEIL (-j0, j1));
2287 HOST_WIDE_INT x0 = i1 * tau1 + i0;
2288 HOST_WIDE_INT y0 = j1 * tau1 + j0;
2289
2290 /* (X1, Y1) is the smallest positive solution of the eq
2291 "chrec_a (X1) = chrec_b (Y1)", i.e. this is where the
2292 first conflict occurs. */
2293 HOST_WIDE_INT min_multiple = MIN (x0 / i1, y0 / j1);
2294 HOST_WIDE_INT x1 = x0 - i1 * min_multiple;
2295 HOST_WIDE_INT y1 = y0 - j1 * min_multiple;
2296
2297 if (niter > 0)
2298 {
2299 HOST_WIDE_INT tau2 = MIN (FLOOR_DIV (niter - i0, i1),
2300 FLOOR_DIV (niter - j0, j1));
2301 HOST_WIDE_INT last_conflict = tau2 - (x1 - i0)/i1;
2302
2303 /* If the overlap occurs outside of the bounds of the
2304 loop, there is no dependence. */
2305 if (x1 >= niter || y1 >= niter)
2306 {
2307 *overlaps_a = conflict_fn_no_dependence ();
2308 *overlaps_b = conflict_fn_no_dependence ();
2309 *last_conflicts = integer_zero_node;
2310 goto end_analyze_subs_aa;
2311 }
2312 else
2313 *last_conflicts = build_int_cst (NULL_TREE, last_conflict);
2314 }
2315 else
2316 *last_conflicts = chrec_dont_know;
2317
2318 *overlaps_a
2319 = conflict_fn (1,
2320 affine_fn_univar (build_int_cst (NULL_TREE, x1),
2321 1,
2322 build_int_cst (NULL_TREE, i1)));
2323 *overlaps_b
2324 = conflict_fn (1,
2325 affine_fn_univar (build_int_cst (NULL_TREE, y1),
2326 1,
2327 build_int_cst (NULL_TREE, j1)));
2328 }
2329 else
2330 {
2331 /* FIXME: For the moment, the upper bound of the
2332 iteration domain for i and j is not checked. */
2333 if (dump_file && (dump_flags & TDF_DETAILS))
2334 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2335 *overlaps_a = conflict_fn_not_known ();
2336 *overlaps_b = conflict_fn_not_known ();
2337 *last_conflicts = chrec_dont_know;
2338 }
2339 }
2340 else
2341 {
2342 if (dump_file && (dump_flags & TDF_DETAILS))
2343 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2344 *overlaps_a = conflict_fn_not_known ();
2345 *overlaps_b = conflict_fn_not_known ();
2346 *last_conflicts = chrec_dont_know;
2347 }
2348 }
2349 else
2350 {
2351 if (dump_file && (dump_flags & TDF_DETAILS))
2352 fprintf (dump_file, "affine-affine test failed: unimplemented.\n");
2353 *overlaps_a = conflict_fn_not_known ();
2354 *overlaps_b = conflict_fn_not_known ();
2355 *last_conflicts = chrec_dont_know;
2356 }
2357
2358 end_analyze_subs_aa:
2359 if (dump_file && (dump_flags & TDF_DETAILS))
2360 {
2361 fprintf (dump_file, " (overlaps_a = ");
2362 dump_conflict_function (dump_file, *overlaps_a);
2363 fprintf (dump_file, ")\n (overlaps_b = ");
2364 dump_conflict_function (dump_file, *overlaps_b);
2365 fprintf (dump_file, ")\n");
2366 fprintf (dump_file, ")\n");
2367 }
2368 }
2369
2370 /* Returns true when analyze_subscript_affine_affine can be used for
2371 determining the dependence relation between chrec_a and chrec_b,
2372 that contain symbols. This function modifies chrec_a and chrec_b
2373 such that the analysis result is the same, and such that they don't
2374 contain symbols, and then can safely be passed to the analyzer.
2375
2376 Example: The analysis of the following tuples of evolutions produce
2377 the same results: {x+1, +, 1}_1 vs. {x+3, +, 1}_1, and {-2, +, 1}_1
2378 vs. {0, +, 1}_1
2379
2380 {x+1, +, 1}_1 ({2, +, 1}_1) = {x+3, +, 1}_1 ({0, +, 1}_1)
2381 {-2, +, 1}_1 ({2, +, 1}_1) = {0, +, 1}_1 ({0, +, 1}_1)
2382 */
2383
2384 static bool
2385 can_use_analyze_subscript_affine_affine (tree *chrec_a, tree *chrec_b)
2386 {
2387 tree diff, type, left_a, left_b, right_b;
2388
2389 if (chrec_contains_symbols (CHREC_RIGHT (*chrec_a))
2390 || chrec_contains_symbols (CHREC_RIGHT (*chrec_b)))
2391 /* FIXME: For the moment not handled. Might be refined later. */
2392 return false;
2393
2394 type = chrec_type (*chrec_a);
2395 left_a = CHREC_LEFT (*chrec_a);
2396 left_b = chrec_convert (type, CHREC_LEFT (*chrec_b), NULL);
2397 diff = chrec_fold_minus (type, left_a, left_b);
2398
2399 if (!evolution_function_is_constant_p (diff))
2400 return false;
2401
2402 if (dump_file && (dump_flags & TDF_DETAILS))
2403 fprintf (dump_file, "can_use_subscript_aff_aff_for_symbolic \n");
2404
2405 *chrec_a = build_polynomial_chrec (CHREC_VARIABLE (*chrec_a),
2406 diff, CHREC_RIGHT (*chrec_a));
2407 right_b = chrec_convert (type, CHREC_RIGHT (*chrec_b), NULL);
2408 *chrec_b = build_polynomial_chrec (CHREC_VARIABLE (*chrec_b),
2409 build_int_cst (type, 0),
2410 right_b);
2411 return true;
2412 }
2413
2414 /* Analyze a SIV (Single Index Variable) subscript. *OVERLAPS_A and
2415 *OVERLAPS_B are initialized to the functions that describe the
2416 relation between the elements accessed twice by CHREC_A and
2417 CHREC_B. For k >= 0, the following property is verified:
2418
2419 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2420
2421 static void
2422 analyze_siv_subscript (tree chrec_a,
2423 tree chrec_b,
2424 conflict_function **overlaps_a,
2425 conflict_function **overlaps_b,
2426 tree *last_conflicts,
2427 int loop_nest_num)
2428 {
2429 dependence_stats.num_siv++;
2430
2431 if (dump_file && (dump_flags & TDF_DETAILS))
2432 fprintf (dump_file, "(analyze_siv_subscript \n");
2433
2434 if (evolution_function_is_constant_p (chrec_a)
2435 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2436 analyze_siv_subscript_cst_affine (chrec_a, chrec_b,
2437 overlaps_a, overlaps_b, last_conflicts);
2438
2439 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2440 && evolution_function_is_constant_p (chrec_b))
2441 analyze_siv_subscript_cst_affine (chrec_b, chrec_a,
2442 overlaps_b, overlaps_a, last_conflicts);
2443
2444 else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
2445 && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
2446 {
2447 if (!chrec_contains_symbols (chrec_a)
2448 && !chrec_contains_symbols (chrec_b))
2449 {
2450 analyze_subscript_affine_affine (chrec_a, chrec_b,
2451 overlaps_a, overlaps_b,
2452 last_conflicts);
2453
2454 if (CF_NOT_KNOWN_P (*overlaps_a)
2455 || CF_NOT_KNOWN_P (*overlaps_b))
2456 dependence_stats.num_siv_unimplemented++;
2457 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2458 || CF_NO_DEPENDENCE_P (*overlaps_b))
2459 dependence_stats.num_siv_independent++;
2460 else
2461 dependence_stats.num_siv_dependent++;
2462 }
2463 else if (can_use_analyze_subscript_affine_affine (&chrec_a,
2464 &chrec_b))
2465 {
2466 analyze_subscript_affine_affine (chrec_a, chrec_b,
2467 overlaps_a, overlaps_b,
2468 last_conflicts);
2469
2470 if (CF_NOT_KNOWN_P (*overlaps_a)
2471 || CF_NOT_KNOWN_P (*overlaps_b))
2472 dependence_stats.num_siv_unimplemented++;
2473 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2474 || CF_NO_DEPENDENCE_P (*overlaps_b))
2475 dependence_stats.num_siv_independent++;
2476 else
2477 dependence_stats.num_siv_dependent++;
2478 }
2479 else
2480 goto siv_subscript_dontknow;
2481 }
2482
2483 else
2484 {
2485 siv_subscript_dontknow:;
2486 if (dump_file && (dump_flags & TDF_DETAILS))
2487 fprintf (dump_file, "siv test failed: unimplemented.\n");
2488 *overlaps_a = conflict_fn_not_known ();
2489 *overlaps_b = conflict_fn_not_known ();
2490 *last_conflicts = chrec_dont_know;
2491 dependence_stats.num_siv_unimplemented++;
2492 }
2493
2494 if (dump_file && (dump_flags & TDF_DETAILS))
2495 fprintf (dump_file, ")\n");
2496 }
2497
2498 /* Returns false if we can prove that the greatest common divisor of the steps
2499 of CHREC does not divide CST, false otherwise. */
2500
2501 static bool
2502 gcd_of_steps_may_divide_p (const_tree chrec, const_tree cst)
2503 {
2504 HOST_WIDE_INT cd = 0, val;
2505 tree step;
2506
2507 if (!host_integerp (cst, 0))
2508 return true;
2509 val = tree_low_cst (cst, 0);
2510
2511 while (TREE_CODE (chrec) == POLYNOMIAL_CHREC)
2512 {
2513 step = CHREC_RIGHT (chrec);
2514 if (!host_integerp (step, 0))
2515 return true;
2516 cd = gcd (cd, tree_low_cst (step, 0));
2517 chrec = CHREC_LEFT (chrec);
2518 }
2519
2520 return val % cd == 0;
2521 }
2522
2523 /* Analyze a MIV (Multiple Index Variable) subscript with respect to
2524 LOOP_NEST. *OVERLAPS_A and *OVERLAPS_B are initialized to the
2525 functions that describe the relation between the elements accessed
2526 twice by CHREC_A and CHREC_B. For k >= 0, the following property
2527 is verified:
2528
2529 CHREC_A (*OVERLAPS_A (k)) = CHREC_B (*OVERLAPS_B (k)). */
2530
2531 static void
2532 analyze_miv_subscript (tree chrec_a,
2533 tree chrec_b,
2534 conflict_function **overlaps_a,
2535 conflict_function **overlaps_b,
2536 tree *last_conflicts,
2537 struct loop *loop_nest)
2538 {
2539 /* FIXME: This is a MIV subscript, not yet handled.
2540 Example: (A[{1, +, 1}_1] vs. A[{1, +, 1}_2]) that comes from
2541 (A[i] vs. A[j]).
2542
2543 In the SIV test we had to solve a Diophantine equation with two
2544 variables. In the MIV case we have to solve a Diophantine
2545 equation with 2*n variables (if the subscript uses n IVs).
2546 */
2547 tree type, difference;
2548
2549 dependence_stats.num_miv++;
2550 if (dump_file && (dump_flags & TDF_DETAILS))
2551 fprintf (dump_file, "(analyze_miv_subscript \n");
2552
2553 type = signed_type_for_types (TREE_TYPE (chrec_a), TREE_TYPE (chrec_b));
2554 chrec_a = chrec_convert (type, chrec_a, NULL);
2555 chrec_b = chrec_convert (type, chrec_b, NULL);
2556 difference = chrec_fold_minus (type, chrec_a, chrec_b);
2557
2558 if (eq_evolutions_p (chrec_a, chrec_b))
2559 {
2560 /* Access functions are the same: all the elements are accessed
2561 in the same order. */
2562 *overlaps_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2563 *overlaps_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2564 *last_conflicts = estimated_loop_iterations_tree
2565 (get_chrec_loop (chrec_a), true);
2566 dependence_stats.num_miv_dependent++;
2567 }
2568
2569 else if (evolution_function_is_constant_p (difference)
2570 /* For the moment, the following is verified:
2571 evolution_function_is_affine_multivariate_p (chrec_a,
2572 loop_nest->num) */
2573 && !gcd_of_steps_may_divide_p (chrec_a, difference))
2574 {
2575 /* testsuite/.../ssa-chrec-33.c
2576 {{21, +, 2}_1, +, -2}_2 vs. {{20, +, 2}_1, +, -2}_2
2577
2578 The difference is 1, and all the evolution steps are multiples
2579 of 2, consequently there are no overlapping elements. */
2580 *overlaps_a = conflict_fn_no_dependence ();
2581 *overlaps_b = conflict_fn_no_dependence ();
2582 *last_conflicts = integer_zero_node;
2583 dependence_stats.num_miv_independent++;
2584 }
2585
2586 else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
2587 && !chrec_contains_symbols (chrec_a)
2588 && evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
2589 && !chrec_contains_symbols (chrec_b))
2590 {
2591 /* testsuite/.../ssa-chrec-35.c
2592 {0, +, 1}_2 vs. {0, +, 1}_3
2593 the overlapping elements are respectively located at iterations:
2594 {0, +, 1}_x and {0, +, 1}_x,
2595 in other words, we have the equality:
2596 {0, +, 1}_2 ({0, +, 1}_x) = {0, +, 1}_3 ({0, +, 1}_x)
2597
2598 Other examples:
2599 {{0, +, 1}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y) =
2600 {0, +, 1}_1 ({{0, +, 1}_x, +, 2}_y)
2601
2602 {{0, +, 2}_1, +, 3}_2 ({0, +, 1}_y, {0, +, 1}_x) =
2603 {{0, +, 3}_1, +, 2}_2 ({0, +, 1}_x, {0, +, 1}_y)
2604 */
2605 analyze_subscript_affine_affine (chrec_a, chrec_b,
2606 overlaps_a, overlaps_b, last_conflicts);
2607
2608 if (CF_NOT_KNOWN_P (*overlaps_a)
2609 || CF_NOT_KNOWN_P (*overlaps_b))
2610 dependence_stats.num_miv_unimplemented++;
2611 else if (CF_NO_DEPENDENCE_P (*overlaps_a)
2612 || CF_NO_DEPENDENCE_P (*overlaps_b))
2613 dependence_stats.num_miv_independent++;
2614 else
2615 dependence_stats.num_miv_dependent++;
2616 }
2617
2618 else
2619 {
2620 /* When the analysis is too difficult, answer "don't know". */
2621 if (dump_file && (dump_flags & TDF_DETAILS))
2622 fprintf (dump_file, "analyze_miv_subscript test failed: unimplemented.\n");
2623
2624 *overlaps_a = conflict_fn_not_known ();
2625 *overlaps_b = conflict_fn_not_known ();
2626 *last_conflicts = chrec_dont_know;
2627 dependence_stats.num_miv_unimplemented++;
2628 }
2629
2630 if (dump_file && (dump_flags & TDF_DETAILS))
2631 fprintf (dump_file, ")\n");
2632 }
2633
2634 /* Determines the iterations for which CHREC_A is equal to CHREC_B in
2635 with respect to LOOP_NEST. OVERLAP_ITERATIONS_A and
2636 OVERLAP_ITERATIONS_B are initialized with two functions that
2637 describe the iterations that contain conflicting elements.
2638
2639 Remark: For an integer k >= 0, the following equality is true:
2640
2641 CHREC_A (OVERLAP_ITERATIONS_A (k)) == CHREC_B (OVERLAP_ITERATIONS_B (k)).
2642 */
2643
2644 static void
2645 analyze_overlapping_iterations (tree chrec_a,
2646 tree chrec_b,
2647 conflict_function **overlap_iterations_a,
2648 conflict_function **overlap_iterations_b,
2649 tree *last_conflicts, struct loop *loop_nest)
2650 {
2651 unsigned int lnn = loop_nest->num;
2652
2653 dependence_stats.num_subscript_tests++;
2654
2655 if (dump_file && (dump_flags & TDF_DETAILS))
2656 {
2657 fprintf (dump_file, "(analyze_overlapping_iterations \n");
2658 fprintf (dump_file, " (chrec_a = ");
2659 print_generic_expr (dump_file, chrec_a, 0);
2660 fprintf (dump_file, ")\n (chrec_b = ");
2661 print_generic_expr (dump_file, chrec_b, 0);
2662 fprintf (dump_file, ")\n");
2663 }
2664
2665 if (chrec_a == NULL_TREE
2666 || chrec_b == NULL_TREE
2667 || chrec_contains_undetermined (chrec_a)
2668 || chrec_contains_undetermined (chrec_b))
2669 {
2670 dependence_stats.num_subscript_undetermined++;
2671
2672 *overlap_iterations_a = conflict_fn_not_known ();
2673 *overlap_iterations_b = conflict_fn_not_known ();
2674 }
2675
2676 /* If they are the same chrec, and are affine, they overlap
2677 on every iteration. */
2678 else if (eq_evolutions_p (chrec_a, chrec_b)
2679 && evolution_function_is_affine_multivariate_p (chrec_a, lnn))
2680 {
2681 dependence_stats.num_same_subscript_function++;
2682 *overlap_iterations_a = conflict_fn (1, affine_fn_cst (integer_zero_node));
2683 *overlap_iterations_b = conflict_fn (1, affine_fn_cst (integer_zero_node));
2684 *last_conflicts = chrec_dont_know;
2685 }
2686
2687 /* If they aren't the same, and aren't affine, we can't do anything
2688 yet. */
2689 else if ((chrec_contains_symbols (chrec_a)
2690 || chrec_contains_symbols (chrec_b))
2691 && (!evolution_function_is_affine_multivariate_p (chrec_a, lnn)
2692 || !evolution_function_is_affine_multivariate_p (chrec_b, lnn)))
2693 {
2694 dependence_stats.num_subscript_undetermined++;
2695 *overlap_iterations_a = conflict_fn_not_known ();
2696 *overlap_iterations_b = conflict_fn_not_known ();
2697 }
2698
2699 else if (ziv_subscript_p (chrec_a, chrec_b))
2700 analyze_ziv_subscript (chrec_a, chrec_b,
2701 overlap_iterations_a, overlap_iterations_b,
2702 last_conflicts);
2703
2704 else if (siv_subscript_p (chrec_a, chrec_b))
2705 analyze_siv_subscript (chrec_a, chrec_b,
2706 overlap_iterations_a, overlap_iterations_b,
2707 last_conflicts, lnn);
2708
2709 else
2710 analyze_miv_subscript (chrec_a, chrec_b,
2711 overlap_iterations_a, overlap_iterations_b,
2712 last_conflicts, loop_nest);
2713
2714 if (dump_file && (dump_flags & TDF_DETAILS))
2715 {
2716 fprintf (dump_file, " (overlap_iterations_a = ");
2717 dump_conflict_function (dump_file, *overlap_iterations_a);
2718 fprintf (dump_file, ")\n (overlap_iterations_b = ");
2719 dump_conflict_function (dump_file, *overlap_iterations_b);
2720 fprintf (dump_file, ")\n");
2721 fprintf (dump_file, ")\n");
2722 }
2723 }
2724
2725 /* Helper function for uniquely inserting distance vectors. */
2726
2727 static void
2728 save_dist_v (struct data_dependence_relation *ddr, lambda_vector dist_v)
2729 {
2730 unsigned i;
2731 lambda_vector v;
2732
2733 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, v); i++)
2734 if (lambda_vector_equal (v, dist_v, DDR_NB_LOOPS (ddr)))
2735 return;
2736
2737 VEC_safe_push (lambda_vector, heap, DDR_DIST_VECTS (ddr), dist_v);
2738 }
2739
2740 /* Helper function for uniquely inserting direction vectors. */
2741
2742 static void
2743 save_dir_v (struct data_dependence_relation *ddr, lambda_vector dir_v)
2744 {
2745 unsigned i;
2746 lambda_vector v;
2747
2748 for (i = 0; VEC_iterate (lambda_vector, DDR_DIR_VECTS (ddr), i, v); i++)
2749 if (lambda_vector_equal (v, dir_v, DDR_NB_LOOPS (ddr)))
2750 return;
2751
2752 VEC_safe_push (lambda_vector, heap, DDR_DIR_VECTS (ddr), dir_v);
2753 }
2754
2755 /* Add a distance of 1 on all the loops outer than INDEX. If we
2756 haven't yet determined a distance for this outer loop, push a new
2757 distance vector composed of the previous distance, and a distance
2758 of 1 for this outer loop. Example:
2759
2760 | loop_1
2761 | loop_2
2762 | A[10]
2763 | endloop_2
2764 | endloop_1
2765
2766 Saved vectors are of the form (dist_in_1, dist_in_2). First, we
2767 save (0, 1), then we have to save (1, 0). */
2768
2769 static void
2770 add_outer_distances (struct data_dependence_relation *ddr,
2771 lambda_vector dist_v, int index)
2772 {
2773 /* For each outer loop where init_v is not set, the accesses are
2774 in dependence of distance 1 in the loop. */
2775 while (--index >= 0)
2776 {
2777 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2778 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
2779 save_v[index] = 1;
2780 save_dist_v (ddr, save_v);
2781 }
2782 }
2783
2784 /* Return false when fail to represent the data dependence as a
2785 distance vector. INIT_B is set to true when a component has been
2786 added to the distance vector DIST_V. INDEX_CARRY is then set to
2787 the index in DIST_V that carries the dependence. */
2788
2789 static bool
2790 build_classic_dist_vector_1 (struct data_dependence_relation *ddr,
2791 struct data_reference *ddr_a,
2792 struct data_reference *ddr_b,
2793 lambda_vector dist_v, bool *init_b,
2794 int *index_carry)
2795 {
2796 unsigned i;
2797 lambda_vector init_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2798
2799 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2800 {
2801 tree access_fn_a, access_fn_b;
2802 struct subscript *subscript = DDR_SUBSCRIPT (ddr, i);
2803
2804 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2805 {
2806 non_affine_dependence_relation (ddr);
2807 return false;
2808 }
2809
2810 access_fn_a = DR_ACCESS_FN (ddr_a, i);
2811 access_fn_b = DR_ACCESS_FN (ddr_b, i);
2812
2813 if (TREE_CODE (access_fn_a) == POLYNOMIAL_CHREC
2814 && TREE_CODE (access_fn_b) == POLYNOMIAL_CHREC)
2815 {
2816 int dist, index;
2817 int index_a = index_in_loop_nest (CHREC_VARIABLE (access_fn_a),
2818 DDR_LOOP_NEST (ddr));
2819 int index_b = index_in_loop_nest (CHREC_VARIABLE (access_fn_b),
2820 DDR_LOOP_NEST (ddr));
2821
2822 /* The dependence is carried by the outermost loop. Example:
2823 | loop_1
2824 | A[{4, +, 1}_1]
2825 | loop_2
2826 | A[{5, +, 1}_2]
2827 | endloop_2
2828 | endloop_1
2829 In this case, the dependence is carried by loop_1. */
2830 index = index_a < index_b ? index_a : index_b;
2831 *index_carry = MIN (index, *index_carry);
2832
2833 if (chrec_contains_undetermined (SUB_DISTANCE (subscript)))
2834 {
2835 non_affine_dependence_relation (ddr);
2836 return false;
2837 }
2838
2839 dist = int_cst_value (SUB_DISTANCE (subscript));
2840
2841 /* This is the subscript coupling test. If we have already
2842 recorded a distance for this loop (a distance coming from
2843 another subscript), it should be the same. For example,
2844 in the following code, there is no dependence:
2845
2846 | loop i = 0, N, 1
2847 | T[i+1][i] = ...
2848 | ... = T[i][i]
2849 | endloop
2850 */
2851 if (init_v[index] != 0 && dist_v[index] != dist)
2852 {
2853 finalize_ddr_dependent (ddr, chrec_known);
2854 return false;
2855 }
2856
2857 dist_v[index] = dist;
2858 init_v[index] = 1;
2859 *init_b = true;
2860 }
2861 else if (!operand_equal_p (access_fn_a, access_fn_b, 0))
2862 {
2863 /* This can be for example an affine vs. constant dependence
2864 (T[i] vs. T[3]) that is not an affine dependence and is
2865 not representable as a distance vector. */
2866 non_affine_dependence_relation (ddr);
2867 return false;
2868 }
2869 }
2870
2871 return true;
2872 }
2873
2874 /* Return true when the DDR contains only constant access functions. */
2875
2876 static bool
2877 constant_access_functions (const struct data_dependence_relation *ddr)
2878 {
2879 unsigned i;
2880
2881 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2882 if (!evolution_function_is_constant_p (DR_ACCESS_FN (DDR_A (ddr), i))
2883 || !evolution_function_is_constant_p (DR_ACCESS_FN (DDR_B (ddr), i)))
2884 return false;
2885
2886 return true;
2887 }
2888
2889 /* Helper function for the case where DDR_A and DDR_B are the same
2890 multivariate access function with a constant step. For an example
2891 see pr34635-1.c. */
2892
2893 static void
2894 add_multivariate_self_dist (struct data_dependence_relation *ddr, tree c_2)
2895 {
2896 int x_1, x_2;
2897 tree c_1 = CHREC_LEFT (c_2);
2898 tree c_0 = CHREC_LEFT (c_1);
2899 lambda_vector dist_v;
2900 int v1, v2, cd;
2901
2902 /* Polynomials with more than 2 variables are not handled yet. When
2903 the evolution steps are parameters, it is not possible to
2904 represent the dependence using classical distance vectors. */
2905 if (TREE_CODE (c_0) != INTEGER_CST
2906 || TREE_CODE (CHREC_RIGHT (c_1)) != INTEGER_CST
2907 || TREE_CODE (CHREC_RIGHT (c_2)) != INTEGER_CST)
2908 {
2909 DDR_AFFINE_P (ddr) = false;
2910 return;
2911 }
2912
2913 x_2 = index_in_loop_nest (CHREC_VARIABLE (c_2), DDR_LOOP_NEST (ddr));
2914 x_1 = index_in_loop_nest (CHREC_VARIABLE (c_1), DDR_LOOP_NEST (ddr));
2915
2916 /* For "{{0, +, 2}_1, +, 3}_2" the distance vector is (3, -2). */
2917 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2918 v1 = int_cst_value (CHREC_RIGHT (c_1));
2919 v2 = int_cst_value (CHREC_RIGHT (c_2));
2920 cd = gcd (v1, v2);
2921 v1 /= cd;
2922 v2 /= cd;
2923
2924 if (v2 < 0)
2925 {
2926 v2 = -v2;
2927 v1 = -v1;
2928 }
2929
2930 dist_v[x_1] = v2;
2931 dist_v[x_2] = -v1;
2932 save_dist_v (ddr, dist_v);
2933
2934 add_outer_distances (ddr, dist_v, x_1);
2935 }
2936
2937 /* Helper function for the case where DDR_A and DDR_B are the same
2938 access functions. */
2939
2940 static void
2941 add_other_self_distances (struct data_dependence_relation *ddr)
2942 {
2943 lambda_vector dist_v;
2944 unsigned i;
2945 int index_carry = DDR_NB_LOOPS (ddr);
2946
2947 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
2948 {
2949 tree access_fun = DR_ACCESS_FN (DDR_A (ddr), i);
2950
2951 if (TREE_CODE (access_fun) == POLYNOMIAL_CHREC)
2952 {
2953 if (!evolution_function_is_univariate_p (access_fun))
2954 {
2955 if (DDR_NUM_SUBSCRIPTS (ddr) != 1)
2956 {
2957 DDR_ARE_DEPENDENT (ddr) = chrec_dont_know;
2958 return;
2959 }
2960
2961 access_fun = DR_ACCESS_FN (DDR_A (ddr), 0);
2962
2963 if (TREE_CODE (CHREC_LEFT (access_fun)) == POLYNOMIAL_CHREC)
2964 add_multivariate_self_dist (ddr, access_fun);
2965 else
2966 /* The evolution step is not constant: it varies in
2967 the outer loop, so this cannot be represented by a
2968 distance vector. For example in pr34635.c the
2969 evolution is {0, +, {0, +, 4}_1}_2. */
2970 DDR_AFFINE_P (ddr) = false;
2971
2972 return;
2973 }
2974
2975 index_carry = MIN (index_carry,
2976 index_in_loop_nest (CHREC_VARIABLE (access_fun),
2977 DDR_LOOP_NEST (ddr)));
2978 }
2979 }
2980
2981 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2982 add_outer_distances (ddr, dist_v, index_carry);
2983 }
2984
2985 static void
2986 insert_innermost_unit_dist_vector (struct data_dependence_relation *ddr)
2987 {
2988 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
2989
2990 dist_v[DDR_INNER_LOOP (ddr)] = 1;
2991 save_dist_v (ddr, dist_v);
2992 }
2993
2994 /* Adds a unit distance vector to DDR when there is a 0 overlap. This
2995 is the case for example when access functions are the same and
2996 equal to a constant, as in:
2997
2998 | loop_1
2999 | A[3] = ...
3000 | ... = A[3]
3001 | endloop_1
3002
3003 in which case the distance vectors are (0) and (1). */
3004
3005 static void
3006 add_distance_for_zero_overlaps (struct data_dependence_relation *ddr)
3007 {
3008 unsigned i, j;
3009
3010 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3011 {
3012 subscript_p sub = DDR_SUBSCRIPT (ddr, i);
3013 conflict_function *ca = SUB_CONFLICTS_IN_A (sub);
3014 conflict_function *cb = SUB_CONFLICTS_IN_B (sub);
3015
3016 for (j = 0; j < ca->n; j++)
3017 if (affine_function_zero_p (ca->fns[j]))
3018 {
3019 insert_innermost_unit_dist_vector (ddr);
3020 return;
3021 }
3022
3023 for (j = 0; j < cb->n; j++)
3024 if (affine_function_zero_p (cb->fns[j]))
3025 {
3026 insert_innermost_unit_dist_vector (ddr);
3027 return;
3028 }
3029 }
3030 }
3031
3032 /* Compute the classic per loop distance vector. DDR is the data
3033 dependence relation to build a vector from. Return false when fail
3034 to represent the data dependence as a distance vector. */
3035
3036 static bool
3037 build_classic_dist_vector (struct data_dependence_relation *ddr,
3038 struct loop *loop_nest)
3039 {
3040 bool init_b = false;
3041 int index_carry = DDR_NB_LOOPS (ddr);
3042 lambda_vector dist_v;
3043
3044 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3045 return false;
3046
3047 if (same_access_functions (ddr))
3048 {
3049 /* Save the 0 vector. */
3050 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3051 save_dist_v (ddr, dist_v);
3052
3053 if (constant_access_functions (ddr))
3054 add_distance_for_zero_overlaps (ddr);
3055
3056 if (DDR_NB_LOOPS (ddr) > 1)
3057 add_other_self_distances (ddr);
3058
3059 return true;
3060 }
3061
3062 dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3063 if (!build_classic_dist_vector_1 (ddr, DDR_A (ddr), DDR_B (ddr),
3064 dist_v, &init_b, &index_carry))
3065 return false;
3066
3067 /* Save the distance vector if we initialized one. */
3068 if (init_b)
3069 {
3070 /* Verify a basic constraint: classic distance vectors should
3071 always be lexicographically positive.
3072
3073 Data references are collected in the order of execution of
3074 the program, thus for the following loop
3075
3076 | for (i = 1; i < 100; i++)
3077 | for (j = 1; j < 100; j++)
3078 | {
3079 | t = T[j+1][i-1]; // A
3080 | T[j][i] = t + 2; // B
3081 | }
3082
3083 references are collected following the direction of the wind:
3084 A then B. The data dependence tests are performed also
3085 following this order, such that we're looking at the distance
3086 separating the elements accessed by A from the elements later
3087 accessed by B. But in this example, the distance returned by
3088 test_dep (A, B) is lexicographically negative (-1, 1), that
3089 means that the access A occurs later than B with respect to
3090 the outer loop, ie. we're actually looking upwind. In this
3091 case we solve test_dep (B, A) looking downwind to the
3092 lexicographically positive solution, that returns the
3093 distance vector (1, -1). */
3094 if (!lambda_vector_lexico_pos (dist_v, DDR_NB_LOOPS (ddr)))
3095 {
3096 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3097 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3098 loop_nest))
3099 return false;
3100 compute_subscript_distance (ddr);
3101 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3102 save_v, &init_b, &index_carry))
3103 return false;
3104 save_dist_v (ddr, save_v);
3105 DDR_REVERSED_P (ddr) = true;
3106
3107 /* In this case there is a dependence forward for all the
3108 outer loops:
3109
3110 | for (k = 1; k < 100; k++)
3111 | for (i = 1; i < 100; i++)
3112 | for (j = 1; j < 100; j++)
3113 | {
3114 | t = T[j+1][i-1]; // A
3115 | T[j][i] = t + 2; // B
3116 | }
3117
3118 the vectors are:
3119 (0, 1, -1)
3120 (1, 1, -1)
3121 (1, -1, 1)
3122 */
3123 if (DDR_NB_LOOPS (ddr) > 1)
3124 {
3125 add_outer_distances (ddr, save_v, index_carry);
3126 add_outer_distances (ddr, dist_v, index_carry);
3127 }
3128 }
3129 else
3130 {
3131 lambda_vector save_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3132 lambda_vector_copy (dist_v, save_v, DDR_NB_LOOPS (ddr));
3133
3134 if (DDR_NB_LOOPS (ddr) > 1)
3135 {
3136 lambda_vector opposite_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3137
3138 if (!subscript_dependence_tester_1 (ddr, DDR_B (ddr),
3139 DDR_A (ddr), loop_nest))
3140 return false;
3141 compute_subscript_distance (ddr);
3142 if (!build_classic_dist_vector_1 (ddr, DDR_B (ddr), DDR_A (ddr),
3143 opposite_v, &init_b,
3144 &index_carry))
3145 return false;
3146
3147 save_dist_v (ddr, save_v);
3148 add_outer_distances (ddr, dist_v, index_carry);
3149 add_outer_distances (ddr, opposite_v, index_carry);
3150 }
3151 else
3152 save_dist_v (ddr, save_v);
3153 }
3154 }
3155 else
3156 {
3157 /* There is a distance of 1 on all the outer loops: Example:
3158 there is a dependence of distance 1 on loop_1 for the array A.
3159
3160 | loop_1
3161 | A[5] = ...
3162 | endloop
3163 */
3164 add_outer_distances (ddr, dist_v,
3165 lambda_vector_first_nz (dist_v,
3166 DDR_NB_LOOPS (ddr), 0));
3167 }
3168
3169 if (dump_file && (dump_flags & TDF_DETAILS))
3170 {
3171 unsigned i;
3172
3173 fprintf (dump_file, "(build_classic_dist_vector\n");
3174 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3175 {
3176 fprintf (dump_file, " dist_vector = (");
3177 print_lambda_vector (dump_file, DDR_DIST_VECT (ddr, i),
3178 DDR_NB_LOOPS (ddr));
3179 fprintf (dump_file, " )\n");
3180 }
3181 fprintf (dump_file, ")\n");
3182 }
3183
3184 return true;
3185 }
3186
3187 /* Return the direction for a given distance.
3188 FIXME: Computing dir this way is suboptimal, since dir can catch
3189 cases that dist is unable to represent. */
3190
3191 static inline enum data_dependence_direction
3192 dir_from_dist (int dist)
3193 {
3194 if (dist > 0)
3195 return dir_positive;
3196 else if (dist < 0)
3197 return dir_negative;
3198 else
3199 return dir_equal;
3200 }
3201
3202 /* Compute the classic per loop direction vector. DDR is the data
3203 dependence relation to build a vector from. */
3204
3205 static void
3206 build_classic_dir_vector (struct data_dependence_relation *ddr)
3207 {
3208 unsigned i, j;
3209 lambda_vector dist_v;
3210
3211 for (i = 0; VEC_iterate (lambda_vector, DDR_DIST_VECTS (ddr), i, dist_v); i++)
3212 {
3213 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3214
3215 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3216 dir_v[j] = dir_from_dist (dist_v[j]);
3217
3218 save_dir_v (ddr, dir_v);
3219 }
3220 }
3221
3222 /* Helper function. Returns true when there is a dependence between
3223 data references DRA and DRB. */
3224
3225 static bool
3226 subscript_dependence_tester_1 (struct data_dependence_relation *ddr,
3227 struct data_reference *dra,
3228 struct data_reference *drb,
3229 struct loop *loop_nest)
3230 {
3231 unsigned int i;
3232 tree last_conflicts;
3233 struct subscript *subscript;
3234
3235 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3236 i++)
3237 {
3238 conflict_function *overlaps_a, *overlaps_b;
3239
3240 analyze_overlapping_iterations (DR_ACCESS_FN (dra, i),
3241 DR_ACCESS_FN (drb, i),
3242 &overlaps_a, &overlaps_b,
3243 &last_conflicts, loop_nest);
3244
3245 if (CF_NOT_KNOWN_P (overlaps_a)
3246 || CF_NOT_KNOWN_P (overlaps_b))
3247 {
3248 finalize_ddr_dependent (ddr, chrec_dont_know);
3249 dependence_stats.num_dependence_undetermined++;
3250 free_conflict_function (overlaps_a);
3251 free_conflict_function (overlaps_b);
3252 return false;
3253 }
3254
3255 else if (CF_NO_DEPENDENCE_P (overlaps_a)
3256 || CF_NO_DEPENDENCE_P (overlaps_b))
3257 {
3258 finalize_ddr_dependent (ddr, chrec_known);
3259 dependence_stats.num_dependence_independent++;
3260 free_conflict_function (overlaps_a);
3261 free_conflict_function (overlaps_b);
3262 return false;
3263 }
3264
3265 else
3266 {
3267 if (SUB_CONFLICTS_IN_A (subscript))
3268 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3269 if (SUB_CONFLICTS_IN_B (subscript))
3270 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3271
3272 SUB_CONFLICTS_IN_A (subscript) = overlaps_a;
3273 SUB_CONFLICTS_IN_B (subscript) = overlaps_b;
3274 SUB_LAST_CONFLICT (subscript) = last_conflicts;
3275 }
3276 }
3277
3278 return true;
3279 }
3280
3281 /* Computes the conflicting iterations in LOOP_NEST, and initialize DDR. */
3282
3283 static void
3284 subscript_dependence_tester (struct data_dependence_relation *ddr,
3285 struct loop *loop_nest)
3286 {
3287
3288 if (dump_file && (dump_flags & TDF_DETAILS))
3289 fprintf (dump_file, "(subscript_dependence_tester \n");
3290
3291 if (subscript_dependence_tester_1 (ddr, DDR_A (ddr), DDR_B (ddr), loop_nest))
3292 dependence_stats.num_dependence_dependent++;
3293
3294 compute_subscript_distance (ddr);
3295 if (build_classic_dist_vector (ddr, loop_nest))
3296 build_classic_dir_vector (ddr);
3297
3298 if (dump_file && (dump_flags & TDF_DETAILS))
3299 fprintf (dump_file, ")\n");
3300 }
3301
3302 /* Returns true when all the access functions of A are affine or
3303 constant with respect to LOOP_NEST. */
3304
3305 static bool
3306 access_functions_are_affine_or_constant_p (const struct data_reference *a,
3307 const struct loop *loop_nest)
3308 {
3309 unsigned int i;
3310 VEC(tree,heap) *fns = DR_ACCESS_FNS (a);
3311 tree t;
3312
3313 for (i = 0; VEC_iterate (tree, fns, i, t); i++)
3314 if (!evolution_function_is_invariant_p (t, loop_nest->num)
3315 && !evolution_function_is_affine_multivariate_p (t, loop_nest->num))
3316 return false;
3317
3318 return true;
3319 }
3320
3321 /* Return true if we can create an affine data-ref for OP in STMT. */
3322
3323 bool
3324 stmt_simple_memref_p (struct loop *loop, gimple stmt, tree op)
3325 {
3326 data_reference_p dr;
3327 bool res = true;
3328
3329 dr = create_data_ref (loop, op, stmt, true);
3330 if (!access_functions_are_affine_or_constant_p (dr, loop))
3331 res = false;
3332
3333 free_data_ref (dr);
3334 return res;
3335 }
3336
3337 /* Initializes an equation for an OMEGA problem using the information
3338 contained in the ACCESS_FUN. Returns true when the operation
3339 succeeded.
3340
3341 PB is the omega constraint system.
3342 EQ is the number of the equation to be initialized.
3343 OFFSET is used for shifting the variables names in the constraints:
3344 a constrain is composed of 2 * the number of variables surrounding
3345 dependence accesses. OFFSET is set either to 0 for the first n variables,
3346 then it is set to n.
3347 ACCESS_FUN is expected to be an affine chrec. */
3348
3349 static bool
3350 init_omega_eq_with_af (omega_pb pb, unsigned eq,
3351 unsigned int offset, tree access_fun,
3352 struct data_dependence_relation *ddr)
3353 {
3354 switch (TREE_CODE (access_fun))
3355 {
3356 case POLYNOMIAL_CHREC:
3357 {
3358 tree left = CHREC_LEFT (access_fun);
3359 tree right = CHREC_RIGHT (access_fun);
3360 int var = CHREC_VARIABLE (access_fun);
3361 unsigned var_idx;
3362
3363 if (TREE_CODE (right) != INTEGER_CST)
3364 return false;
3365
3366 var_idx = index_in_loop_nest (var, DDR_LOOP_NEST (ddr));
3367 pb->eqs[eq].coef[offset + var_idx + 1] = int_cst_value (right);
3368
3369 /* Compute the innermost loop index. */
3370 DDR_INNER_LOOP (ddr) = MAX (DDR_INNER_LOOP (ddr), var_idx);
3371
3372 if (offset == 0)
3373 pb->eqs[eq].coef[var_idx + DDR_NB_LOOPS (ddr) + 1]
3374 += int_cst_value (right);
3375
3376 switch (TREE_CODE (left))
3377 {
3378 case POLYNOMIAL_CHREC:
3379 return init_omega_eq_with_af (pb, eq, offset, left, ddr);
3380
3381 case INTEGER_CST:
3382 pb->eqs[eq].coef[0] += int_cst_value (left);
3383 return true;
3384
3385 default:
3386 return false;
3387 }
3388 }
3389
3390 case INTEGER_CST:
3391 pb->eqs[eq].coef[0] += int_cst_value (access_fun);
3392 return true;
3393
3394 default:
3395 return false;
3396 }
3397 }
3398
3399 /* As explained in the comments preceding init_omega_for_ddr, we have
3400 to set up a system for each loop level, setting outer loops
3401 variation to zero, and current loop variation to positive or zero.
3402 Save each lexico positive distance vector. */
3403
3404 static void
3405 omega_extract_distance_vectors (omega_pb pb,
3406 struct data_dependence_relation *ddr)
3407 {
3408 int eq, geq;
3409 unsigned i, j;
3410 struct loop *loopi, *loopj;
3411 enum omega_result res;
3412
3413 /* Set a new problem for each loop in the nest. The basis is the
3414 problem that we have initialized until now. On top of this we
3415 add new constraints. */
3416 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3417 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3418 {
3419 int dist = 0;
3420 omega_pb copy = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr),
3421 DDR_NB_LOOPS (ddr));
3422
3423 omega_copy_problem (copy, pb);
3424
3425 /* For all the outer loops "loop_j", add "dj = 0". */
3426 for (j = 0;
3427 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3428 {
3429 eq = omega_add_zero_eq (copy, omega_black);
3430 copy->eqs[eq].coef[j + 1] = 1;
3431 }
3432
3433 /* For "loop_i", add "0 <= di". */
3434 geq = omega_add_zero_geq (copy, omega_black);
3435 copy->geqs[geq].coef[i + 1] = 1;
3436
3437 /* Reduce the constraint system, and test that the current
3438 problem is feasible. */
3439 res = omega_simplify_problem (copy);
3440 if (res == omega_false
3441 || res == omega_unknown
3442 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3443 goto next_problem;
3444
3445 for (eq = 0; eq < copy->num_subs; eq++)
3446 if (copy->subs[eq].key == (int) i + 1)
3447 {
3448 dist = copy->subs[eq].coef[0];
3449 goto found_dist;
3450 }
3451
3452 if (dist == 0)
3453 {
3454 /* Reinitialize problem... */
3455 omega_copy_problem (copy, pb);
3456 for (j = 0;
3457 j < i && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), j, loopj); j++)
3458 {
3459 eq = omega_add_zero_eq (copy, omega_black);
3460 copy->eqs[eq].coef[j + 1] = 1;
3461 }
3462
3463 /* ..., but this time "di = 1". */
3464 eq = omega_add_zero_eq (copy, omega_black);
3465 copy->eqs[eq].coef[i + 1] = 1;
3466 copy->eqs[eq].coef[0] = -1;
3467
3468 res = omega_simplify_problem (copy);
3469 if (res == omega_false
3470 || res == omega_unknown
3471 || copy->num_geqs > (int) DDR_NB_LOOPS (ddr))
3472 goto next_problem;
3473
3474 for (eq = 0; eq < copy->num_subs; eq++)
3475 if (copy->subs[eq].key == (int) i + 1)
3476 {
3477 dist = copy->subs[eq].coef[0];
3478 goto found_dist;
3479 }
3480 }
3481
3482 found_dist:;
3483 /* Save the lexicographically positive distance vector. */
3484 if (dist >= 0)
3485 {
3486 lambda_vector dist_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3487 lambda_vector dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3488
3489 dist_v[i] = dist;
3490
3491 for (eq = 0; eq < copy->num_subs; eq++)
3492 if (copy->subs[eq].key > 0)
3493 {
3494 dist = copy->subs[eq].coef[0];
3495 dist_v[copy->subs[eq].key - 1] = dist;
3496 }
3497
3498 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3499 dir_v[j] = dir_from_dist (dist_v[j]);
3500
3501 save_dist_v (ddr, dist_v);
3502 save_dir_v (ddr, dir_v);
3503 }
3504
3505 next_problem:;
3506 omega_free_problem (copy);
3507 }
3508 }
3509
3510 /* This is called for each subscript of a tuple of data references:
3511 insert an equality for representing the conflicts. */
3512
3513 static bool
3514 omega_setup_subscript (tree access_fun_a, tree access_fun_b,
3515 struct data_dependence_relation *ddr,
3516 omega_pb pb, bool *maybe_dependent)
3517 {
3518 int eq;
3519 tree type = signed_type_for_types (TREE_TYPE (access_fun_a),
3520 TREE_TYPE (access_fun_b));
3521 tree fun_a = chrec_convert (type, access_fun_a, NULL);
3522 tree fun_b = chrec_convert (type, access_fun_b, NULL);
3523 tree difference = chrec_fold_minus (type, fun_a, fun_b);
3524
3525 /* When the fun_a - fun_b is not constant, the dependence is not
3526 captured by the classic distance vector representation. */
3527 if (TREE_CODE (difference) != INTEGER_CST)
3528 return false;
3529
3530 /* ZIV test. */
3531 if (ziv_subscript_p (fun_a, fun_b) && !integer_zerop (difference))
3532 {
3533 /* There is no dependence. */
3534 *maybe_dependent = false;
3535 return true;
3536 }
3537
3538 fun_b = chrec_fold_multiply (type, fun_b, integer_minus_one_node);
3539
3540 eq = omega_add_zero_eq (pb, omega_black);
3541 if (!init_omega_eq_with_af (pb, eq, DDR_NB_LOOPS (ddr), fun_a, ddr)
3542 || !init_omega_eq_with_af (pb, eq, 0, fun_b, ddr))
3543 /* There is probably a dependence, but the system of
3544 constraints cannot be built: answer "don't know". */
3545 return false;
3546
3547 /* GCD test. */
3548 if (DDR_NB_LOOPS (ddr) != 0 && pb->eqs[eq].coef[0]
3549 && !int_divides_p (lambda_vector_gcd
3550 ((lambda_vector) &(pb->eqs[eq].coef[1]),
3551 2 * DDR_NB_LOOPS (ddr)),
3552 pb->eqs[eq].coef[0]))
3553 {
3554 /* There is no dependence. */
3555 *maybe_dependent = false;
3556 return true;
3557 }
3558
3559 return true;
3560 }
3561
3562 /* Helper function, same as init_omega_for_ddr but specialized for
3563 data references A and B. */
3564
3565 static bool
3566 init_omega_for_ddr_1 (struct data_reference *dra, struct data_reference *drb,
3567 struct data_dependence_relation *ddr,
3568 omega_pb pb, bool *maybe_dependent)
3569 {
3570 unsigned i;
3571 int ineq;
3572 struct loop *loopi;
3573 unsigned nb_loops = DDR_NB_LOOPS (ddr);
3574
3575 /* Insert an equality per subscript. */
3576 for (i = 0; i < DDR_NUM_SUBSCRIPTS (ddr); i++)
3577 {
3578 if (!omega_setup_subscript (DR_ACCESS_FN (dra, i), DR_ACCESS_FN (drb, i),
3579 ddr, pb, maybe_dependent))
3580 return false;
3581 else if (*maybe_dependent == false)
3582 {
3583 /* There is no dependence. */
3584 DDR_ARE_DEPENDENT (ddr) = chrec_known;
3585 return true;
3586 }
3587 }
3588
3589 /* Insert inequalities: constraints corresponding to the iteration
3590 domain, i.e. the loops surrounding the references "loop_x" and
3591 the distance variables "dx". The layout of the OMEGA
3592 representation is as follows:
3593 - coef[0] is the constant
3594 - coef[1..nb_loops] are the protected variables that will not be
3595 removed by the solver: the "dx"
3596 - coef[nb_loops + 1, 2*nb_loops] are the loop variables: "loop_x".
3597 */
3598 for (i = 0; i <= DDR_INNER_LOOP (ddr)
3599 && VEC_iterate (loop_p, DDR_LOOP_NEST (ddr), i, loopi); i++)
3600 {
3601 HOST_WIDE_INT nbi = estimated_loop_iterations_int (loopi, false);
3602
3603 /* 0 <= loop_x */
3604 ineq = omega_add_zero_geq (pb, omega_black);
3605 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3606
3607 /* 0 <= loop_x + dx */
3608 ineq = omega_add_zero_geq (pb, omega_black);
3609 pb->geqs[ineq].coef[i + nb_loops + 1] = 1;
3610 pb->geqs[ineq].coef[i + 1] = 1;
3611
3612 if (nbi != -1)
3613 {
3614 /* loop_x <= nb_iters */
3615 ineq = omega_add_zero_geq (pb, omega_black);
3616 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3617 pb->geqs[ineq].coef[0] = nbi;
3618
3619 /* loop_x + dx <= nb_iters */
3620 ineq = omega_add_zero_geq (pb, omega_black);
3621 pb->geqs[ineq].coef[i + nb_loops + 1] = -1;
3622 pb->geqs[ineq].coef[i + 1] = -1;
3623 pb->geqs[ineq].coef[0] = nbi;
3624
3625 /* A step "dx" bigger than nb_iters is not feasible, so
3626 add "0 <= nb_iters + dx", */
3627 ineq = omega_add_zero_geq (pb, omega_black);
3628 pb->geqs[ineq].coef[i + 1] = 1;
3629 pb->geqs[ineq].coef[0] = nbi;
3630 /* and "dx <= nb_iters". */
3631 ineq = omega_add_zero_geq (pb, omega_black);
3632 pb->geqs[ineq].coef[i + 1] = -1;
3633 pb->geqs[ineq].coef[0] = nbi;
3634 }
3635 }
3636
3637 omega_extract_distance_vectors (pb, ddr);
3638
3639 return true;
3640 }
3641
3642 /* Sets up the Omega dependence problem for the data dependence
3643 relation DDR. Returns false when the constraint system cannot be
3644 built, ie. when the test answers "don't know". Returns true
3645 otherwise, and when independence has been proved (using one of the
3646 trivial dependence test), set MAYBE_DEPENDENT to false, otherwise
3647 set MAYBE_DEPENDENT to true.
3648
3649 Example: for setting up the dependence system corresponding to the
3650 conflicting accesses
3651
3652 | loop_i
3653 | loop_j
3654 | A[i, i+1] = ...
3655 | ... A[2*j, 2*(i + j)]
3656 | endloop_j
3657 | endloop_i
3658
3659 the following constraints come from the iteration domain:
3660
3661 0 <= i <= Ni
3662 0 <= i + di <= Ni
3663 0 <= j <= Nj
3664 0 <= j + dj <= Nj
3665
3666 where di, dj are the distance variables. The constraints
3667 representing the conflicting elements are:
3668
3669 i = 2 * (j + dj)
3670 i + 1 = 2 * (i + di + j + dj)
3671
3672 For asking that the resulting distance vector (di, dj) be
3673 lexicographically positive, we insert the constraint "di >= 0". If
3674 "di = 0" in the solution, we fix that component to zero, and we
3675 look at the inner loops: we set a new problem where all the outer
3676 loop distances are zero, and fix this inner component to be
3677 positive. When one of the components is positive, we save that
3678 distance, and set a new problem where the distance on this loop is
3679 zero, searching for other distances in the inner loops. Here is
3680 the classic example that illustrates that we have to set for each
3681 inner loop a new problem:
3682
3683 | loop_1
3684 | loop_2
3685 | A[10]
3686 | endloop_2
3687 | endloop_1
3688
3689 we have to save two distances (1, 0) and (0, 1).
3690
3691 Given two array references, refA and refB, we have to set the
3692 dependence problem twice, refA vs. refB and refB vs. refA, and we
3693 cannot do a single test, as refB might occur before refA in the
3694 inner loops, and the contrary when considering outer loops: ex.
3695
3696 | loop_0
3697 | loop_1
3698 | loop_2
3699 | T[{1,+,1}_2][{1,+,1}_1] // refA
3700 | T[{2,+,1}_2][{0,+,1}_1] // refB
3701 | endloop_2
3702 | endloop_1
3703 | endloop_0
3704
3705 refB touches the elements in T before refA, and thus for the same
3706 loop_0 refB precedes refA: ie. the distance vector (0, 1, -1)
3707 but for successive loop_0 iterations, we have (1, -1, 1)
3708
3709 The Omega solver expects the distance variables ("di" in the
3710 previous example) to come first in the constraint system (as
3711 variables to be protected, or "safe" variables), the constraint
3712 system is built using the following layout:
3713
3714 "cst | distance vars | index vars".
3715 */
3716
3717 static bool
3718 init_omega_for_ddr (struct data_dependence_relation *ddr,
3719 bool *maybe_dependent)
3720 {
3721 omega_pb pb;
3722 bool res = false;
3723
3724 *maybe_dependent = true;
3725
3726 if (same_access_functions (ddr))
3727 {
3728 unsigned j;
3729 lambda_vector dir_v;
3730
3731 /* Save the 0 vector. */
3732 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3733 dir_v = lambda_vector_new (DDR_NB_LOOPS (ddr));
3734 for (j = 0; j < DDR_NB_LOOPS (ddr); j++)
3735 dir_v[j] = dir_equal;
3736 save_dir_v (ddr, dir_v);
3737
3738 /* Save the dependences carried by outer loops. */
3739 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3740 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3741 maybe_dependent);
3742 omega_free_problem (pb);
3743 return res;
3744 }
3745
3746 /* Omega expects the protected variables (those that have to be kept
3747 after elimination) to appear first in the constraint system.
3748 These variables are the distance variables. In the following
3749 initialization we declare NB_LOOPS safe variables, and the total
3750 number of variables for the constraint system is 2*NB_LOOPS. */
3751 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3752 res = init_omega_for_ddr_1 (DDR_A (ddr), DDR_B (ddr), ddr, pb,
3753 maybe_dependent);
3754 omega_free_problem (pb);
3755
3756 /* Stop computation if not decidable, or no dependence. */
3757 if (res == false || *maybe_dependent == false)
3758 return res;
3759
3760 pb = omega_alloc_problem (2 * DDR_NB_LOOPS (ddr), DDR_NB_LOOPS (ddr));
3761 res = init_omega_for_ddr_1 (DDR_B (ddr), DDR_A (ddr), ddr, pb,
3762 maybe_dependent);
3763 omega_free_problem (pb);
3764
3765 return res;
3766 }
3767
3768 /* Return true when DDR contains the same information as that stored
3769 in DIR_VECTS and in DIST_VECTS, return false otherwise. */
3770
3771 static bool
3772 ddr_consistent_p (FILE *file,
3773 struct data_dependence_relation *ddr,
3774 VEC (lambda_vector, heap) *dist_vects,
3775 VEC (lambda_vector, heap) *dir_vects)
3776 {
3777 unsigned int i, j;
3778
3779 /* If dump_file is set, output there. */
3780 if (dump_file && (dump_flags & TDF_DETAILS))
3781 file = dump_file;
3782
3783 if (VEC_length (lambda_vector, dist_vects) != DDR_NUM_DIST_VECTS (ddr))
3784 {
3785 lambda_vector b_dist_v;
3786 fprintf (file, "\n(Number of distance vectors differ: Banerjee has %d, Omega has %d.\n",
3787 VEC_length (lambda_vector, dist_vects),
3788 DDR_NUM_DIST_VECTS (ddr));
3789
3790 fprintf (file, "Banerjee dist vectors:\n");
3791 for (i = 0; VEC_iterate (lambda_vector, dist_vects, i, b_dist_v); i++)
3792 print_lambda_vector (file, b_dist_v, DDR_NB_LOOPS (ddr));
3793
3794 fprintf (file, "Omega dist vectors:\n");
3795 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3796 print_lambda_vector (file, DDR_DIST_VECT (ddr, i), DDR_NB_LOOPS (ddr));
3797
3798 fprintf (file, "data dependence relation:\n");
3799 dump_data_dependence_relation (file, ddr);
3800
3801 fprintf (file, ")\n");
3802 return false;
3803 }
3804
3805 if (VEC_length (lambda_vector, dir_vects) != DDR_NUM_DIR_VECTS (ddr))
3806 {
3807 fprintf (file, "\n(Number of direction vectors differ: Banerjee has %d, Omega has %d.)\n",
3808 VEC_length (lambda_vector, dir_vects),
3809 DDR_NUM_DIR_VECTS (ddr));
3810 return false;
3811 }
3812
3813 for (i = 0; i < DDR_NUM_DIST_VECTS (ddr); i++)
3814 {
3815 lambda_vector a_dist_v;
3816 lambda_vector b_dist_v = DDR_DIST_VECT (ddr, i);
3817
3818 /* Distance vectors are not ordered in the same way in the DDR
3819 and in the DIST_VECTS: search for a matching vector. */
3820 for (j = 0; VEC_iterate (lambda_vector, dist_vects, j, a_dist_v); j++)
3821 if (lambda_vector_equal (a_dist_v, b_dist_v, DDR_NB_LOOPS (ddr)))
3822 break;
3823
3824 if (j == VEC_length (lambda_vector, dist_vects))
3825 {
3826 fprintf (file, "\n(Dist vectors from the first dependence analyzer:\n");
3827 print_dist_vectors (file, dist_vects, DDR_NB_LOOPS (ddr));
3828 fprintf (file, "not found in Omega dist vectors:\n");
3829 print_dist_vectors (file, DDR_DIST_VECTS (ddr), DDR_NB_LOOPS (ddr));
3830 fprintf (file, "data dependence relation:\n");
3831 dump_data_dependence_relation (file, ddr);
3832 fprintf (file, ")\n");
3833 }
3834 }
3835
3836 for (i = 0; i < DDR_NUM_DIR_VECTS (ddr); i++)
3837 {
3838 lambda_vector a_dir_v;
3839 lambda_vector b_dir_v = DDR_DIR_VECT (ddr, i);
3840
3841 /* Direction vectors are not ordered in the same way in the DDR
3842 and in the DIR_VECTS: search for a matching vector. */
3843 for (j = 0; VEC_iterate (lambda_vector, dir_vects, j, a_dir_v); j++)
3844 if (lambda_vector_equal (a_dir_v, b_dir_v, DDR_NB_LOOPS (ddr)))
3845 break;
3846
3847 if (j == VEC_length (lambda_vector, dist_vects))
3848 {
3849 fprintf (file, "\n(Dir vectors from the first dependence analyzer:\n");
3850 print_dir_vectors (file, dir_vects, DDR_NB_LOOPS (ddr));
3851 fprintf (file, "not found in Omega dir vectors:\n");
3852 print_dir_vectors (file, DDR_DIR_VECTS (ddr), DDR_NB_LOOPS (ddr));
3853 fprintf (file, "data dependence relation:\n");
3854 dump_data_dependence_relation (file, ddr);
3855 fprintf (file, ")\n");
3856 }
3857 }
3858
3859 return true;
3860 }
3861
3862 /* This computes the affine dependence relation between A and B with
3863 respect to LOOP_NEST. CHREC_KNOWN is used for representing the
3864 independence between two accesses, while CHREC_DONT_KNOW is used
3865 for representing the unknown relation.
3866
3867 Note that it is possible to stop the computation of the dependence
3868 relation the first time we detect a CHREC_KNOWN element for a given
3869 subscript. */
3870
3871 static void
3872 compute_affine_dependence (struct data_dependence_relation *ddr,
3873 struct loop *loop_nest)
3874 {
3875 struct data_reference *dra = DDR_A (ddr);
3876 struct data_reference *drb = DDR_B (ddr);
3877
3878 if (dump_file && (dump_flags & TDF_DETAILS))
3879 {
3880 fprintf (dump_file, "(compute_affine_dependence\n");
3881 fprintf (dump_file, " (stmt_a = \n");
3882 print_gimple_stmt (dump_file, DR_STMT (dra), 0, 0);
3883 fprintf (dump_file, ")\n (stmt_b = \n");
3884 print_gimple_stmt (dump_file, DR_STMT (drb), 0, 0);
3885 fprintf (dump_file, ")\n");
3886 }
3887
3888 /* Analyze only when the dependence relation is not yet known. */
3889 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE
3890 && !DDR_SELF_REFERENCE (ddr))
3891 {
3892 dependence_stats.num_dependence_tests++;
3893
3894 if (access_functions_are_affine_or_constant_p (dra, loop_nest)
3895 && access_functions_are_affine_or_constant_p (drb, loop_nest))
3896 {
3897 if (flag_check_data_deps)
3898 {
3899 /* Compute the dependences using the first algorithm. */
3900 subscript_dependence_tester (ddr, loop_nest);
3901
3902 if (dump_file && (dump_flags & TDF_DETAILS))
3903 {
3904 fprintf (dump_file, "\n\nBanerjee Analyzer\n");
3905 dump_data_dependence_relation (dump_file, ddr);
3906 }
3907
3908 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
3909 {
3910 bool maybe_dependent;
3911 VEC (lambda_vector, heap) *dir_vects, *dist_vects;
3912
3913 /* Save the result of the first DD analyzer. */
3914 dist_vects = DDR_DIST_VECTS (ddr);
3915 dir_vects = DDR_DIR_VECTS (ddr);
3916
3917 /* Reset the information. */
3918 DDR_DIST_VECTS (ddr) = NULL;
3919 DDR_DIR_VECTS (ddr) = NULL;
3920
3921 /* Compute the same information using Omega. */
3922 if (!init_omega_for_ddr (ddr, &maybe_dependent))
3923 goto csys_dont_know;
3924
3925 if (dump_file && (dump_flags & TDF_DETAILS))
3926 {
3927 fprintf (dump_file, "Omega Analyzer\n");
3928 dump_data_dependence_relation (dump_file, ddr);
3929 }
3930
3931 /* Check that we get the same information. */
3932 if (maybe_dependent)
3933 gcc_assert (ddr_consistent_p (stderr, ddr, dist_vects,
3934 dir_vects));
3935 }
3936 }
3937 else
3938 subscript_dependence_tester (ddr, loop_nest);
3939 }
3940
3941 /* As a last case, if the dependence cannot be determined, or if
3942 the dependence is considered too difficult to determine, answer
3943 "don't know". */
3944 else
3945 {
3946 csys_dont_know:;
3947 dependence_stats.num_dependence_undetermined++;
3948
3949 if (dump_file && (dump_flags & TDF_DETAILS))
3950 {
3951 fprintf (dump_file, "Data ref a:\n");
3952 dump_data_reference (dump_file, dra);
3953 fprintf (dump_file, "Data ref b:\n");
3954 dump_data_reference (dump_file, drb);
3955 fprintf (dump_file, "affine dependence test not usable: access function not affine or constant.\n");
3956 }
3957 finalize_ddr_dependent (ddr, chrec_dont_know);
3958 }
3959 }
3960
3961 if (dump_file && (dump_flags & TDF_DETAILS))
3962 fprintf (dump_file, ")\n");
3963 }
3964
3965 /* This computes the dependence relation for the same data
3966 reference into DDR. */
3967
3968 static void
3969 compute_self_dependence (struct data_dependence_relation *ddr)
3970 {
3971 unsigned int i;
3972 struct subscript *subscript;
3973
3974 if (DDR_ARE_DEPENDENT (ddr) != NULL_TREE)
3975 return;
3976
3977 for (i = 0; VEC_iterate (subscript_p, DDR_SUBSCRIPTS (ddr), i, subscript);
3978 i++)
3979 {
3980 if (SUB_CONFLICTS_IN_A (subscript))
3981 free_conflict_function (SUB_CONFLICTS_IN_A (subscript));
3982 if (SUB_CONFLICTS_IN_B (subscript))
3983 free_conflict_function (SUB_CONFLICTS_IN_B (subscript));
3984
3985 /* The accessed index overlaps for each iteration. */
3986 SUB_CONFLICTS_IN_A (subscript)
3987 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3988 SUB_CONFLICTS_IN_B (subscript)
3989 = conflict_fn (1, affine_fn_cst (integer_zero_node));
3990 SUB_LAST_CONFLICT (subscript) = chrec_dont_know;
3991 }
3992
3993 /* The distance vector is the zero vector. */
3994 save_dist_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3995 save_dir_v (ddr, lambda_vector_new (DDR_NB_LOOPS (ddr)));
3996 }
3997
3998 /* Compute in DEPENDENCE_RELATIONS the data dependence graph for all
3999 the data references in DATAREFS, in the LOOP_NEST. When
4000 COMPUTE_SELF_AND_RR is FALSE, don't compute read-read and self
4001 relations. */
4002
4003 void
4004 compute_all_dependences (VEC (data_reference_p, heap) *datarefs,
4005 VEC (ddr_p, heap) **dependence_relations,
4006 VEC (loop_p, heap) *loop_nest,
4007 bool compute_self_and_rr)
4008 {
4009 struct data_dependence_relation *ddr;
4010 struct data_reference *a, *b;
4011 unsigned int i, j;
4012
4013 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
4014 for (j = i + 1; VEC_iterate (data_reference_p, datarefs, j, b); j++)
4015 if (!DR_IS_READ (a) || !DR_IS_READ (b) || compute_self_and_rr)
4016 {
4017 ddr = initialize_data_dependence_relation (a, b, loop_nest);
4018 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4019 compute_affine_dependence (ddr, VEC_index (loop_p, loop_nest, 0));
4020 }
4021
4022 if (compute_self_and_rr)
4023 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, a); i++)
4024 {
4025 ddr = initialize_data_dependence_relation (a, a, loop_nest);
4026 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4027 compute_self_dependence (ddr);
4028 }
4029 }
4030
4031 /* Stores the locations of memory references in STMT to REFERENCES. Returns
4032 true if STMT clobbers memory, false otherwise. */
4033
4034 bool
4035 get_references_in_stmt (gimple stmt, VEC (data_ref_loc, heap) **references)
4036 {
4037 bool clobbers_memory = false;
4038 data_ref_loc *ref;
4039 tree *op0, *op1;
4040 enum gimple_code stmt_code = gimple_code (stmt);
4041
4042 *references = NULL;
4043
4044 /* ASM_EXPR and CALL_EXPR may embed arbitrary side effects.
4045 Calls have side-effects, except those to const or pure
4046 functions. */
4047 if ((stmt_code == GIMPLE_CALL
4048 && !(gimple_call_flags (stmt) & (ECF_CONST | ECF_PURE)))
4049 || (stmt_code == GIMPLE_ASM
4050 && gimple_asm_volatile_p (stmt)))
4051 clobbers_memory = true;
4052
4053 if (ZERO_SSA_OPERANDS (stmt, SSA_OP_ALL_VIRTUALS))
4054 return clobbers_memory;
4055
4056 if (stmt_code == GIMPLE_ASSIGN)
4057 {
4058 tree base;
4059 op0 = gimple_assign_lhs_ptr (stmt);
4060 op1 = gimple_assign_rhs1_ptr (stmt);
4061
4062 if (DECL_P (*op1)
4063 || (REFERENCE_CLASS_P (*op1)
4064 && (base = get_base_address (*op1))
4065 && TREE_CODE (base) != SSA_NAME))
4066 {
4067 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4068 ref->pos = op1;
4069 ref->is_read = true;
4070 }
4071
4072 if (DECL_P (*op0)
4073 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4074 {
4075 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4076 ref->pos = op0;
4077 ref->is_read = false;
4078 }
4079 }
4080 else if (stmt_code == GIMPLE_CALL)
4081 {
4082 unsigned i, n = gimple_call_num_args (stmt);
4083
4084 for (i = 0; i < n; i++)
4085 {
4086 op0 = gimple_call_arg_ptr (stmt, i);
4087
4088 if (DECL_P (*op0)
4089 || (REFERENCE_CLASS_P (*op0) && get_base_address (*op0)))
4090 {
4091 ref = VEC_safe_push (data_ref_loc, heap, *references, NULL);
4092 ref->pos = op0;
4093 ref->is_read = true;
4094 }
4095 }
4096 }
4097
4098 return clobbers_memory;
4099 }
4100
4101 /* Stores the data references in STMT to DATAREFS. If there is an unanalyzable
4102 reference, returns false, otherwise returns true. NEST is the outermost
4103 loop of the loop nest in which the references should be analyzed. */
4104
4105 bool
4106 find_data_references_in_stmt (struct loop *nest, gimple stmt,
4107 VEC (data_reference_p, heap) **datarefs)
4108 {
4109 unsigned i;
4110 VEC (data_ref_loc, heap) *references;
4111 data_ref_loc *ref;
4112 bool ret = true;
4113 data_reference_p dr;
4114
4115 if (get_references_in_stmt (stmt, &references))
4116 {
4117 VEC_free (data_ref_loc, heap, references);
4118 return false;
4119 }
4120
4121 for (i = 0; VEC_iterate (data_ref_loc, references, i, ref); i++)
4122 {
4123 dr = create_data_ref (nest, *ref->pos, stmt, ref->is_read);
4124 gcc_assert (dr != NULL);
4125
4126 /* FIXME -- data dependence analysis does not work correctly for objects with
4127 invariant addresses. Let us fail here until the problem is fixed. */
4128 if (dr_address_invariant_p (dr))
4129 {
4130 free_data_ref (dr);
4131 if (dump_file && (dump_flags & TDF_DETAILS))
4132 fprintf (dump_file, "\tFAILED as dr address is invariant\n");
4133 ret = false;
4134 break;
4135 }
4136
4137 VEC_safe_push (data_reference_p, heap, *datarefs, dr);
4138 }
4139 VEC_free (data_ref_loc, heap, references);
4140 return ret;
4141 }
4142
4143 /* Search the data references in LOOP, and record the information into
4144 DATAREFS. Returns chrec_dont_know when failing to analyze a
4145 difficult case, returns NULL_TREE otherwise.
4146
4147 TODO: This function should be made smarter so that it can handle address
4148 arithmetic as if they were array accesses, etc. */
4149
4150 tree
4151 find_data_references_in_loop (struct loop *loop,
4152 VEC (data_reference_p, heap) **datarefs)
4153 {
4154 basic_block bb, *bbs;
4155 unsigned int i;
4156 gimple_stmt_iterator bsi;
4157
4158 bbs = get_loop_body_in_dom_order (loop);
4159
4160 for (i = 0; i < loop->num_nodes; i++)
4161 {
4162 bb = bbs[i];
4163
4164 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4165 {
4166 gimple stmt = gsi_stmt (bsi);
4167
4168 if (!find_data_references_in_stmt (loop, stmt, datarefs))
4169 {
4170 struct data_reference *res;
4171 res = XCNEW (struct data_reference);
4172 VEC_safe_push (data_reference_p, heap, *datarefs, res);
4173
4174 free (bbs);
4175 return chrec_dont_know;
4176 }
4177 }
4178 }
4179 free (bbs);
4180
4181 return NULL_TREE;
4182 }
4183
4184 /* Recursive helper function. */
4185
4186 static bool
4187 find_loop_nest_1 (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4188 {
4189 /* Inner loops of the nest should not contain siblings. Example:
4190 when there are two consecutive loops,
4191
4192 | loop_0
4193 | loop_1
4194 | A[{0, +, 1}_1]
4195 | endloop_1
4196 | loop_2
4197 | A[{0, +, 1}_2]
4198 | endloop_2
4199 | endloop_0
4200
4201 the dependence relation cannot be captured by the distance
4202 abstraction. */
4203 if (loop->next)
4204 return false;
4205
4206 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4207 if (loop->inner)
4208 return find_loop_nest_1 (loop->inner, loop_nest);
4209 return true;
4210 }
4211
4212 /* Return false when the LOOP is not well nested. Otherwise return
4213 true and insert in LOOP_NEST the loops of the nest. LOOP_NEST will
4214 contain the loops from the outermost to the innermost, as they will
4215 appear in the classic distance vector. */
4216
4217 bool
4218 find_loop_nest (struct loop *loop, VEC (loop_p, heap) **loop_nest)
4219 {
4220 VEC_safe_push (loop_p, heap, *loop_nest, loop);
4221 if (loop->inner)
4222 return find_loop_nest_1 (loop->inner, loop_nest);
4223 return true;
4224 }
4225
4226 /* Returns true when the data dependences have been computed, false otherwise.
4227 Given a loop nest LOOP, the following vectors are returned:
4228 DATAREFS is initialized to all the array elements contained in this loop,
4229 DEPENDENCE_RELATIONS contains the relations between the data references.
4230 Compute read-read and self relations if
4231 COMPUTE_SELF_AND_READ_READ_DEPENDENCES is TRUE. */
4232
4233 bool
4234 compute_data_dependences_for_loop (struct loop *loop,
4235 bool compute_self_and_read_read_dependences,
4236 VEC (data_reference_p, heap) **datarefs,
4237 VEC (ddr_p, heap) **dependence_relations)
4238 {
4239 bool res = true;
4240 VEC (loop_p, heap) *vloops = VEC_alloc (loop_p, heap, 3);
4241
4242 memset (&dependence_stats, 0, sizeof (dependence_stats));
4243
4244 /* If the loop nest is not well formed, or one of the data references
4245 is not computable, give up without spending time to compute other
4246 dependences. */
4247 if (!loop
4248 || !find_loop_nest (loop, &vloops)
4249 || find_data_references_in_loop (loop, datarefs) == chrec_dont_know)
4250 {
4251 struct data_dependence_relation *ddr;
4252
4253 /* Insert a single relation into dependence_relations:
4254 chrec_dont_know. */
4255 ddr = initialize_data_dependence_relation (NULL, NULL, vloops);
4256 VEC_safe_push (ddr_p, heap, *dependence_relations, ddr);
4257 res = false;
4258 }
4259 else
4260 compute_all_dependences (*datarefs, dependence_relations, vloops,
4261 compute_self_and_read_read_dependences);
4262
4263 if (dump_file && (dump_flags & TDF_STATS))
4264 {
4265 fprintf (dump_file, "Dependence tester statistics:\n");
4266
4267 fprintf (dump_file, "Number of dependence tests: %d\n",
4268 dependence_stats.num_dependence_tests);
4269 fprintf (dump_file, "Number of dependence tests classified dependent: %d\n",
4270 dependence_stats.num_dependence_dependent);
4271 fprintf (dump_file, "Number of dependence tests classified independent: %d\n",
4272 dependence_stats.num_dependence_independent);
4273 fprintf (dump_file, "Number of undetermined dependence tests: %d\n",
4274 dependence_stats.num_dependence_undetermined);
4275
4276 fprintf (dump_file, "Number of subscript tests: %d\n",
4277 dependence_stats.num_subscript_tests);
4278 fprintf (dump_file, "Number of undetermined subscript tests: %d\n",
4279 dependence_stats.num_subscript_undetermined);
4280 fprintf (dump_file, "Number of same subscript function: %d\n",
4281 dependence_stats.num_same_subscript_function);
4282
4283 fprintf (dump_file, "Number of ziv tests: %d\n",
4284 dependence_stats.num_ziv);
4285 fprintf (dump_file, "Number of ziv tests returning dependent: %d\n",
4286 dependence_stats.num_ziv_dependent);
4287 fprintf (dump_file, "Number of ziv tests returning independent: %d\n",
4288 dependence_stats.num_ziv_independent);
4289 fprintf (dump_file, "Number of ziv tests unimplemented: %d\n",
4290 dependence_stats.num_ziv_unimplemented);
4291
4292 fprintf (dump_file, "Number of siv tests: %d\n",
4293 dependence_stats.num_siv);
4294 fprintf (dump_file, "Number of siv tests returning dependent: %d\n",
4295 dependence_stats.num_siv_dependent);
4296 fprintf (dump_file, "Number of siv tests returning independent: %d\n",
4297 dependence_stats.num_siv_independent);
4298 fprintf (dump_file, "Number of siv tests unimplemented: %d\n",
4299 dependence_stats.num_siv_unimplemented);
4300
4301 fprintf (dump_file, "Number of miv tests: %d\n",
4302 dependence_stats.num_miv);
4303 fprintf (dump_file, "Number of miv tests returning dependent: %d\n",
4304 dependence_stats.num_miv_dependent);
4305 fprintf (dump_file, "Number of miv tests returning independent: %d\n",
4306 dependence_stats.num_miv_independent);
4307 fprintf (dump_file, "Number of miv tests unimplemented: %d\n",
4308 dependence_stats.num_miv_unimplemented);
4309 }
4310
4311 return res;
4312 }
4313
4314 /* Entry point (for testing only). Analyze all the data references
4315 and the dependence relations in LOOP.
4316
4317 The data references are computed first.
4318
4319 A relation on these nodes is represented by a complete graph. Some
4320 of the relations could be of no interest, thus the relations can be
4321 computed on demand.
4322
4323 In the following function we compute all the relations. This is
4324 just a first implementation that is here for:
4325 - for showing how to ask for the dependence relations,
4326 - for the debugging the whole dependence graph,
4327 - for the dejagnu testcases and maintenance.
4328
4329 It is possible to ask only for a part of the graph, avoiding to
4330 compute the whole dependence graph. The computed dependences are
4331 stored in a knowledge base (KB) such that later queries don't
4332 recompute the same information. The implementation of this KB is
4333 transparent to the optimizer, and thus the KB can be changed with a
4334 more efficient implementation, or the KB could be disabled. */
4335 static void
4336 analyze_all_data_dependences (struct loop *loop)
4337 {
4338 unsigned int i;
4339 int nb_data_refs = 10;
4340 VEC (data_reference_p, heap) *datarefs =
4341 VEC_alloc (data_reference_p, heap, nb_data_refs);
4342 VEC (ddr_p, heap) *dependence_relations =
4343 VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs);
4344
4345 /* Compute DDs on the whole function. */
4346 compute_data_dependences_for_loop (loop, false, &datarefs,
4347 &dependence_relations);
4348
4349 if (dump_file)
4350 {
4351 dump_data_dependence_relations (dump_file, dependence_relations);
4352 fprintf (dump_file, "\n\n");
4353
4354 if (dump_flags & TDF_DETAILS)
4355 dump_dist_dir_vectors (dump_file, dependence_relations);
4356
4357 if (dump_flags & TDF_STATS)
4358 {
4359 unsigned nb_top_relations = 0;
4360 unsigned nb_bot_relations = 0;
4361 unsigned nb_basename_differ = 0;
4362 unsigned nb_chrec_relations = 0;
4363 struct data_dependence_relation *ddr;
4364
4365 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4366 {
4367 if (chrec_contains_undetermined (DDR_ARE_DEPENDENT (ddr)))
4368 nb_top_relations++;
4369
4370 else if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
4371 {
4372 struct data_reference *a = DDR_A (ddr);
4373 struct data_reference *b = DDR_B (ddr);
4374
4375 if (!bitmap_intersect_p (DR_VOPS (a), DR_VOPS (b)))
4376 nb_basename_differ++;
4377 else
4378 nb_bot_relations++;
4379 }
4380
4381 else
4382 nb_chrec_relations++;
4383 }
4384
4385 gather_stats_on_scev_database ();
4386 }
4387 }
4388
4389 free_dependence_relations (dependence_relations);
4390 free_data_refs (datarefs);
4391 }
4392
4393 /* Computes all the data dependences and check that the results of
4394 several analyzers are the same. */
4395
4396 void
4397 tree_check_data_deps (void)
4398 {
4399 loop_iterator li;
4400 struct loop *loop_nest;
4401
4402 FOR_EACH_LOOP (li, loop_nest, 0)
4403 analyze_all_data_dependences (loop_nest);
4404 }
4405
4406 /* Free the memory used by a data dependence relation DDR. */
4407
4408 void
4409 free_dependence_relation (struct data_dependence_relation *ddr)
4410 {
4411 if (ddr == NULL)
4412 return;
4413
4414 if (DDR_SUBSCRIPTS (ddr))
4415 free_subscripts (DDR_SUBSCRIPTS (ddr));
4416 if (DDR_DIST_VECTS (ddr))
4417 VEC_free (lambda_vector, heap, DDR_DIST_VECTS (ddr));
4418 if (DDR_DIR_VECTS (ddr))
4419 VEC_free (lambda_vector, heap, DDR_DIR_VECTS (ddr));
4420
4421 free (ddr);
4422 }
4423
4424 /* Free the memory used by the data dependence relations from
4425 DEPENDENCE_RELATIONS. */
4426
4427 void
4428 free_dependence_relations (VEC (ddr_p, heap) *dependence_relations)
4429 {
4430 unsigned int i;
4431 struct data_dependence_relation *ddr;
4432 VEC (loop_p, heap) *loop_nest = NULL;
4433
4434 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4435 {
4436 if (ddr == NULL)
4437 continue;
4438 if (loop_nest == NULL)
4439 loop_nest = DDR_LOOP_NEST (ddr);
4440 else
4441 gcc_assert (DDR_LOOP_NEST (ddr) == NULL
4442 || DDR_LOOP_NEST (ddr) == loop_nest);
4443 free_dependence_relation (ddr);
4444 }
4445
4446 if (loop_nest)
4447 VEC_free (loop_p, heap, loop_nest);
4448 VEC_free (ddr_p, heap, dependence_relations);
4449 }
4450
4451 /* Free the memory used by the data references from DATAREFS. */
4452
4453 void
4454 free_data_refs (VEC (data_reference_p, heap) *datarefs)
4455 {
4456 unsigned int i;
4457 struct data_reference *dr;
4458
4459 for (i = 0; VEC_iterate (data_reference_p, datarefs, i, dr); i++)
4460 free_data_ref (dr);
4461 VEC_free (data_reference_p, heap, datarefs);
4462 }
4463
4464
4465
4466 /* Dump vertex I in RDG to FILE. */
4467
4468 void
4469 dump_rdg_vertex (FILE *file, struct graph *rdg, int i)
4470 {
4471 struct vertex *v = &(rdg->vertices[i]);
4472 struct graph_edge *e;
4473
4474 fprintf (file, "(vertex %d: (%s%s) (in:", i,
4475 RDG_MEM_WRITE_STMT (rdg, i) ? "w" : "",
4476 RDG_MEM_READS_STMT (rdg, i) ? "r" : "");
4477
4478 if (v->pred)
4479 for (e = v->pred; e; e = e->pred_next)
4480 fprintf (file, " %d", e->src);
4481
4482 fprintf (file, ") (out:");
4483
4484 if (v->succ)
4485 for (e = v->succ; e; e = e->succ_next)
4486 fprintf (file, " %d", e->dest);
4487
4488 fprintf (file, ") \n");
4489 print_gimple_stmt (file, RDGV_STMT (v), 0, TDF_VOPS|TDF_MEMSYMS);
4490 fprintf (file, ")\n");
4491 }
4492
4493 /* Call dump_rdg_vertex on stderr. */
4494
4495 void
4496 debug_rdg_vertex (struct graph *rdg, int i)
4497 {
4498 dump_rdg_vertex (stderr, rdg, i);
4499 }
4500
4501 /* Dump component C of RDG to FILE. If DUMPED is non-null, set the
4502 dumped vertices to that bitmap. */
4503
4504 void dump_rdg_component (FILE *file, struct graph *rdg, int c, bitmap dumped)
4505 {
4506 int i;
4507
4508 fprintf (file, "(%d\n", c);
4509
4510 for (i = 0; i < rdg->n_vertices; i++)
4511 if (rdg->vertices[i].component == c)
4512 {
4513 if (dumped)
4514 bitmap_set_bit (dumped, i);
4515
4516 dump_rdg_vertex (file, rdg, i);
4517 }
4518
4519 fprintf (file, ")\n");
4520 }
4521
4522 /* Call dump_rdg_vertex on stderr. */
4523
4524 void
4525 debug_rdg_component (struct graph *rdg, int c)
4526 {
4527 dump_rdg_component (stderr, rdg, c, NULL);
4528 }
4529
4530 /* Dump the reduced dependence graph RDG to FILE. */
4531
4532 void
4533 dump_rdg (FILE *file, struct graph *rdg)
4534 {
4535 int i;
4536 bitmap dumped = BITMAP_ALLOC (NULL);
4537
4538 fprintf (file, "(rdg\n");
4539
4540 for (i = 0; i < rdg->n_vertices; i++)
4541 if (!bitmap_bit_p (dumped, i))
4542 dump_rdg_component (file, rdg, rdg->vertices[i].component, dumped);
4543
4544 fprintf (file, ")\n");
4545 BITMAP_FREE (dumped);
4546 }
4547
4548 /* Call dump_rdg on stderr. */
4549
4550 void
4551 debug_rdg (struct graph *rdg)
4552 {
4553 dump_rdg (stderr, rdg);
4554 }
4555
4556 static void
4557 dot_rdg_1 (FILE *file, struct graph *rdg)
4558 {
4559 int i;
4560
4561 fprintf (file, "digraph RDG {\n");
4562
4563 for (i = 0; i < rdg->n_vertices; i++)
4564 {
4565 struct vertex *v = &(rdg->vertices[i]);
4566 struct graph_edge *e;
4567
4568 /* Highlight reads from memory. */
4569 if (RDG_MEM_READS_STMT (rdg, i))
4570 fprintf (file, "%d [style=filled, fillcolor=green]\n", i);
4571
4572 /* Highlight stores to memory. */
4573 if (RDG_MEM_WRITE_STMT (rdg, i))
4574 fprintf (file, "%d [style=filled, fillcolor=red]\n", i);
4575
4576 if (v->succ)
4577 for (e = v->succ; e; e = e->succ_next)
4578 switch (RDGE_TYPE (e))
4579 {
4580 case input_dd:
4581 fprintf (file, "%d -> %d [label=input] \n", i, e->dest);
4582 break;
4583
4584 case output_dd:
4585 fprintf (file, "%d -> %d [label=output] \n", i, e->dest);
4586 break;
4587
4588 case flow_dd:
4589 /* These are the most common dependences: don't print these. */
4590 fprintf (file, "%d -> %d \n", i, e->dest);
4591 break;
4592
4593 case anti_dd:
4594 fprintf (file, "%d -> %d [label=anti] \n", i, e->dest);
4595 break;
4596
4597 default:
4598 gcc_unreachable ();
4599 }
4600 }
4601
4602 fprintf (file, "}\n\n");
4603 }
4604
4605 /* Display SCOP using dotty. */
4606
4607 void
4608 dot_rdg (struct graph *rdg)
4609 {
4610 FILE *file = fopen ("/tmp/rdg.dot", "w");
4611 gcc_assert (file != NULL);
4612
4613 dot_rdg_1 (file, rdg);
4614 fclose (file);
4615
4616 system ("dotty /tmp/rdg.dot");
4617 }
4618
4619
4620 /* This structure is used for recording the mapping statement index in
4621 the RDG. */
4622
4623 struct rdg_vertex_info GTY(())
4624 {
4625 gimple stmt;
4626 int index;
4627 };
4628
4629 /* Returns the index of STMT in RDG. */
4630
4631 int
4632 rdg_vertex_for_stmt (struct graph *rdg, gimple stmt)
4633 {
4634 struct rdg_vertex_info rvi, *slot;
4635
4636 rvi.stmt = stmt;
4637 slot = (struct rdg_vertex_info *) htab_find (rdg->indices, &rvi);
4638
4639 if (!slot)
4640 return -1;
4641
4642 return slot->index;
4643 }
4644
4645 /* Creates an edge in RDG for each distance vector from DDR. The
4646 order that we keep track of in the RDG is the order in which
4647 statements have to be executed. */
4648
4649 static void
4650 create_rdg_edge_for_ddr (struct graph *rdg, ddr_p ddr)
4651 {
4652 struct graph_edge *e;
4653 int va, vb;
4654 data_reference_p dra = DDR_A (ddr);
4655 data_reference_p drb = DDR_B (ddr);
4656 unsigned level = ddr_dependence_level (ddr);
4657
4658 /* For non scalar dependences, when the dependence is REVERSED,
4659 statement B has to be executed before statement A. */
4660 if (level > 0
4661 && !DDR_REVERSED_P (ddr))
4662 {
4663 data_reference_p tmp = dra;
4664 dra = drb;
4665 drb = tmp;
4666 }
4667
4668 va = rdg_vertex_for_stmt (rdg, DR_STMT (dra));
4669 vb = rdg_vertex_for_stmt (rdg, DR_STMT (drb));
4670
4671 if (va < 0 || vb < 0)
4672 return;
4673
4674 e = add_edge (rdg, va, vb);
4675 e->data = XNEW (struct rdg_edge);
4676
4677 RDGE_LEVEL (e) = level;
4678 RDGE_RELATION (e) = ddr;
4679
4680 /* Determines the type of the data dependence. */
4681 if (DR_IS_READ (dra) && DR_IS_READ (drb))
4682 RDGE_TYPE (e) = input_dd;
4683 else if (!DR_IS_READ (dra) && !DR_IS_READ (drb))
4684 RDGE_TYPE (e) = output_dd;
4685 else if (!DR_IS_READ (dra) && DR_IS_READ (drb))
4686 RDGE_TYPE (e) = flow_dd;
4687 else if (DR_IS_READ (dra) && !DR_IS_READ (drb))
4688 RDGE_TYPE (e) = anti_dd;
4689 }
4690
4691 /* Creates dependence edges in RDG for all the uses of DEF. IDEF is
4692 the index of DEF in RDG. */
4693
4694 static void
4695 create_rdg_edges_for_scalar (struct graph *rdg, tree def, int idef)
4696 {
4697 use_operand_p imm_use_p;
4698 imm_use_iterator iterator;
4699
4700 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, def)
4701 {
4702 struct graph_edge *e;
4703 int use = rdg_vertex_for_stmt (rdg, USE_STMT (imm_use_p));
4704
4705 if (use < 0)
4706 continue;
4707
4708 e = add_edge (rdg, idef, use);
4709 e->data = XNEW (struct rdg_edge);
4710 RDGE_TYPE (e) = flow_dd;
4711 RDGE_RELATION (e) = NULL;
4712 }
4713 }
4714
4715 /* Creates the edges of the reduced dependence graph RDG. */
4716
4717 static void
4718 create_rdg_edges (struct graph *rdg, VEC (ddr_p, heap) *ddrs)
4719 {
4720 int i;
4721 struct data_dependence_relation *ddr;
4722 def_operand_p def_p;
4723 ssa_op_iter iter;
4724
4725 for (i = 0; VEC_iterate (ddr_p, ddrs, i, ddr); i++)
4726 if (DDR_ARE_DEPENDENT (ddr) == NULL_TREE)
4727 create_rdg_edge_for_ddr (rdg, ddr);
4728
4729 for (i = 0; i < rdg->n_vertices; i++)
4730 FOR_EACH_PHI_OR_STMT_DEF (def_p, RDG_STMT (rdg, i),
4731 iter, SSA_OP_DEF)
4732 create_rdg_edges_for_scalar (rdg, DEF_FROM_PTR (def_p), i);
4733 }
4734
4735 /* Build the vertices of the reduced dependence graph RDG. */
4736
4737 void
4738 create_rdg_vertices (struct graph *rdg, VEC (gimple, heap) *stmts)
4739 {
4740 int i, j;
4741 gimple stmt;
4742
4743 for (i = 0; VEC_iterate (gimple, stmts, i, stmt); i++)
4744 {
4745 VEC (data_ref_loc, heap) *references;
4746 data_ref_loc *ref;
4747 struct vertex *v = &(rdg->vertices[i]);
4748 struct rdg_vertex_info *rvi = XNEW (struct rdg_vertex_info);
4749 struct rdg_vertex_info **slot;
4750
4751 rvi->stmt = stmt;
4752 rvi->index = i;
4753 slot = (struct rdg_vertex_info **) htab_find_slot (rdg->indices, rvi, INSERT);
4754
4755 if (!*slot)
4756 *slot = rvi;
4757 else
4758 free (rvi);
4759
4760 v->data = XNEW (struct rdg_vertex);
4761 RDG_STMT (rdg, i) = stmt;
4762
4763 RDG_MEM_WRITE_STMT (rdg, i) = false;
4764 RDG_MEM_READS_STMT (rdg, i) = false;
4765 if (gimple_code (stmt) == GIMPLE_PHI)
4766 continue;
4767
4768 get_references_in_stmt (stmt, &references);
4769 for (j = 0; VEC_iterate (data_ref_loc, references, j, ref); j++)
4770 if (!ref->is_read)
4771 RDG_MEM_WRITE_STMT (rdg, i) = true;
4772 else
4773 RDG_MEM_READS_STMT (rdg, i) = true;
4774
4775 VEC_free (data_ref_loc, heap, references);
4776 }
4777 }
4778
4779 /* Initialize STMTS with all the statements of LOOP. When
4780 INCLUDE_PHIS is true, include also the PHI nodes. The order in
4781 which we discover statements is important as
4782 generate_loops_for_partition is using the same traversal for
4783 identifying statements. */
4784
4785 static void
4786 stmts_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4787 {
4788 unsigned int i;
4789 basic_block *bbs = get_loop_body_in_dom_order (loop);
4790
4791 for (i = 0; i < loop->num_nodes; i++)
4792 {
4793 basic_block bb = bbs[i];
4794 gimple_stmt_iterator bsi;
4795 gimple stmt;
4796
4797 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4798 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4799
4800 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4801 {
4802 stmt = gsi_stmt (bsi);
4803 if (gimple_code (stmt) != GIMPLE_LABEL)
4804 VEC_safe_push (gimple, heap, *stmts, stmt);
4805 }
4806 }
4807
4808 free (bbs);
4809 }
4810
4811 /* Returns true when all the dependences are computable. */
4812
4813 static bool
4814 known_dependences_p (VEC (ddr_p, heap) *dependence_relations)
4815 {
4816 ddr_p ddr;
4817 unsigned int i;
4818
4819 for (i = 0; VEC_iterate (ddr_p, dependence_relations, i, ddr); i++)
4820 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
4821 return false;
4822
4823 return true;
4824 }
4825
4826 /* Computes a hash function for element ELT. */
4827
4828 static hashval_t
4829 hash_stmt_vertex_info (const void *elt)
4830 {
4831 const struct rdg_vertex_info *const rvi =
4832 (const struct rdg_vertex_info *) elt;
4833 gimple stmt = rvi->stmt;
4834
4835 return htab_hash_pointer (stmt);
4836 }
4837
4838 /* Compares database elements E1 and E2. */
4839
4840 static int
4841 eq_stmt_vertex_info (const void *e1, const void *e2)
4842 {
4843 const struct rdg_vertex_info *elt1 = (const struct rdg_vertex_info *) e1;
4844 const struct rdg_vertex_info *elt2 = (const struct rdg_vertex_info *) e2;
4845
4846 return elt1->stmt == elt2->stmt;
4847 }
4848
4849 /* Free the element E. */
4850
4851 static void
4852 hash_stmt_vertex_del (void *e)
4853 {
4854 free (e);
4855 }
4856
4857 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4858 statement of the loop nest, and one edge per data dependence or
4859 scalar dependence. */
4860
4861 struct graph *
4862 build_empty_rdg (int n_stmts)
4863 {
4864 int nb_data_refs = 10;
4865 struct graph *rdg = new_graph (n_stmts);
4866
4867 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4868 eq_stmt_vertex_info, hash_stmt_vertex_del);
4869 return rdg;
4870 }
4871
4872 /* Build the Reduced Dependence Graph (RDG) with one vertex per
4873 statement of the loop nest, and one edge per data dependence or
4874 scalar dependence. */
4875
4876 struct graph *
4877 build_rdg (struct loop *loop)
4878 {
4879 int nb_data_refs = 10;
4880 struct graph *rdg = NULL;
4881 VEC (ddr_p, heap) *dependence_relations;
4882 VEC (data_reference_p, heap) *datarefs;
4883 VEC (gimple, heap) *stmts = VEC_alloc (gimple, heap, nb_data_refs);
4884
4885 dependence_relations = VEC_alloc (ddr_p, heap, nb_data_refs * nb_data_refs) ;
4886 datarefs = VEC_alloc (data_reference_p, heap, nb_data_refs);
4887 compute_data_dependences_for_loop (loop,
4888 false,
4889 &datarefs,
4890 &dependence_relations);
4891
4892 if (!known_dependences_p (dependence_relations))
4893 {
4894 free_dependence_relations (dependence_relations);
4895 free_data_refs (datarefs);
4896 VEC_free (gimple, heap, stmts);
4897
4898 return rdg;
4899 }
4900
4901 stmts_from_loop (loop, &stmts);
4902 rdg = build_empty_rdg (VEC_length (gimple, stmts));
4903
4904 rdg->indices = htab_create (nb_data_refs, hash_stmt_vertex_info,
4905 eq_stmt_vertex_info, hash_stmt_vertex_del);
4906 create_rdg_vertices (rdg, stmts);
4907 create_rdg_edges (rdg, dependence_relations);
4908
4909 VEC_free (gimple, heap, stmts);
4910 return rdg;
4911 }
4912
4913 /* Free the reduced dependence graph RDG. */
4914
4915 void
4916 free_rdg (struct graph *rdg)
4917 {
4918 int i;
4919
4920 for (i = 0; i < rdg->n_vertices; i++)
4921 free (rdg->vertices[i].data);
4922
4923 htab_delete (rdg->indices);
4924 free_graph (rdg);
4925 }
4926
4927 /* Initialize STMTS with all the statements of LOOP that contain a
4928 store to memory. */
4929
4930 void
4931 stores_from_loop (struct loop *loop, VEC (gimple, heap) **stmts)
4932 {
4933 unsigned int i;
4934 basic_block *bbs = get_loop_body_in_dom_order (loop);
4935
4936 for (i = 0; i < loop->num_nodes; i++)
4937 {
4938 basic_block bb = bbs[i];
4939 gimple_stmt_iterator bsi;
4940
4941 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
4942 if (!ZERO_SSA_OPERANDS (gsi_stmt (bsi), SSA_OP_VDEF))
4943 VEC_safe_push (gimple, heap, *stmts, gsi_stmt (bsi));
4944 }
4945
4946 free (bbs);
4947 }
4948
4949 /* For a data reference REF, return the declaration of its base
4950 address or NULL_TREE if the base is not determined. */
4951
4952 static inline tree
4953 ref_base_address (gimple stmt, data_ref_loc *ref)
4954 {
4955 tree base = NULL_TREE;
4956 tree base_address;
4957 struct data_reference *dr = XCNEW (struct data_reference);
4958
4959 DR_STMT (dr) = stmt;
4960 DR_REF (dr) = *ref->pos;
4961 dr_analyze_innermost (dr);
4962 base_address = DR_BASE_ADDRESS (dr);
4963
4964 if (!base_address)
4965 goto end;
4966
4967 switch (TREE_CODE (base_address))
4968 {
4969 case ADDR_EXPR:
4970 base = TREE_OPERAND (base_address, 0);
4971 break;
4972
4973 default:
4974 base = base_address;
4975 break;
4976 }
4977
4978 end:
4979 free_data_ref (dr);
4980 return base;
4981 }
4982
4983 /* Determines whether the statement from vertex V of the RDG has a
4984 definition used outside the loop that contains this statement. */
4985
4986 bool
4987 rdg_defs_used_in_other_loops_p (struct graph *rdg, int v)
4988 {
4989 gimple stmt = RDG_STMT (rdg, v);
4990 struct loop *loop = loop_containing_stmt (stmt);
4991 use_operand_p imm_use_p;
4992 imm_use_iterator iterator;
4993 ssa_op_iter it;
4994 def_operand_p def_p;
4995
4996 if (!loop)
4997 return true;
4998
4999 FOR_EACH_PHI_OR_STMT_DEF (def_p, stmt, it, SSA_OP_DEF)
5000 {
5001 FOR_EACH_IMM_USE_FAST (imm_use_p, iterator, DEF_FROM_PTR (def_p))
5002 {
5003 if (loop_containing_stmt (USE_STMT (imm_use_p)) != loop)
5004 return true;
5005 }
5006 }
5007
5008 return false;
5009 }
5010
5011 /* Determines whether statements S1 and S2 access to similar memory
5012 locations. Two memory accesses are considered similar when they
5013 have the same base address declaration, i.e. when their
5014 ref_base_address is the same. */
5015
5016 bool
5017 have_similar_memory_accesses (gimple s1, gimple s2)
5018 {
5019 bool res = false;
5020 unsigned i, j;
5021 VEC (data_ref_loc, heap) *refs1, *refs2;
5022 data_ref_loc *ref1, *ref2;
5023
5024 get_references_in_stmt (s1, &refs1);
5025 get_references_in_stmt (s2, &refs2);
5026
5027 for (i = 0; VEC_iterate (data_ref_loc, refs1, i, ref1); i++)
5028 {
5029 tree base1 = ref_base_address (s1, ref1);
5030
5031 if (base1)
5032 for (j = 0; VEC_iterate (data_ref_loc, refs2, j, ref2); j++)
5033 if (base1 == ref_base_address (s2, ref2))
5034 {
5035 res = true;
5036 goto end;
5037 }
5038 }
5039
5040 end:
5041 VEC_free (data_ref_loc, heap, refs1);
5042 VEC_free (data_ref_loc, heap, refs2);
5043 return res;
5044 }
5045
5046 /* Helper function for the hashtab. */
5047
5048 static int
5049 have_similar_memory_accesses_1 (const void *s1, const void *s2)
5050 {
5051 return have_similar_memory_accesses (CONST_CAST_GIMPLE ((const_gimple) s1),
5052 CONST_CAST_GIMPLE ((const_gimple) s2));
5053 }
5054
5055 /* Helper function for the hashtab. */
5056
5057 static hashval_t
5058 ref_base_address_1 (const void *s)
5059 {
5060 gimple stmt = CONST_CAST_GIMPLE ((const_gimple) s);
5061 unsigned i;
5062 VEC (data_ref_loc, heap) *refs;
5063 data_ref_loc *ref;
5064 hashval_t res = 0;
5065
5066 get_references_in_stmt (stmt, &refs);
5067
5068 for (i = 0; VEC_iterate (data_ref_loc, refs, i, ref); i++)
5069 if (!ref->is_read)
5070 {
5071 res = htab_hash_pointer (ref_base_address (stmt, ref));
5072 break;
5073 }
5074
5075 VEC_free (data_ref_loc, heap, refs);
5076 return res;
5077 }
5078
5079 /* Try to remove duplicated write data references from STMTS. */
5080
5081 void
5082 remove_similar_memory_refs (VEC (gimple, heap) **stmts)
5083 {
5084 unsigned i;
5085 gimple stmt;
5086 htab_t seen = htab_create (VEC_length (gimple, *stmts), ref_base_address_1,
5087 have_similar_memory_accesses_1, NULL);
5088
5089 for (i = 0; VEC_iterate (gimple, *stmts, i, stmt); )
5090 {
5091 void **slot;
5092
5093 slot = htab_find_slot (seen, stmt, INSERT);
5094
5095 if (*slot)
5096 VEC_ordered_remove (gimple, *stmts, i);
5097 else
5098 {
5099 *slot = (void *) stmt;
5100 i++;
5101 }
5102 }
5103
5104 htab_delete (seen);
5105 }
5106
5107 /* Returns the index of PARAMETER in the parameters vector of the
5108 ACCESS_MATRIX. If PARAMETER does not exist return -1. */
5109
5110 int
5111 access_matrix_get_index_for_parameter (tree parameter,
5112 struct access_matrix *access_matrix)
5113 {
5114 int i;
5115 VEC (tree,heap) *lambda_parameters = AM_PARAMETERS (access_matrix);
5116 tree lambda_parameter;
5117
5118 for (i = 0; VEC_iterate (tree, lambda_parameters, i, lambda_parameter); i++)
5119 if (lambda_parameter == parameter)
5120 return i + AM_NB_INDUCTION_VARS (access_matrix);
5121
5122 return -1;
5123 }